
Commit d4bf0065 authored by Bob Liu, committed by Konrad Rzeszutek Wilk

xen/blkback: make pool of persistent grants and free pages per-queue

Make the pools of persistent grants and free pages per-queue/ring instead of
per-device to get better scalability.
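
In sketch form, the change moves the persistent-grant tree and the free-page
pool from the per-device struct xen_blkif into the per-ring struct
xen_blkif_ring, so every queue gets its own pools and locks. A simplified
excerpt of the new per-ring fields (see the common.h hunk near the end of the
diff for the full definition):

	struct xen_blkif_ring {
		/* ... per-ring request and I/O state ... */

		/* Tree to store persistent grants (now per-ring). */
		spinlock_t		pers_gnts_lock;
		struct rb_root		persistent_gnts;
		unsigned int		persistent_gnt_c;
		atomic_t		persistent_gnt_in_use;
		unsigned long		next_lru;

		/* Used by the kworker that offloads the persistent-grant purge. */
		struct list_head	persistent_purge_list;
		struct work_struct	persistent_purge_work;

		/* Buffer of free pages used to map grant refs (now per-ring). */
		spinlock_t		free_pages_lock;
		int			free_pages_num;
		struct list_head	free_pages;

		/* ... */
	};

Helpers such as get_free_page(), put_free_pages(), add_persistent_gnt() and
purge_persistent_gnt() correspondingly take a struct xen_blkif_ring * instead
of a struct xen_blkif *.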

The test was done with fio against the null_blk driver:
dom0: v4.2-rc8, 16 vcpus, 10GB, "modprobe null_blk"
domU: v4.2-rc8, 16 vcpus, 10GB

[test]
rw=read
direct=1
ioengine=libaio
bs=4k
time_based
runtime=30
filename=/dev/xvdb
numjobs=16
iodepth=64
iodepth_batch=64
iodepth_batch_complete=64
group_reporting

Results:
iops1: After patch "xen/blkfront: make persistent grants per-queue".
iops2: After this patch.

Queues:          1      4            8            16
Iops orig (k):   810    1064         780          700
Iops1 (k):       810    1230 (~20%)  1024 (~20%)  850 (~20%)
Iops2 (k):       810    1410 (~35%)  1354 (~75%)  1440 (~100%)

With 4 queues this commit gives a ~35% increase in IOPS, rising to ~75% at 8
queues and ~100% at 16 queues; unlike before, performance no longer drops as
the number of queues increases.

A chart of these results is available at:
https://www.dropbox.com/s/agrcy2pbzbsvmwv/iops.png?dl=0

Signed-off-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
parent d62d8600
drivers/block/xen-blkback/blkback.c  +92 −110
@@ -123,60 +123,60 @@ module_param(log_stats, int, 0644);
/* Number of free pages to remove on each call to gnttab_free_pages */
#define NUM_BATCH_FREE_PAGES 10

static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
{
	unsigned long flags;

	spin_lock_irqsave(&blkif->free_pages_lock, flags);
	if (list_empty(&blkif->free_pages)) {
		BUG_ON(blkif->free_pages_num != 0);
		spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
	spin_lock_irqsave(&ring->free_pages_lock, flags);
	if (list_empty(&ring->free_pages)) {
		BUG_ON(ring->free_pages_num != 0);
		spin_unlock_irqrestore(&ring->free_pages_lock, flags);
		return gnttab_alloc_pages(1, page);
	}
	BUG_ON(blkif->free_pages_num == 0);
	page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
	BUG_ON(ring->free_pages_num == 0);
	page[0] = list_first_entry(&ring->free_pages, struct page, lru);
	list_del(&page[0]->lru);
	blkif->free_pages_num--;
	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
	ring->free_pages_num--;
	spin_unlock_irqrestore(&ring->free_pages_lock, flags);

	return 0;
}

static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
                                  int num)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&blkif->free_pages_lock, flags);
	spin_lock_irqsave(&ring->free_pages_lock, flags);
	for (i = 0; i < num; i++)
		list_add(&page[i]->lru, &blkif->free_pages);
	blkif->free_pages_num += num;
	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
		list_add(&page[i]->lru, &ring->free_pages);
	ring->free_pages_num += num;
	spin_unlock_irqrestore(&ring->free_pages_lock, flags);
}

static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
{
	/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
	struct page *page[NUM_BATCH_FREE_PAGES];
	unsigned int num_pages = 0;
	unsigned long flags;

	spin_lock_irqsave(&blkif->free_pages_lock, flags);
	while (blkif->free_pages_num > num) {
		BUG_ON(list_empty(&blkif->free_pages));
		page[num_pages] = list_first_entry(&blkif->free_pages,
	spin_lock_irqsave(&ring->free_pages_lock, flags);
	while (ring->free_pages_num > num) {
		BUG_ON(list_empty(&ring->free_pages));
		page[num_pages] = list_first_entry(&ring->free_pages,
		                                   struct page, lru);
		list_del(&page[num_pages]->lru);
		blkif->free_pages_num--;
		ring->free_pages_num--;
		if (++num_pages == NUM_BATCH_FREE_PAGES) {
			spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
			spin_unlock_irqrestore(&ring->free_pages_lock, flags);
			gnttab_free_pages(num_pages, page);
			spin_lock_irqsave(&blkif->free_pages_lock, flags);
			spin_lock_irqsave(&ring->free_pages_lock, flags);
			num_pages = 0;
		}
	}
	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
	spin_unlock_irqrestore(&ring->free_pages_lock, flags);
	if (num_pages != 0)
		gnttab_free_pages(num_pages, page);
}
@@ -199,23 +199,29 @@ static void make_response(struct xen_blkif_ring *ring, u64 id,


/*
 * pers_gnts_lock must be used around all the persistent grant helpers
 * because blkback may use multi-thread/queue for each backend.
 * We don't need locking around the persistent grant helpers
 * because blkback uses a single-thread for each backend, so we
 * can be sure that this functions will never be called recursively.
 *
 * The only exception to that is put_persistent_grant, that can be called
 * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
 * bit operations to modify the flags of a persistent grant and to count
 * the number of used grants.
 */
static int add_persistent_gnt(struct xen_blkif *blkif,
static int add_persistent_gnt(struct xen_blkif_ring *ring,
			       struct persistent_gnt *persistent_gnt)
{
	struct rb_node **new = NULL, *parent = NULL;
	struct persistent_gnt *this;
	struct xen_blkif *blkif = ring->blkif;

	BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
	if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
	if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
		if (!blkif->vbd.overflow_max_grants)
			blkif->vbd.overflow_max_grants = 1;
		return -EBUSY;
	}
	/* Figure out where to put new node */
	new = &blkif->persistent_gnts.rb_node;
	new = &ring->persistent_gnts.rb_node;
	while (*new) {
		this = container_of(*new, struct persistent_gnt, node);

@@ -234,20 +240,19 @@ static int add_persistent_gnt(struct xen_blkif *blkif,
	set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
	/* Add new node and rebalance tree. */
	rb_link_node(&(persistent_gnt->node), parent, new);
	rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
	blkif->persistent_gnt_c++;
	atomic_inc(&blkif->persistent_gnt_in_use);
	rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
	ring->persistent_gnt_c++;
	atomic_inc(&ring->persistent_gnt_in_use);
	return 0;
}

static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
						 grant_ref_t gref)
{
	struct persistent_gnt *data;
	struct rb_node *node = NULL;

	BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
	node = blkif->persistent_gnts.rb_node;
	node = ring->persistent_gnts.rb_node;
	while (node) {
		data = container_of(node, struct persistent_gnt, node);

@@ -261,25 +266,24 @@ static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
				return NULL;
			}
			set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
			atomic_inc(&blkif->persistent_gnt_in_use);
			atomic_inc(&ring->persistent_gnt_in_use);
			return data;
		}
	}
	return NULL;
}

static void put_persistent_gnt(struct xen_blkif *blkif,
static void put_persistent_gnt(struct xen_blkif_ring *ring,
                               struct persistent_gnt *persistent_gnt)
{
	BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
	if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
		pr_alert_ratelimited("freeing a grant already unused\n");
	set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
	clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
	atomic_dec(&blkif->persistent_gnt_in_use);
	atomic_dec(&ring->persistent_gnt_in_use);
}

static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root,
                                 unsigned int num)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -293,7 +297,6 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
	unmap_data.unmap_ops = unmap;
	unmap_data.kunmap_ops = NULL;

	BUG_ON(!spin_is_locked(&blkif->pers_gnts_lock));
	foreach_grant_safe(persistent_gnt, n, root, node) {
		BUG_ON(persistent_gnt->handle ==
			BLKBACK_INVALID_HANDLE);
@@ -311,7 +314,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
			unmap_data.count = segs_to_unmap;
			BUG_ON(gnttab_unmap_refs_sync(&unmap_data));

			put_free_pages(blkif, pages, segs_to_unmap);
			put_free_pages(ring, pages, segs_to_unmap);
			segs_to_unmap = 0;
		}

@@ -328,17 +331,15 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct persistent_gnt *persistent_gnt;
	int segs_to_unmap = 0;
	struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
	struct xen_blkif_ring *ring = container_of(work, typeof(*ring), persistent_purge_work);
	struct gntab_unmap_queue_data unmap_data;
	unsigned long flags;

	unmap_data.pages = pages;
	unmap_data.unmap_ops = unmap;
	unmap_data.kunmap_ops = NULL;

	spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
	while(!list_empty(&blkif->persistent_purge_list)) {
		persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
	while(!list_empty(&ring->persistent_purge_list)) {
		persistent_gnt = list_first_entry(&ring->persistent_purge_list,
		                                  struct persistent_gnt,
		                                  remove_node);
		list_del(&persistent_gnt->remove_node);
@@ -353,45 +354,42 @@ void xen_blkbk_unmap_purged_grants(struct work_struct *work)
		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
			unmap_data.count = segs_to_unmap;
			BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
			put_free_pages(blkif, pages, segs_to_unmap);
			put_free_pages(ring, pages, segs_to_unmap);
			segs_to_unmap = 0;
		}
		kfree(persistent_gnt);
	}
	spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
	if (segs_to_unmap > 0) {
		unmap_data.count = segs_to_unmap;
		BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
		put_free_pages(blkif, pages, segs_to_unmap);
		put_free_pages(ring, pages, segs_to_unmap);
	}
}

static void purge_persistent_gnt(struct xen_blkif *blkif)
static void purge_persistent_gnt(struct xen_blkif_ring *ring)
{
	struct persistent_gnt *persistent_gnt;
	struct rb_node *n;
	unsigned int num_clean, total;
	bool scan_used = false, clean_used = false;
	struct rb_root *root;
	unsigned long flags;

	spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
	if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
	    (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
	    !blkif->vbd.overflow_max_grants)) {
	if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
	    (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
	    !ring->blkif->vbd.overflow_max_grants)) {
		goto out;
	}

	if (work_busy(&blkif->persistent_purge_work)) {
	if (work_busy(&ring->persistent_purge_work)) {
		pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
		goto out;
	}

	num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
	num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
	num_clean = min(blkif->persistent_gnt_c, num_clean);
	num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
	num_clean = min(ring->persistent_gnt_c, num_clean);
	if ((num_clean == 0) ||
	    (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
	    (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
		goto out;

	/*
@@ -407,8 +405,8 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)

	pr_debug("Going to purge %u persistent grants\n", num_clean);

	BUG_ON(!list_empty(&blkif->persistent_purge_list));
	root = &blkif->persistent_gnts;
	BUG_ON(!list_empty(&ring->persistent_purge_list));
	root = &ring->persistent_gnts;
purge_list:
	foreach_grant_safe(persistent_gnt, n, root, node) {
		BUG_ON(persistent_gnt->handle ==
@@ -427,7 +425,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)

		rb_erase(&persistent_gnt->node, root);
		list_add(&persistent_gnt->remove_node,
		         &blkif->persistent_purge_list);
			 &ring->persistent_purge_list);
		if (--num_clean == 0)
			goto finished;
	}
@@ -448,18 +446,14 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
		goto purge_list;
	}

	blkif->persistent_gnt_c -= (total - num_clean);
	spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
	blkif->vbd.overflow_max_grants = 0;
	ring->persistent_gnt_c -= (total - num_clean);
	ring->blkif->vbd.overflow_max_grants = 0;

	/* We can defer this work */
	schedule_work(&blkif->persistent_purge_work);
	schedule_work(&ring->persistent_purge_work);
	pr_debug("Purged %u/%u\n", (total - num_clean), total);
	return;

out:
	spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);

	return;
}

@@ -591,14 +585,16 @@ irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
 * SCHEDULER FUNCTIONS
 */

static void print_stats(struct xen_blkif *blkif)
static void print_stats(struct xen_blkif_ring *ring)
{
	struct xen_blkif *blkif = ring->blkif;

	pr_info("(%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
		 "  |  ds %4llu | pg: %4u/%4d\n",
		 current->comm, blkif->st_oo_req,
		 blkif->st_rd_req, blkif->st_wr_req,
		 blkif->st_f_req, blkif->st_ds_req,
		 blkif->persistent_gnt_c,
		 ring->persistent_gnt_c,
		 xen_blkif_max_pgrants);
	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
	blkif->st_rd_req = 0;
@@ -651,23 +647,23 @@ int xen_blkif_schedule(void *arg)

purge_gnt_list:
		if (blkif->vbd.feature_gnt_persistent &&
		    time_after(jiffies, blkif->next_lru)) {
			purge_persistent_gnt(blkif);
			blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
		    time_after(jiffies, ring->next_lru)) {
			purge_persistent_gnt(ring);
			ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
		}

		/* Shrink if we have more than xen_blkif_max_buffer_pages */
		shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
		shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);

		if (log_stats && time_after(jiffies, blkif->st_print))
			print_stats(blkif);
		if (log_stats && time_after(jiffies, ring->blkif->st_print))
			print_stats(ring);
	}

	/* Drain pending purge work */
	flush_work(&blkif->persistent_purge_work);
	flush_work(&ring->persistent_purge_work);

	if (log_stats)
		print_stats(blkif);
		print_stats(ring);

	ring->xenblkd = NULL;
	xen_blkif_put(blkif);
@@ -680,21 +676,16 @@ int xen_blkif_schedule(void *arg)
 */
void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
{
	struct xen_blkif *blkif = ring->blkif;
	unsigned long flags;

	/* Free all persistent grant pages */
	spin_lock_irqsave(&blkif->pers_gnts_lock, flags);
	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
		free_persistent_gnts(blkif, &blkif->persistent_gnts,
			blkif->persistent_gnt_c);
	if (!RB_EMPTY_ROOT(&ring->persistent_gnts))
		free_persistent_gnts(ring, &ring->persistent_gnts,
			ring->persistent_gnt_c);

	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
	blkif->persistent_gnt_c = 0;
	spin_unlock_irqrestore(&blkif->pers_gnts_lock, flags);
	BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
	ring->persistent_gnt_c = 0;

	/* Since we are shutting down remove all pages from the buffer */
	shrink_free_pagepool(blkif, 0 /* All */);
	shrink_free_pagepool(ring, 0 /* All */);
}

static unsigned int xen_blkbk_unmap_prepare(
@@ -705,13 +696,10 @@ static unsigned int xen_blkbk_unmap_prepare(
	struct page **unmap_pages)
{
	unsigned int i, invcount = 0;
	unsigned long flags;

	for (i = 0; i < num; i++) {
		if (pages[i]->persistent_gnt != NULL) {
			spin_lock_irqsave(&ring->blkif->pers_gnts_lock, flags);
			put_persistent_gnt(ring->blkif, pages[i]->persistent_gnt);
			spin_unlock_irqrestore(&ring->blkif->pers_gnts_lock, flags);
			put_persistent_gnt(ring, pages[i]->persistent_gnt);
			continue;
		}
		if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
@@ -736,7 +724,7 @@ static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_
	   but is this the best way to deal with this? */
	BUG_ON(result);

	put_free_pages(blkif, data->pages, data->count);
	put_free_pages(ring, data->pages, data->count);
	make_response(ring, pending_req->id,
		      pending_req->operation, pending_req->status);
	free_req(ring, pending_req);
@@ -803,7 +791,7 @@ static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
		if (invcount) {
			ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
			BUG_ON(ret);
			put_free_pages(ring->blkif, unmap_pages, invcount);
			put_free_pages(ring, unmap_pages, invcount);
		}
		pages += batch;
		num -= batch;
@@ -824,7 +812,6 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
	int last_map = 0, map_until = 0;
	int use_persistent_gnts;
	struct xen_blkif *blkif = ring->blkif;
	unsigned long irq_flags;

	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);

@@ -838,11 +825,9 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
		uint32_t flags;

		if (use_persistent_gnts) {
			spin_lock_irqsave(&blkif->pers_gnts_lock, irq_flags);
			persistent_gnt = get_persistent_gnt(
				blkif,
				ring,
				pages[i]->gref);
			spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
		}

		if (persistent_gnt) {
@@ -853,7 +838,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
			pages[i]->page = persistent_gnt->page;
			pages[i]->persistent_gnt = persistent_gnt;
		} else {
			if (get_free_page(blkif, &pages[i]->page))
			if (get_free_page(ring, &pages[i]->page))
				goto out_of_memory;
			addr = vaddr(pages[i]->page);
			pages_to_gnt[segs_to_map] = pages[i]->page;
@@ -886,7 +871,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
			BUG_ON(new_map_idx >= segs_to_map);
			if (unlikely(map[new_map_idx].status != 0)) {
				pr_debug("invalid buffer -- could not remap it\n");
				put_free_pages(blkif, &pages[seg_idx]->page, 1);
				put_free_pages(ring, &pages[seg_idx]->page, 1);
				pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
				ret |= 1;
				goto next;
@@ -896,7 +881,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
			continue;
		}
		if (use_persistent_gnts &&
		    blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
		    ring->persistent_gnt_c < xen_blkif_max_pgrants) {
			/*
			 * We are using persistent grants, the grant is
			 * not mapped but we might have room for it.
@@ -914,19 +899,16 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
			persistent_gnt->gnt = map[new_map_idx].ref;
			persistent_gnt->handle = map[new_map_idx].handle;
			persistent_gnt->page = pages[seg_idx]->page;
			spin_lock_irqsave(&blkif->pers_gnts_lock, irq_flags);
			if (add_persistent_gnt(blkif,
			if (add_persistent_gnt(ring,
			                       persistent_gnt)) {
				spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
				kfree(persistent_gnt);
				persistent_gnt = NULL;
				goto next;
			}
			pages[seg_idx]->persistent_gnt = persistent_gnt;
			pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
				 persistent_gnt->gnt, blkif->persistent_gnt_c,
				 persistent_gnt->gnt, ring->persistent_gnt_c,
				 xen_blkif_max_pgrants);
			spin_unlock_irqrestore(&blkif->pers_gnts_lock, irq_flags);
			goto next;
		}
		if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
@@ -950,7 +932,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,

out_of_memory:
	pr_alert("%s: out of memory\n", __func__);
	put_free_pages(blkif, pages_to_gnt, segs_to_map);
	put_free_pages(ring, pages_to_gnt, segs_to_map);
	return -ENOMEM;
}

drivers/block/xen-blkback/common.h  +16 −16
@@ -291,6 +291,22 @@ struct xen_blkif_ring {
	spinlock_t		pending_free_lock;
	wait_queue_head_t	pending_free_wq;

	/* Tree to store persistent grants. */
	spinlock_t		pers_gnts_lock;
	struct rb_root		persistent_gnts;
	unsigned int		persistent_gnt_c;
	atomic_t		persistent_gnt_in_use;
	unsigned long           next_lru;

	/* Used by the kworker that offload work from the persistent purge. */
	struct list_head	persistent_purge_list;
	struct work_struct	persistent_purge_work;

	/* Buffer of free pages to map grant refs. */
	spinlock_t		free_pages_lock;
	int			free_pages_num;
	struct list_head	free_pages;

	struct work_struct	free_work;
	/* Thread shutdown wait queue. */
	wait_queue_head_t	shutdown_wq;
@@ -312,22 +328,6 @@ struct xen_blkif {
	struct completion	drain_complete;
	atomic_t		drain;

	/* tree to store persistent grants */
	spinlock_t		pers_gnts_lock;
	struct rb_root		persistent_gnts;
	unsigned int		persistent_gnt_c;
	atomic_t		persistent_gnt_in_use;
	unsigned long           next_lru;

	/* used by the kworker that offload work from the persistent purge */
	struct list_head	persistent_purge_list;
	struct work_struct	persistent_purge_work;

	/* buffer of free pages to map grant refs */
	spinlock_t		free_pages_lock;
	int			free_pages_num;
	struct list_head	free_pages;

	/* statistics */
	unsigned long		st_print;
	unsigned long long			st_rd_req;
drivers/block/xen-blkback/xenbus.c  +10 −11
@@ -150,6 +150,10 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
		spin_lock_init(&ring->blk_ring_lock);
		init_waitqueue_head(&ring->wq);
		INIT_LIST_HEAD(&ring->pending_free);
		INIT_LIST_HEAD(&ring->persistent_purge_list);
		INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
		spin_lock_init(&ring->free_pages_lock);
		INIT_LIST_HEAD(&ring->free_pages);

		spin_lock_init(&ring->pending_free_lock);
		init_waitqueue_head(&ring->pending_free_wq);
@@ -175,11 +179,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
	atomic_set(&blkif->refcnt, 1);
	init_completion(&blkif->drain_complete);
	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
	spin_lock_init(&blkif->free_pages_lock);
	INIT_LIST_HEAD(&blkif->free_pages);
	INIT_LIST_HEAD(&blkif->persistent_purge_list);
	blkif->st_print = jiffies;
	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);

	return blkif;
}
@@ -290,6 +290,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
			i++;
		}

		BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
		BUG_ON(!list_empty(&ring->persistent_purge_list));
		BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
		BUG_ON(!list_empty(&ring->free_pages));
		BUG_ON(ring->free_pages_num != 0);
		BUG_ON(ring->persistent_gnt_c != 0);
		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
	}
	blkif->nr_ring_pages = 0;
@@ -304,13 +310,6 @@ static void xen_blkif_free(struct xen_blkif *blkif)
	xen_vbd_free(&blkif->vbd);

	/* Make sure everything is drained before shutting down */
	BUG_ON(blkif->persistent_gnt_c != 0);
	BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
	BUG_ON(blkif->free_pages_num != 0);
	BUG_ON(!list_empty(&blkif->persistent_purge_list));
	BUG_ON(!list_empty(&blkif->free_pages));
	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));

	kfree(blkif->rings);
	kmem_cache_free(xen_blkif_cachep, blkif);
}