Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 814d04e7, authored by Valentin Priescu, committed by Konrad Rzeszutek Wilk
Browse files

xen-blkback: defer freeing blkif to avoid blocking xenwatch



Currently xenwatch blocks in VBD disconnect, waiting for all pending I/O
requests to finish. If the VBD is attached to a hot-swappable disk, then
xenwatch can hang for a long period of time, stalling other watches.

 INFO: task xenwatch:39 blocked for more than 120 seconds.
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 ffff880057f01bd0 0000000000000246 ffff880057f01ac0 ffffffff810b0782
 ffff880057f01ad0 00000000000131c0 0000000000000004 ffff880057edb040
 ffff8800344c6080 0000000000000000 ffff880058c00ba0 ffff880057edb040
 Call Trace:
 [<ffffffff810b0782>] ? irq_to_desc+0x12/0x20
 [<ffffffff8128f761>] ? list_del+0x11/0x40
 [<ffffffff8147a080>] ? wait_for_common+0x60/0x160
 [<ffffffff8147bcef>] ? _raw_spin_lock_irqsave+0x2f/0x50
 [<ffffffff8147bd49>] ? _raw_spin_unlock_irqrestore+0x19/0x20
 [<ffffffff8147a26a>] schedule+0x3a/0x60
 [<ffffffffa018fe6a>] xen_blkif_disconnect+0x8a/0x100 [xen_blkback]
 [<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
 [<ffffffffa018ffce>] xen_blkbk_remove+0xae/0x1e0 [xen_blkback]
 [<ffffffff8130b254>] xenbus_dev_remove+0x44/0x90
 [<ffffffff81345cb7>] __device_release_driver+0x77/0xd0
 [<ffffffff81346488>] device_release_driver+0x28/0x40
 [<ffffffff813456e8>] bus_remove_device+0x78/0xe0
 [<ffffffff81342c9f>] device_del+0x12f/0x1a0
 [<ffffffff81342d2d>] device_unregister+0x1d/0x60
 [<ffffffffa0190826>] frontend_changed+0xa6/0x4d0 [xen_blkback]
 [<ffffffffa019c252>] ? frontend_changed+0x192/0x650 [xen_netback]
 [<ffffffff8130ae50>] ? cmp_dev+0x60/0x60
 [<ffffffff81344fe4>] ? bus_for_each_dev+0x94/0xa0
 [<ffffffff8130b06e>] xenbus_otherend_changed+0xbe/0x120
 [<ffffffff8130b4cb>] frontend_changed+0xb/0x10
 [<ffffffff81309c82>] xenwatch_thread+0xf2/0x130
 [<ffffffff81079f70>] ? wake_up_bit+0x40/0x40
 [<ffffffff81309b90>] ? xenbus_directory+0x80/0x80
 [<ffffffff810799d6>] kthread+0x96/0xa0
 [<ffffffff81485934>] kernel_thread_helper+0x4/0x10
 [<ffffffff814839f3>] ? int_ret_from_sys_call+0x7/0x1b
 [<ffffffff8147c17c>] ? retint_restore_args+0x5/0x6
 [<ffffffff81485930>] ? gs_change+0x13/0x13

With this patch, when there is still pending I/O, the actual disconnect
is done by the last reference holder (last pending I/O request). In this
case, xenwatch doesn't block indefinitely.

Signed-off-by: Valentin Priescu <priescuv@amazon.com>
Reviewed-by: Steven Kady <stevkady@amazon.com>
Reviewed-by: Steven Noonan <snoonan@amazon.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
parent 1c339ef7
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -314,7 +314,7 @@ struct xen_blkif {
	unsigned long long			st_rd_sect;
	unsigned long long			st_wr_sect;

	wait_queue_head_t	waiting_to_free;
	struct work_struct	free_work;
	/* Thread shutdown wait queue. */
	wait_queue_head_t	shutdown_wq;
};
@@ -361,7 +361,7 @@ struct pending_req {
/*
 * Drop one reference on blkif _b by atomically decrementing its refcnt.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers,
 * so the wake_up() line is presumably the removed line and schedule_work()
 * its replacement -- confirm against the real header.  Read literally as C,
 * only wake_up() is governed by the unbraced if, and schedule_work() would
 * run on every put rather than only when the count reaches zero.
 */
#define xen_blkif_put(_b)				\
	do {						\
		if (atomic_dec_and_test(&(_b)->refcnt))	\
			wake_up(&(_b)->waiting_to_free);\
			schedule_work(&(_b)->free_work);\
	} while (0)

struct phys_req {
+34 −12
Original line number Diff line number Diff line
@@ -35,12 +35,26 @@ static void connect(struct backend_info *);
static int connect_ring(struct backend_info *);
static void backend_changed(struct xenbus_watch *, const char **,
			    unsigned int);
static void xen_blkif_free(struct xen_blkif *blkif);
static void xen_vbd_free(struct xen_vbd *vbd);

/*
 * Accessor: hand back the xenbus device recorded in the backend's dev field.
 * The pointer is dereferenced unconditionally, so be must not be NULL.
 */
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
{
	struct xenbus_device *xbdev = be->dev;

	return xbdev;
}

/*
 * Work handler attached to a blkif's free_work item.
 *
 * The final request may release the device from softirq context, whereas
 * xen_blkif_free() can sleep; routing the free through a work item lets it
 * run from the work handler instead.  The owning xen_blkif is recovered
 * from the embedded work_struct and passed to xen_blkif_free().
 */
static void xen_blkif_deferred_free(struct work_struct *work)
{
	xen_blkif_free(container_of(work, struct xen_blkif, free_work));
}

static int blkback_name(struct xen_blkif *blkif, char *buf)
{
	char *devpath, *devname;
@@ -121,7 +135,6 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
	init_completion(&blkif->drain_complete);
	atomic_set(&blkif->drain, 0);
	blkif->st_print = jiffies;
	init_waitqueue_head(&blkif->waiting_to_free);
	blkif->persistent_gnts.rb_node = NULL;
	spin_lock_init(&blkif->free_pages_lock);
	INIT_LIST_HEAD(&blkif->free_pages);
@@ -132,6 +145,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
	INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);

	INIT_LIST_HEAD(&blkif->pending_free);
	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);

	for (i = 0; i < XEN_BLKIF_REQS; i++) {
		req = kzalloc(sizeof(*req), GFP_KERNEL);
@@ -231,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
	return 0;
}

static void xen_blkif_disconnect(struct xen_blkif *blkif)
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
	if (blkif->xenblkd) {
		kthread_stop(blkif->xenblkd);
@@ -239,9 +253,12 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
		blkif->xenblkd = NULL;
	}

	atomic_dec(&blkif->refcnt);
	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
	atomic_inc(&blkif->refcnt);
	/* The above kthread_stop() guarantees that at this point we
	 * don't have any discard_io or other_io requests. So, checking
	 * for inflight IO is enough.
	 */
	if (atomic_read(&blkif->inflight) > 0)
		return -EBUSY;

	if (blkif->irq) {
		unbind_from_irqhandler(blkif->irq, blkif);
@@ -252,6 +269,8 @@ static void xen_blkif_disconnect(struct xen_blkif *blkif)
		xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
		blkif->blk_rings.common.sring = NULL;
	}

	return 0;
}

static void xen_blkif_free(struct xen_blkif *blkif)
@@ -259,8 +278,8 @@ static void xen_blkif_free(struct xen_blkif *blkif)
	struct pending_req *req, *n;
	int i = 0, j;

	if (!atomic_dec_and_test(&blkif->refcnt))
		BUG();
	xen_blkif_disconnect(blkif);
	xen_vbd_free(&blkif->vbd);

	/* Remove all persistent grants and the cache of ballooned pages. */
	xen_blkbk_free_caches(blkif);
@@ -449,16 +468,15 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
		be->backend_watch.node = NULL;
	}

	dev_set_drvdata(&dev->dev, NULL);

	if (be->blkif) {
		xen_blkif_disconnect(be->blkif);
		xen_vbd_free(&be->blkif->vbd);
		xen_blkif_free(be->blkif);
		be->blkif = NULL;
		xen_blkif_put(be->blkif);
	}

	kfree(be->mode);
	kfree(be);
	dev_set_drvdata(&dev->dev, NULL);
	return 0;
}

@@ -705,7 +723,11 @@ static void frontend_changed(struct xenbus_device *dev,
		 * Enforce precondition before potential leak point.
		 * xen_blkif_disconnect() is idempotent.
		 */
		xen_blkif_disconnect(be->blkif);
		err = xen_blkif_disconnect(be->blkif);
		if (err) {
			xenbus_dev_fatal(dev, err, "pending I/O");
			break;
		}

		err = connect_ring(be);
		if (err)