Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 26b69a33 authored by Ilya Dryomov's avatar Ilya Dryomov Committed by Greg Kroah-Hartman
Browse files

rbd: avoid a deadlock on header_rwsem when flushing notifies



[ Upstream commit 0e4e1de5b63fa423b13593337a27fd2d2b0bcf77 ]

rbd_unregister_watch() flushes notifies and therefore cannot be called
under header_rwsem because a header update notify takes header_rwsem to
synchronize with "rbd map".  If mapping an image fails after the watch
is established and a header update notify sneaks in, we deadlock when
erroring out from rbd_dev_image_probe().

Move watch registration and unregistration out of the critical section.
The only reason they were put there was to make header_rwsem management
slightly more obvious.

Fixes: 811c6688 ("rbd: fix rbd map vs notify races")
Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
Reviewed-by: default avatarJason Dillaman <dillaman@redhat.com>
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
parent adff7c6c
Loading
Loading
Loading
Loading
+13 −4
Original line number Diff line number Diff line
@@ -3427,6 +3427,10 @@ static void cancel_tasks_sync(struct rbd_device *rbd_dev)
	cancel_work_sync(&rbd_dev->unlock_work);
}

/*
 * header_rwsem must not be held to avoid a deadlock with
 * rbd_dev_refresh() when flushing notifies.
 */
static void rbd_unregister_watch(struct rbd_device *rbd_dev)
{
	WARN_ON(waitqueue_active(&rbd_dev->lock_waitq));
@@ -5732,6 +5736,9 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
 * device.  If this image is the one being mapped (i.e., not a
 * parent), initiate a watch on its header object before using that
 * object to get detailed information about the rbd image.
 *
 * On success, returns with header_rwsem held for write if called
 * with @depth == 0.
 */
static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
{
@@ -5764,6 +5771,9 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
		}
	}

	if (!depth)
		down_write(&rbd_dev->header_rwsem);

	ret = rbd_dev_header_info(rbd_dev);
	if (ret)
		goto err_out_watch;
@@ -5814,6 +5824,8 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
err_out_probe:
	rbd_dev_unprobe(rbd_dev);
err_out_watch:
	if (!depth)
		up_write(&rbd_dev->header_rwsem);
	if (!depth)
		rbd_unregister_watch(rbd_dev);
err_out_format:
@@ -5872,12 +5884,9 @@ static ssize_t do_rbd_add(struct bus_type *bus,
		goto err_out_rbd_dev;
	}

	down_write(&rbd_dev->header_rwsem);
	rc = rbd_dev_image_probe(rbd_dev, 0);
	if (rc < 0) {
		up_write(&rbd_dev->header_rwsem);
	if (rc < 0)
		goto err_out_rbd_dev;
	}

	/* If we are mapping a snapshot it must be marked read-only */
	if (rbd_dev->spec->snap_id != CEPH_NOSNAP)