Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 33d32fa7 authored by Lars Ellenberg's avatar Lars Ellenberg Committed by Jens Axboe
Browse files

drbd: fix potential deadlock when trying to detach during handshake



When requesting a detach, we first suspend IO, and also inhibit meta-data IO
by means of drbd_md_get_buffer(), because we don't want to "fail" the disk
while there is IO in-flight: the transition into D_FAILED for detach purposes
may get misinterpreted as actual IO error in a confused endio function.

We wrap it all into wait_event(), to retry in case the drbd_req_state()
returns SS_IN_TRANSIENT_STATE, as it does for example during an ongoing
connection handshake.

In that example, the receiver thread may need to grab drbd_md_get_buffer()
during the handshake to make progress.  To avoid potential deadlock with
detach, detach needs to grab and release the meta data buffer inside of
that wait_event retry loop. To avoid lock inversion between
mutex_lock(&device->state_mutex) and drbd_md_get_buffer(device),
introduce a new enum chg_state_flag CS_INHIBIT_MD_IO, and move the
call to drbd_md_get_buffer() inside the state_mutex grabbed in
drbd_req_state().

Signed-off-by: default avatarPhilipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: default avatarLars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 427fd2be
Loading
Loading
Loading
Loading
+2 −23
Original line number Diff line number Diff line
@@ -2149,34 +2149,13 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)

static int adm_detach(struct drbd_device *device, int force)
{
	enum drbd_state_rv retcode;
	void *buffer;
	int ret;

	if (force) {
		set_bit(FORCE_DETACH, &device->flags);
		drbd_force_state(device, NS(disk, D_FAILED));
		retcode = SS_SUCCESS;
		goto out;
		return SS_SUCCESS;
	}

	drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
	buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
	if (buffer) {
		retcode = drbd_request_state(device, NS(disk, D_FAILED));
		drbd_md_put_buffer(device);
	} else /* already <= D_FAILED */
		retcode = SS_NOTHING_TO_DO;
	/* D_FAILED will transition to DISKLESS. */
	drbd_resume_io(device);
	ret = wait_event_interruptible(device->misc_wait,
			device->state.disk != D_FAILED);
	if ((int)retcode == (int)SS_IS_DISKLESS)
		retcode = SS_NOTHING_TO_DO;
	if (ret)
		retcode = ERR_INTR;
out:
	return retcode;
	return drbd_request_detach_interruptible(device);
}

/* Detaching the disk is a process in multiple stages.  First we need to lock
+46 −0
Original line number Diff line number Diff line
@@ -579,11 +579,14 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
	unsigned long flags;
	union drbd_state os, ns;
	enum drbd_state_rv rv;
	void *buffer = NULL;

	init_completion(&done);

	if (f & CS_SERIALIZE)
		mutex_lock(device->state_mutex);
	if (f & CS_INHIBIT_MD_IO)
		buffer = drbd_md_get_buffer(device, __func__);

	spin_lock_irqsave(&device->resource->req_lock, flags);
	os = drbd_read_state(device);
@@ -636,6 +639,8 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask,
	}

abort:
	if (buffer)
		drbd_md_put_buffer(device);
	if (f & CS_SERIALIZE)
		mutex_unlock(device->state_mutex);

@@ -664,6 +669,47 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask,
	return rv;
}

/*
 * We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while
 * there is IO in-flight: the transition into D_FAILED for detach purposes
 * may get misinterpreted as actual IO error in a confused endio function.
 *
 * We wrap it all into wait_event(), to retry in case the drbd_req_state()
 * returns SS_IN_TRANSIENT_STATE.
 *
 * To avoid potential deadlock with e.g. the receiver thread trying to grab
 * drbd_md_get_buffer() while trying to get out of the "transient state", we
 * need to grab and release the meta data buffer inside of that wait_event loop.
 */
static enum drbd_state_rv
request_detach(struct drbd_device *device)
{
	return drbd_req_state(device, NS(disk, D_FAILED),
			CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
}

enum drbd_state_rv
drbd_request_detach_interruptible(struct drbd_device *device)
{
	enum drbd_state_rv rv;
	int ret;

	drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
	wait_event_interruptible(device->state_wait,
		(rv = request_detach(device)) != SS_IN_TRANSIENT_STATE);
	drbd_resume_io(device);

	ret = wait_event_interruptible(device->misc_wait,
			device->state.disk != D_FAILED);

	if (rv == SS_IS_DISKLESS)
		rv = SS_NOTHING_TO_DO;
	if (ret)
		rv = ERR_INTR;

	return rv;
}

enum drbd_state_rv
_drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask,
		    union drbd_state val, enum chg_state_flags f)
+8 −0
Original line number Diff line number Diff line
@@ -71,6 +71,10 @@ enum chg_state_flags {
	CS_DC_SUSP       = 1 << 10,
	CS_DC_MASK       = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
	CS_IGN_OUTD_FAIL = 1 << 11,

	/* Make sure no meta data IO is in flight, by calling
	 * drbd_md_get_buffer().  Used for graceful detach. */
	CS_INHIBIT_MD_IO = 1 << 12,
};

/* drbd_dev_state and drbd_state are different types. This is to stress the
@@ -156,6 +160,10 @@ static inline int drbd_request_state(struct drbd_device *device,
	return _drbd_request_state(device, mask, val, CS_VERBOSE + CS_ORDERED);
}

/* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */
enum drbd_state_rv
drbd_request_detach_interruptible(struct drbd_device *device);

enum drbd_role conn_highest_role(struct drbd_connection *connection);
enum drbd_role conn_highest_peer(struct drbd_connection *connection);
enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection);