Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit fb22c402 authored by Philipp Reisner
Browse files

drbd: Track the reasons to suspend IO in dedicated state bits



There are three ways to get IO suspended:

 * Loss of any access to data
 * Fence-peer-handler running
 * User requested to suspend IO

Track those in different bits, so that one condition clearing its
state bit does not interfere with the other two conditions.

Only when the user explicitly resumes IO are all three bits overruled (cleared) at once.

This distinction is hidden from the user, who still sees only a single
suspend bit.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent 78db8928
Loading
Loading
Loading
Loading
+8 −1
Original line number Original line Diff line number Diff line
@@ -1681,6 +1681,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
#define susp_MASK 1
#define susp_MASK 1
#define user_isp_MASK 1
#define user_isp_MASK 1
#define aftr_isp_MASK 1
#define aftr_isp_MASK 1
#define susp_nod_MASK 1
#define susp_fen_MASK 1


#define NS(T, S) \
#define NS(T, S) \
	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
@@ -2254,11 +2256,16 @@ static inline int drbd_state_is_stable(union drbd_state s)
	return 1;
	return 1;
}
}


/*
 * is_susp() - tell whether IO is suspended for any reason.
 * @s: state word to inspect
 *
 * Folds the individual suspend bits of @s (susp, susp_nod, susp_fen)
 * into a single truth value, so callers do not need to know which
 * condition caused the suspension.
 *
 * Returns 1 if at least one of the three bits is set, 0 otherwise.
 */
static inline int is_susp(union drbd_state s)
{
	if (s.susp)
		return 1;
	if (s.susp_nod)
		return 1;
	return s.susp_fen ? 1 : 0;
}

static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
{
{
	int mxb = drbd_get_max_buffers(mdev);
	int mxb = drbd_get_max_buffers(mdev);


	if (mdev->state.susp)
	if (is_susp(mdev->state))
		return 0;
		return 0;
	if (test_bit(SUSPEND_IO, &mdev->flags))
	if (test_bit(SUSPEND_IO, &mdev->flags))
		return 0;
		return 0;
+23 −13
Original line number Original line Diff line number Diff line
@@ -654,7 +654,7 @@ static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
	    drbd_role_str(ns.peer),
	    drbd_role_str(ns.peer),
	    drbd_disk_str(ns.disk),
	    drbd_disk_str(ns.disk),
	    drbd_disk_str(ns.pdsk),
	    drbd_disk_str(ns.pdsk),
	    ns.susp ? 's' : 'r',
	    is_susp(ns) ? 's' : 'r',
	    ns.aftr_isp ? 'a' : '-',
	    ns.aftr_isp ? 'a' : '-',
	    ns.peer_isp ? 'p' : '-',
	    ns.peer_isp ? 'p' : '-',
	    ns.user_isp ? 'u' : '-'
	    ns.user_isp ? 'u' : '-'
@@ -925,12 +925,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
	if (fp == FP_STONITH &&
	if (fp == FP_STONITH &&
	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
		ns.susp = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */


	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
		ns.susp = 1; /* Suspend IO while no data available (no accessible data available) */
		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */


	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
		if (ns.conn == C_SYNC_SOURCE)
		if (ns.conn == C_SYNC_SOURCE)
@@ -1030,7 +1030,10 @@ int __drbd_set_state(struct drbd_conf *mdev,
		PSC(conn);
		PSC(conn);
		PSC(disk);
		PSC(disk);
		PSC(pdsk);
		PSC(pdsk);
		PSC(susp);
		if (is_susp(ns) != is_susp(os))
			pbp += sprintf(pbp, "susp( %s -> %s ) ",
				       drbd_susp_str(is_susp(os)),
				       drbd_susp_str(is_susp(ns)));
		PSC(aftr_isp);
		PSC(aftr_isp);
		PSC(peer_isp);
		PSC(peer_isp);
		PSC(user_isp);
		PSC(user_isp);
@@ -1218,6 +1221,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
{
{
	enum drbd_fencing_p fp;
	enum drbd_fencing_p fp;
	enum drbd_req_event what = nothing;
	enum drbd_req_event what = nothing;
	union drbd_state nsm = (union drbd_state){ .i = -1 };


	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
		clear_bit(CRASHED_PRIMARY, &mdev->flags);
		clear_bit(CRASHED_PRIMARY, &mdev->flags);
@@ -1241,19 +1245,21 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
	/* Here we have the actions that are performed after a
	/* Here we have the actions that are performed after a
	   state change. This function might sleep */
	   state change. This function might sleep */


	if (os.susp && ns.susp && mdev->sync_conf.on_no_data == OND_SUSPEND_IO) {
	nsm.i = -1;
	if (ns.susp_nod) {
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
			if (ns.conn == C_CONNECTED)
			if (ns.conn == C_CONNECTED)
				what = resend;
				what = resend, nsm.susp_nod = 0;
			else /* ns.conn > C_CONNECTED */
			else /* ns.conn > C_CONNECTED */
				dev_err(DEV, "Unexpected Resynd going on!\n");
				dev_err(DEV, "Unexpected Resynd going on!\n");
		}
		}


		if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
		if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
			what = restart_frozen_disk_io;
			what = restart_frozen_disk_io, nsm.susp_nod = 0;

	}
	}


	if (fp == FP_STONITH && ns.susp) {
	if (ns.susp_fen) {
		/* case1: The outdate peer handler is successful: */
		/* case1: The outdate peer handler is successful: */
		if (os.pdsk > D_OUTDATED  && ns.pdsk <= D_OUTDATED) {
		if (os.pdsk > D_OUTDATED  && ns.pdsk <= D_OUTDATED) {
			tl_clear(mdev);
			tl_clear(mdev);
@@ -1263,20 +1269,22 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
				drbd_md_sync(mdev);
				drbd_md_sync(mdev);
			}
			}
			spin_lock_irq(&mdev->req_lock);
			spin_lock_irq(&mdev->req_lock);
			_drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL);
			_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
			spin_unlock_irq(&mdev->req_lock);
			spin_unlock_irq(&mdev->req_lock);
		}
		}
		/* case2: The connection was established again: */
		/* case2: The connection was established again: */
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
			clear_bit(NEW_CUR_UUID, &mdev->flags);
			clear_bit(NEW_CUR_UUID, &mdev->flags);
			what = resend;
			what = resend;
			nsm.susp_fen = 0;
		}
		}
	}
	}


	if (what != nothing) {
	if (what != nothing) {
		spin_lock_irq(&mdev->req_lock);
		spin_lock_irq(&mdev->req_lock);
		_tl_restart(mdev, what);
		_tl_restart(mdev, what);
		_drbd_set_state(_NS(mdev, susp, 0), CS_VERBOSE, NULL);
		nsm.i &= mdev->state.i;
		_drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
		spin_unlock_irq(&mdev->req_lock);
		spin_unlock_irq(&mdev->req_lock);
	}
	}


@@ -1298,7 +1306,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
		if (get_ldev(mdev)) {
		if (get_ldev(mdev)) {
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
				if (mdev->state.susp) {
				if (is_susp(mdev->state)) {
					set_bit(NEW_CUR_UUID, &mdev->flags);
					set_bit(NEW_CUR_UUID, &mdev->flags);
				} else {
				} else {
					drbd_uuid_new_current(mdev);
					drbd_uuid_new_current(mdev);
@@ -1417,7 +1425,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
		resume_next_sg(mdev);
		resume_next_sg(mdev);


	/* free tl_hash if we Got thawed and are C_STANDALONE */
	/* free tl_hash if we Got thawed and are C_STANDALONE */
	if (ns.conn == C_STANDALONE && ns.susp == 0 && mdev->tl_hash)
	if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
		drbd_free_tl_hash(mdev);
		drbd_free_tl_hash(mdev);


	/* Upon network connection, we need to start the receiver */
	/* Upon network connection, we need to start the receiver */
@@ -2732,7 +2740,9 @@ static void drbd_set_defaults(struct drbd_conf *mdev)
		  .conn = C_STANDALONE,
		  .conn = C_STANDALONE,
		  .disk = D_DISKLESS,
		  .disk = D_DISKLESS,
		  .pdsk = D_UNKNOWN,
		  .pdsk = D_UNKNOWN,
		  .susp = 0
		  .susp = 0,
		  .susp_nod = 0,
		  .susp_fen = 0
		} };
		} };
}
}


+14 −6
Original line number Original line Diff line number Diff line
@@ -209,7 +209,8 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
		put_ldev(mdev);
		put_ldev(mdev);
	} else {
	} else {
		dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
		dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
		return mdev->state.pdsk;
		nps = mdev->state.pdsk;
		goto out;
	}
	}


	r = drbd_khelper(mdev, "fence-peer");
	r = drbd_khelper(mdev, "fence-peer");
@@ -256,6 +257,14 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)


	dev_info(DEV, "fence-peer helper returned %d (%s)\n",
	dev_info(DEV, "fence-peer helper returned %d (%s)\n",
			(r>>8) & 0xff, ex_to_string);
			(r>>8) & 0xff, ex_to_string);

out:
	if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
		/* The handler was not successful... unfreeze here, the
		   state engine can not unfreeze... */
		_drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
	}

	return nps;
	return nps;
}
}


@@ -550,7 +559,7 @@ char *ppsize(char *buf, unsigned long long size)
void drbd_suspend_io(struct drbd_conf *mdev)
void drbd_suspend_io(struct drbd_conf *mdev)
{
{
	set_bit(SUSPEND_IO, &mdev->flags);
	set_bit(SUSPEND_IO, &mdev->flags);
	if (mdev->state.susp)
	if (is_susp(mdev->state))
		return;
		return;
	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
}
}
@@ -1016,7 +1025,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp


	drbd_suspend_io(mdev);
	drbd_suspend_io(mdev);
	/* also wait for the last barrier ack. */
	/* also wait for the last barrier ack. */
	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || mdev->state.susp);
	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
	/* and for any other previously queued work */
	/* and for any other previously queued work */
	drbd_flush_workqueue(mdev);
	drbd_flush_workqueue(mdev);


@@ -1114,8 +1123,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
		clear_bit(CRASHED_PRIMARY, &mdev->flags);
		clear_bit(CRASHED_PRIMARY, &mdev->flags);


	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
	    !(mdev->state.role == R_PRIMARY && mdev->state.susp &&
	    !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
	      mdev->sync_conf.on_no_data == OND_SUSPEND_IO)) {
		set_bit(CRASHED_PRIMARY, &mdev->flags);
		set_bit(CRASHED_PRIMARY, &mdev->flags);
		cp_discovered = 1;
		cp_discovered = 1;
	}
	}
@@ -1939,7 +1947,7 @@ static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
		drbd_md_sync(mdev);
		drbd_md_sync(mdev);
	}
	}
	drbd_suspend_io(mdev);
	drbd_suspend_io(mdev);
	reply->ret_code = drbd_request_state(mdev, NS(susp, 0));
	reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
	if (reply->ret_code == SS_SUCCESS) {
	if (reply->ret_code == SS_SUCCESS) {
		if (mdev->state.conn < C_CONNECTED)
		if (mdev->state.conn < C_CONNECTED)
			tl_clear(mdev);
			tl_clear(mdev);
+1 −1
Original line number Original line Diff line number Diff line
@@ -213,7 +213,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
			   drbd_disk_str(mdev->state.pdsk),
			   drbd_disk_str(mdev->state.pdsk),
			   (mdev->net_conf == NULL ? ' ' :
			   (mdev->net_conf == NULL ? ' ' :
			    (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
			    (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
			   mdev->state.susp ? 's' : 'r',
			   is_susp(mdev->state) ? 's' : 'r',
			   mdev->state.aftr_isp ? 'a' : '-',
			   mdev->state.aftr_isp ? 'a' : '-',
			   mdev->state.peer_isp ? 'p' : '-',
			   mdev->state.peer_isp ? 'p' : '-',
			   mdev->state.user_isp ? 'u' : '-',
			   mdev->state.user_isp ? 'u' : '-',
+3 −3
Original line number Original line Diff line number Diff line
@@ -3315,7 +3315,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
	if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
	if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = mdev->new_state_tmp.disk;
		ns.disk = mdev->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (oconn < C_CONNECTED && nconn >= C_CONNECTED ? 0 : CS_HARD);
	cs_flags = CS_VERBOSE + (oconn < C_CONNECTED && nconn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && ns.susp && nconn == C_CONNECTED && oconn < C_CONNECTED &&
	if (ns.pdsk == D_CONSISTENT && is_susp(ns) && nconn == C_CONNECTED && oconn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
		/* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this
		/* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		   for temporal network outages! */
@@ -3829,7 +3829,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
	kfree(mdev->p_uuid);
	kfree(mdev->p_uuid);
	mdev->p_uuid = NULL;
	mdev->p_uuid = NULL;


	if (!mdev->state.susp)
	if (!is_susp(mdev->state))
		tl_clear(mdev);
		tl_clear(mdev);


	dev_info(DEV, "Connection closed\n");
	dev_info(DEV, "Connection closed\n");
@@ -3858,7 +3858,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
	if (os.conn == C_DISCONNECTING) {
	if (os.conn == C_DISCONNECTING) {
		wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);
		wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);


		if (!mdev->state.susp) {
		if (!is_susp(mdev->state)) {
			/* we must not free the tl_hash
			/* we must not free the tl_hash
			 * while application io is still on the fly */
			 * while application io is still on the fly */
			wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
			wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
Loading