Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 07be15b1 authored by Lars Ellenberg's avatar Lars Ellenberg Committed by Philipp Reisner
Browse files

drbd: fix resend/resubmit of frozen IO



DRBD can freeze IO, due to fencing policy (fencing resource-and-stonith),
or because we lost access to data (on-no-data-accessible suspend-io).

Resuming from there (re-connect, or re-attach, or explicit admin
intervention) should "just work".

Unfortunately, if the re-attach/re-connect did not happen within
the timeout, since the commit

  drbd: Implemented real timeout checking for request processing time

if so configured, the request_timer_fn() would timeout and
detach/disconnect virtually immediately.

This change tracks the most recent attach and connect, and does not
timeout within <configured timeout interval> after attach/connect.

Signed-off-by: default avatarPhilipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: default avatarLars Ellenberg <lars.ellenberg@linbit.com>
parent 3ea35df8
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -859,6 +859,7 @@ struct drbd_tconn { /* is a resource from the config file */
	unsigned int epochs;
	enum write_ordering_e write_ordering;

	unsigned long last_reconnect_jif;
	struct drbd_thread receiver;
	struct drbd_thread worker;
	struct drbd_thread asender;
@@ -881,6 +882,7 @@ struct drbd_conf {
	struct block_device *this_bdev;
	struct gendisk	    *vdisk;

	unsigned long last_reattach_jif;
	struct drbd_work  resync_work,
			  unplug_work,
			  go_diskless,
+36 −15
Original line number Diff line number Diff line
@@ -1171,12 +1171,14 @@ void request_timer_fn(unsigned long data)
	struct list_head *le;
	struct net_conf *nc;
	unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
	unsigned long now;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	ent = nc ? nc->timeout * HZ/10 * nc->ko_count : 0;
	if (nc && mdev->state.conn >= C_WF_REPORT_PARAMS)
		ent = nc->timeout * HZ/10 * nc->ko_count;

	if (get_ldev(mdev)) {
	if (get_ldev(mdev)) { /* implicit state.disk >= D_INCONSISTENT */
		dt = rcu_dereference(mdev->ldev->disk_conf)->disk_timeout * HZ / 10;
		put_ldev(mdev);
	}
@@ -1184,32 +1186,51 @@ void request_timer_fn(unsigned long data)

	et = min_not_zero(dt, ent);

	if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
	if (!et)
		return; /* Recurring timer stopped */

	now = jiffies;

	spin_lock_irq(&tconn->req_lock);
	le = &tconn->oldest_tle->requests;
	if (list_empty(le)) {
		spin_unlock_irq(&tconn->req_lock);
		mod_timer(&mdev->request_timer, jiffies + et);
		mod_timer(&mdev->request_timer, now + et);
		return;
	}

	le = le->prev;
	req = list_entry(le, struct drbd_request, tl_requests);
	if (ent && req->rq_state & RQ_NET_PENDING) {
		if (time_is_before_eq_jiffies(req->start_time + ent)) {

	/* The request is considered timed out, if
	 * - we have some effective timeout from the configuration,
	 *   with above state restrictions applied,
	 * - the oldest request is waiting for a response from the network
	 *   resp. the local disk,
	 * - the oldest request is in fact older than the effective timeout,
	 * - the connection was established (resp. disk was attached)
	 *   for longer than the timeout already.
	 * Note that for 32bit jiffies and very stable connections/disks,
	 * we may have a wrap around, which is catched by
	 *   !time_in_range(now, last_..._jif, last_..._jif + timeout).
	 *
	 * Side effect: once per 32bit wrap-around interval, which means every
	 * ~198 days with 250 HZ, we have a window where the timeout would need
	 * to expire twice (worst case) to become effective. Good enough.
	 */
	if (ent && req->rq_state & RQ_NET_PENDING &&
		 time_after(now, req->start_time + ent) &&
		!time_in_range(now, tconn->last_reconnect_jif, tconn->last_reconnect_jif + ent)) {
		dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
		_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
	}
	}
	if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev) {
		if (time_is_before_eq_jiffies(req->start_time + dt)) {
	if (dt && req->rq_state & RQ_LOCAL_PENDING && req->w.mdev == mdev &&
		 time_after(now, req->start_time + dt) &&
		!time_in_range(now, mdev->last_reattach_jif, mdev->last_reattach_jif + dt)) {
		dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
		__drbd_chk_io_error(mdev, 1);
	}
	}
	nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et;
	nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et;
	spin_unlock_irq(&tconn->req_lock);
	mod_timer(&mdev->request_timer, nt);
}
+15 −1
Original line number Diff line number Diff line
@@ -1075,6 +1075,13 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
		drbd_resume_al(mdev);

	/* remember last attach time so request_timer_fn() won't
	 * kill newly established sessions while we are still trying to thaw
	 * previously frozen IO */
	if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
	    ns.disk > D_NEGOTIATING)
		mdev->last_reattach_jif = jiffies;

	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
	if (ascw) {
		ascw->os = os;
@@ -1609,8 +1616,15 @@ conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state
	enum drbd_state_rv rv;
	int vnr, number_of_volumes = 0;

	if (mask.conn == C_MASK)
	if (mask.conn == C_MASK) {
		/* remember last connect time so request_timer_fn() won't
		 * kill newly established sessions while we are still trying to thaw
		 * previously frozen IO */
		if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS)
			tconn->last_reconnect_jif = jiffies;

		tconn->cstate = val.conn;
	}

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {