Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit f8dfcffd authored by NeilBrown's avatar NeilBrown
Browse files

md/raid5: ensure sync and DISCARD don't happen at the same time.



A number of problems can occur due to races between
resync/recovery and discard.

- if sync_request calls handle_stripe() while a discard is
  happening on the stripe, it might call handle_stripe_clean_event
  before all of the individual discard requests have completed
  (so some devices are still locked, but not all).
  Since commit ca64cae9
     md/raid5: Make sure we clear R5_Discard when discard is finished.
  this will cause R5_Discard to be cleared for the parity device,
  so handle_stripe_clean_event() will not be called when the other
  devices do become unlocked, so their ->written will not be cleared.
  This ultimately leads to a WARN_ON in init_stripe and a lock-up.

- If handle_stripe_clean_event() does clear R5_UPTODATE at an awkward
  time for resync, it can lead to s->uptodate being less than disks
  in handle_parity_checks5(), which triggers a BUG (because it is
  one).

So:
 - keep R5_Discard on the parity device until all other devices have
   completed their discard request
 - make sure we don't try to have a 'discard' and a 'sync' action at
   the same time.
   This involves a new stripe flag to we know when a 'discard' is
   happening, and the use of R5_Overlap on the parity disk so when a
   discard is wanted while a sync is active, so we know to wake up
   the discard at the appropriate time.

Discard support for RAID5 was added in 3.7, so this is suitable for
any -stable kernel since 3.7.

Cc: stable@vger.kernel.org (v3.7+)
Reported-by: default avatarJes Sorensen <Jes.Sorensen@redhat.com>
Tested-by: default avatarJes Sorensen <Jes.Sorensen@redhat.com>
Signed-off-by: default avatarNeilBrown <neilb@suse.de>
parent 90584fc9
Loading
Loading
Loading
Loading
+39 −6
Original line number Diff line number Diff line
@@ -2576,6 +2576,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
	int i;

	clear_bit(STRIPE_SYNCING, &sh->state);
	if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
		wake_up(&conf->wait_for_overlap);
	s->syncing = 0;
	s->replacing = 0;
	/* There is nothing more to do for sync/check/repair.
@@ -2749,6 +2751,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
{
	int i;
	struct r5dev *dev;
	int discard_pending = 0;

	for (i = disks; i--; )
		if (sh->dev[i].written) {
@@ -2777,9 +2780,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
						STRIPE_SECTORS,
					 !test_bit(STRIPE_DEGRADED, &sh->state),
						0);
			} else if (test_bit(R5_Discard, &dev->flags))
				discard_pending = 1;
		}
	if (!discard_pending &&
	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
		clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
		clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
		if (sh->qd_idx >= 0) {
			clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
			clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
		}
		/* now that discard is done we can proceed with any sync */
		clear_bit(STRIPE_DISCARD, &sh->state);
		if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
			set_bit(STRIPE_HANDLE, &sh->state);

	}
		} else if (test_bit(R5_Discard, &sh->dev[i].flags))
			clear_bit(R5_Discard, &sh->dev[i].flags);

	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
		if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -3431,10 +3448,16 @@ static void handle_stripe(struct stripe_head *sh)
		return;
	}

	if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
	if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
		spin_lock(&sh->stripe_lock);
		/* Cannot process 'sync' concurrently with 'discard' */
		if (!test_bit(STRIPE_DISCARD, &sh->state) &&
		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
			set_bit(STRIPE_SYNCING, &sh->state);
			clear_bit(STRIPE_INSYNC, &sh->state);
		}
		spin_unlock(&sh->stripe_lock);
	}
	clear_bit(STRIPE_DELAYED, &sh->state);

	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
@@ -3593,6 +3616,8 @@ static void handle_stripe(struct stripe_head *sh)
	    test_bit(STRIPE_INSYNC, &sh->state)) {
		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
		clear_bit(STRIPE_SYNCING, &sh->state);
		if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
			wake_up(&conf->wait_for_overlap);
	}

	/* If the failed drives are just a ReadError, then we might need
@@ -4159,6 +4184,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
		sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
		prepare_to_wait(&conf->wait_for_overlap, &w,
				TASK_UNINTERRUPTIBLE);
		set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
		if (test_bit(STRIPE_SYNCING, &sh->state)) {
			release_stripe(sh);
			schedule();
			goto again;
		}
		clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
		spin_lock_irq(&sh->stripe_lock);
		for (d = 0; d < conf->raid_disks; d++) {
			if (d == sh->pd_idx || d == sh->qd_idx)
@@ -4171,6 +4203,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
				goto again;
			}
		}
		set_bit(STRIPE_DISCARD, &sh->state);
		finish_wait(&conf->wait_for_overlap, &w);
		for (d = 0; d < conf->raid_disks; d++) {
			if (d == sh->pd_idx || d == sh->qd_idx)
+1 −0
Original line number Diff line number Diff line
@@ -323,6 +323,7 @@ enum {
	STRIPE_COMPUTE_RUN,
	STRIPE_OPS_REQ_PENDING,
	STRIPE_ON_UNPLUG_LIST,
	STRIPE_DISCARD,
};

/*