Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 85ad1d13 authored by Guoqing Jiang's avatar Guoqing Jiang Committed by Shaohua Li
Browse files

md: set MD_CHANGE_PENDING in an atomic region



Some code waits for a metadata update by:

1. flagging that it is needed (MD_CHANGE_DEVS or MD_CHANGE_CLEAN)
2. setting MD_CHANGE_PENDING and waking the management thread
3. waiting for MD_CHANGE_PENDING to be cleared

If the first two are done without locking, the code in md_update_sb()
which checks if it needs to repeat might test if an update is needed
before step 1, then clear MD_CHANGE_PENDING after step 2, resulting
in the wait returning early.

So make sure all places that set MD_CHANGE_PENDING are atomic, and
bit_clear_unless (suggested by Neil) is introduced for the purpose.

Cc: Martin Kepplinger <martink@posteo.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: <linux-kernel@vger.kernel.org>
Reviewed-by: default avatarNeilBrown <neilb@suse.com>
Signed-off-by: default avatarGuoqing Jiang <gqjiang@suse.com>
Signed-off-by: default avatarShaohua Li <shli@fb.com>
parent fe67d19a
Loading
Loading
Loading
Loading
+14 −13
Original line number Diff line number Diff line
@@ -2295,12 +2295,16 @@ void md_update_sb(struct mddev *mddev, int force_change)
	if (mddev_is_clustered(mddev)) {
		if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
			force_change = 1;
		if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
			nospares = 1;
		ret = md_cluster_ops->metadata_update_start(mddev);
		/* Has someone else has updated the sb */
		if (!does_sb_need_changing(mddev)) {
			if (ret == 0)
				md_cluster_ops->metadata_update_cancel(mddev);
			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
			bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
							 BIT(MD_CHANGE_DEVS) |
							 BIT(MD_CHANGE_CLEAN));
			return;
		}
	}
@@ -2434,15 +2438,11 @@ void md_update_sb(struct mddev *mddev, int force_change)
	if (mddev_is_clustered(mddev) && ret == 0)
		md_cluster_ops->metadata_update_finish(mddev);

	spin_lock(&mddev->lock);
	if (mddev->in_sync != sync_req ||
	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
	    !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
			       BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
		/* have to write it out again */
		spin_unlock(&mddev->lock);
		goto repeat;
	}
	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
	spin_unlock(&mddev->lock);
	wake_up(&mddev->sb_wait);
	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
@@ -8147,18 +8147,18 @@ void md_do_sync(struct md_thread *thread)
		}
	}
 skip:
	set_bit(MD_CHANGE_DEVS, &mddev->flags);

	if (mddev_is_clustered(mddev) &&
	    ret == 0) {
		/* set CHANGE_PENDING here since maybe another
		 * update is needed, so other nodes are informed */
		set_bit(MD_CHANGE_PENDING, &mddev->flags);
		set_mask_bits(&mddev->flags, 0,
			      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait,
			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
		md_cluster_ops->resync_finish(mddev);
	}
	} else
		set_bit(MD_CHANGE_DEVS, &mddev->flags);

	spin_lock(&mddev->lock);
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8550,6 +8550,7 @@ EXPORT_SYMBOL(md_finish_reshape);
int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
		       int is_new)
{
	struct mddev *mddev = rdev->mddev;
	int rv;
	if (is_new)
		s += rdev->new_data_offset;
@@ -8559,8 +8560,8 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
	if (rv == 0) {
		/* Make sure they get written out promptly */
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
		set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
		set_mask_bits(&mddev->flags, 0,
			      BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
		md_wakeup_thread(rdev->mddev->thread);
		return 1;
	} else
+2 −2
Original line number Diff line number Diff line
@@ -1474,8 +1474,8 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
	 * if recovery is running, make sure it aborts.
	 */
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	set_bit(MD_CHANGE_DEVS, &mddev->flags);
	set_bit(MD_CHANGE_PENDING, &mddev->flags);
	set_mask_bits(&mddev->flags, 0,
		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
	printk(KERN_ALERT
	       "md/raid1:%s: Disk failure on %s, disabling device.\n"
	       "md/raid1:%s: Operation continuing on %d devices.\n",
+4 −4
Original line number Diff line number Diff line
@@ -1102,8 +1102,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
		bio->bi_iter.bi_sector < conf->reshape_progress))) {
		/* Need to update reshape_position in metadata */
		mddev->reshape_position = conf->reshape_progress;
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		set_bit(MD_CHANGE_PENDING, &mddev->flags);
		set_mask_bits(&mddev->flags, 0,
			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait,
			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
@@ -1591,8 +1591,8 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	set_bit(Blocked, &rdev->flags);
	set_bit(Faulty, &rdev->flags);
	set_bit(MD_CHANGE_DEVS, &mddev->flags);
	set_bit(MD_CHANGE_PENDING, &mddev->flags);
	set_mask_bits(&mddev->flags, 0,
		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
	spin_unlock_irqrestore(&conf->device_lock, flags);
	printk(KERN_ALERT
	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
+2 −2
Original line number Diff line number Diff line
@@ -712,8 +712,8 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
	 * in_teardown check workaround this issue.
	 */
	if (!log->in_teardown) {
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		set_bit(MD_CHANGE_PENDING, &mddev->flags);
		set_mask_bits(&mddev->flags, 0,
			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait,
			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
+2 −2
Original line number Diff line number Diff line
@@ -2514,8 +2514,8 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)

	set_bit(Blocked, &rdev->flags);
	set_bit(Faulty, &rdev->flags);
	set_bit(MD_CHANGE_DEVS, &mddev->flags);
	set_bit(MD_CHANGE_PENDING, &mddev->flags);
	set_mask_bits(&mddev->flags, 0,
		      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
	printk(KERN_ALERT
	       "md/raid:%s: Disk failure on %s, disabling device.\n"
	       "md/raid:%s: Operation continuing on %d devices.\n",
Loading