Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c91abf5a authored by NeilBrown's avatar NeilBrown
Browse files

md: use MD_RECOVERY_INTR instead of kthread_should_stop in resync thread.



We currently use kthread_should_stop() in various places in the
sync/reshape code to abort early.
However some places set MD_RECOVERY_INTR but don't immediately call
md_reap_sync_thread() (and we will shortly get another one).
When this happens we are relying on md_check_recovery() to reap the
thread and that only happen when it finishes normally.
So MD_RECOVERY_INTR must lead to a normal finish without the
kthread_should_stop() test.

So replace all relevant tests, and be more careful when the thread is
interrupted not to acknowledge that latest step in a reshape as it may
not be fully committed yet.

Also add a test on MD_RECOVERY_INTR in the 'is_mddev_idle' loop
so we don't wait have to wait for the speed to drop before we can abort.

Signed-off-by: default avatarNeilBrown <neilb@suse.de>
parent 29f097c4
Loading
Loading
Loading
Loading
+14 −26
Original line number Diff line number Diff line
@@ -7410,9 +7410,6 @@ void md_do_sync(struct md_thread *thread)
		mddev->curr_resync = 2;

	try_again:
		if (kthread_should_stop())
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);

		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			goto skip;
		for_each_mddev(mddev2, tmp) {
@@ -7437,7 +7434,7 @@ void md_do_sync(struct md_thread *thread)
				 * be caught by 'softlockup'
				 */
				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
				if (!kthread_should_stop() &&
				if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
				    mddev2->curr_resync >= mddev->curr_resync) {
					printk(KERN_INFO "md: delaying %s of %s"
					       " until %s has finished (they"
@@ -7550,7 +7547,8 @@ void md_do_sync(struct md_thread *thread)
			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
		}

		while (j >= mddev->resync_max && !kthread_should_stop()) {
		while (j >= mddev->resync_max &&
		       !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			/* As this condition is controlled by user-space,
			 * we can block indefinitely, so use '_interruptible'
			 * to avoid triggering warnings.
@@ -7558,17 +7556,18 @@ void md_do_sync(struct md_thread *thread)
			flush_signals(current); /* just in case */
			wait_event_interruptible(mddev->recovery_wait,
						 mddev->resync_max > j
						 || kthread_should_stop());
						 || test_bit(MD_RECOVERY_INTR,
							     &mddev->recovery));
		}

		if (kthread_should_stop())
			goto interrupted;
		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			break;

		sectors = mddev->pers->sync_request(mddev, j, &skipped,
						  currspeed < speed_min(mddev));
		if (sectors == 0) {
			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
			goto out;
			break;
		}

		if (!skipped) { /* actual IO requested */
@@ -7605,10 +7604,8 @@ void md_do_sync(struct md_thread *thread)
			last_mark = next;
		}


		if (kthread_should_stop())
			goto interrupted;

		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			break;

		/*
		 * this loop exits only if either when we are slower than
@@ -7631,11 +7628,12 @@ void md_do_sync(struct md_thread *thread)
			}
		}
	}
	printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
	printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
	       test_bit(MD_RECOVERY_INTR, &mddev->recovery)
	       ? "interrupted" : "done");
	/*
	 * this also signals 'finished resyncing' to md_stop
	 */
 out:
	blk_finish_plug(&plug);
	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

@@ -7689,16 +7687,6 @@ void md_do_sync(struct md_thread *thread)
	set_bit(MD_RECOVERY_DONE, &mddev->recovery);
	md_wakeup_thread(mddev->thread);
	return;

 interrupted:
	/*
	 * got a signal, exit.
	 */
	printk(KERN_INFO
	       "md: md_do_sync() got signal ... exiting\n");
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	goto out;

}
EXPORT_SYMBOL_GPL(md_do_sync);

+5 −1
Original line number Diff line number Diff line
@@ -4386,7 +4386,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait, mddev->flags == 0 ||
			   kthread_should_stop());
			   test_bit(MD_RECOVERY_INTR, &mddev->recovery));
		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
			allow_barrier(conf);
			return sectors_done;
		}
		conf->reshape_safe = mddev->reshape_position;
		allow_barrier(conf);
	}
+15 −4
Original line number Diff line number Diff line
@@ -4842,14 +4842,19 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
	    time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
		/* Cannot proceed until we've updated the superblock... */
		wait_event(conf->wait_for_overlap,
			   atomic_read(&conf->reshape_stripes)==0);
			   atomic_read(&conf->reshape_stripes)==0
			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
		if (atomic_read(&conf->reshape_stripes) != 0)
			return 0;
		mddev->reshape_position = conf->reshape_progress;
		mddev->curr_resync_completed = sector_nr;
		conf->reshape_checkpoint = jiffies;
		set_bit(MD_CHANGE_DEVS, &mddev->flags);
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait, mddev->flags == 0 ||
			   kthread_should_stop());
			   test_bit(MD_RECOVERY_INTR, &mddev->recovery));
		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			return 0;
		spin_lock_irq(&conf->device_lock);
		conf->reshape_safe = mddev->reshape_position;
		spin_unlock_irq(&conf->device_lock);
@@ -4932,7 +4937,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
	    >= mddev->resync_max - mddev->curr_resync_completed) {
		/* Cannot proceed until we've updated the superblock... */
		wait_event(conf->wait_for_overlap,
			   atomic_read(&conf->reshape_stripes) == 0);
			   atomic_read(&conf->reshape_stripes) == 0
			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
		if (atomic_read(&conf->reshape_stripes) != 0)
			goto ret;
		mddev->reshape_position = conf->reshape_progress;
		mddev->curr_resync_completed = sector_nr;
		conf->reshape_checkpoint = jiffies;
@@ -4940,13 +4948,16 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait,
			   !test_bit(MD_CHANGE_DEVS, &mddev->flags)
			   || kthread_should_stop());
			   || test_bit(MD_RECOVERY_INTR, &mddev->recovery));
		if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
			goto ret;
		spin_lock_irq(&conf->device_lock);
		conf->reshape_safe = mddev->reshape_position;
		spin_unlock_irq(&conf->device_lock);
		wake_up(&conf->wait_for_overlap);
		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
	}
ret:
	return reshape_sectors;
}