Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 106f2e59 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull MD fixes from Shaohua Li:
 "A few bug fixes for MD:

   - Guoqing fixed md-cluster not working when compiled into the kernel

   - I fixed a potential deadlock in raid5-cache superblock write, a
     hang in raid5 reshape resume and a race condition introduced in
     rc4"

* tag 'md/4.8-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid5: fix a small race condition
  md-cluster: make md-cluster also can work when compiled into kernel
  raid5: guarantee enough stripes to avoid reshape hang
  raid5-cache: fix a deadlock in superblock write
parents 309a18ae c9445555
Loading
Loading
Loading
Loading
+4 −8
Original line number Diff line number Diff line
@@ -7610,16 +7610,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations);

int md_setup_cluster(struct mddev *mddev, int nodes)
{
	int err;

	err = request_module("md-cluster");
	if (err) {
		pr_err("md-cluster module not found.\n");
		return -ENOENT;
	}

	if (!md_cluster_ops)
		request_module("md-cluster");
	spin_lock(&pers_lock);
	/* ensure module won't be unloaded */
	if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
		pr_err("can't find md-cluster module or get it's reference.\n");
		spin_unlock(&pers_lock);
		return -ENOENT;
	}
+15 −31
Original line number Diff line number Diff line
@@ -96,7 +96,6 @@ struct r5l_log {
	spinlock_t no_space_stripes_lock;

	bool need_cache_flush;
	bool in_teardown;
};

/*
@@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,

	mddev = log->rdev->mddev;
	/*
	 * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and
	 * wait for this thread to finish. This thread waits for
	 * MD_CHANGE_PENDING clear, which is supposed to be done in
	 * md_check_recovery(). md_check_recovery() tries to get
	 * reconfig_mutex. Since r5l_quiesce already holds the mutex,
	 * md_check_recovery() fails, so the PENDING never get cleared. The
	 * in_teardown check workaround this issue.
	 * Discard could zero data, so before discard we must make sure the
	 * superblock is updated to the new log tail. Updating the superblock
	 * (either by directly calling md_update_sb() or by depending on the md
	 * thread) must hold reconfig_mutex. On the other hand, raid5_quiesce()
	 * is called with reconfig_mutex held. The first step of raid5_quiesce()
	 * is waiting for all IO to finish, hence waiting for the reclaim
	 * thread, while the reclaim thread is calling this function and
	 * waiting for reconfig_mutex. So there is a deadlock. We work around
	 * this issue with a trylock.
	 * FIXME: we could miss discard if we can't take reconfig_mutex
	 */
	if (!log->in_teardown) {
	set_mask_bits(&mddev->flags, 0,
		BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait,
			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
			log->in_teardown);
		/*
		 * r5l_quiesce could run after in_teardown check and hold
		 * mutex first. Superblock might get updated twice.
		 */
		if (log->in_teardown)
			md_update_sb(mddev, 1);
	} else {
		WARN_ON(!mddev_is_locked(mddev));
	if (!mddev_trylock(mddev))
		return;
	md_update_sb(mddev, 1);
	}
	mddev_unlock(mddev);

	/* discard IO error really doesn't matter, ignore it */
	if (log->last_checkpoint < end) {
@@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
	if (!log || state == 2)
		return;
	if (state == 0) {
		log->in_teardown = 0;
		/*
		 * This is a special case for hotadd. In suspend, the array has
		 * no journal. In resume, journal is initialized as well as the
@@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
		log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
					log->rdev->mddev, "reclaim");
	} else if (state == 1) {
		/*
		 * at this point all stripes are finished, so io_unit is at
		 * least in STRIPE_END state
		 */
		log->in_teardown = 1;
		/* make sure r5l_write_super_and_discard_space exits */
		mddev = log->rdev->mddev;
		wake_up(&mddev->sb_wait);
+12 −2
Original line number Diff line number Diff line
@@ -2423,10 +2423,10 @@ static void raid5_end_read_request(struct bio * bi)
		}
	}
	rdev_dec_pending(rdev, conf->mddev);
	bio_reset(bi);
	clear_bit(R5_LOCKED, &sh->dev[i].flags);
	set_bit(STRIPE_HANDLE, &sh->state);
	raid5_release_stripe(sh);
	bio_reset(bi);
}

static void raid5_end_write_request(struct bio *bi)
@@ -2498,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi)
	if (sh->batch_head && bi->bi_error && !replacement)
		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);

	bio_reset(bi);
	if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
		clear_bit(R5_LOCKED, &sh->dev[i].flags);
	set_bit(STRIPE_HANDLE, &sh->state);
@@ -2505,7 +2506,6 @@ static void raid5_end_write_request(struct bio *bi)

	if (sh->batch_head && sh != sh->batch_head)
		raid5_release_stripe(sh->batch_head);
	bio_reset(bi);
}

static void raid5_build_block(struct stripe_head *sh, int i, int previous)
@@ -6639,6 +6639,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
	}

	conf->min_nr_stripes = NR_STRIPES;
	if (mddev->reshape_position != MaxSector) {
		int stripes = max_t(int,
			((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4,
			((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4);
		conf->min_nr_stripes = max(NR_STRIPES, stripes);
		if (conf->min_nr_stripes != NR_STRIPES)
			printk(KERN_INFO
				"md/raid:%s: force stripe size %d for reshape\n",
				mdname(mddev), conf->min_nr_stripes);
	}
	memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
		 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
	atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);