Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 7abe8493 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull md updates from Shaohua Li:
 "This mainly improves raid10 cluster and fixes some bugs:

   - raid10 cluster improvements from Guoqing

   - Memory leak fixes from Jack and Xiao

   - raid10 hang fix from Alex

   - raid5 block faulty device fix from Mariusz

   - metadata update fix from Neil

   - Invalid disk role fix from Me

   - Other cleanups"

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  MD: Memory leak when flush bio size is zero
  md: fix memleak for mempool
  md-cluster: remove suspend_info
  md-cluster: send BITMAP_NEEDS_SYNC message if reshaping is interrupted
  md-cluster/bitmap: don't call md_bitmap_sync_with_cluster during reshaping stage
  md-cluster/raid10: don't call remove_and_add_spares during reshaping stage
  md-cluster/raid10: call update_size in md_reap_sync_thread
  md-cluster: introduce resync_info_get interface for sanity check
  md-cluster/raid10: support add disk under grow mode
  md-cluster/raid10: resize all the bitmaps before start reshape
  MD: fix invalid stored role for a disk - try2
  md/bitmap: use mddev_suspend/resume instead of ->quiesce()
  md: remove redundant code that is no longer reachable
  md: allow metadata updates while suspending an array - fix
  MD: fix invalid stored role for a disk
  md/raid10: Fix raid10 replace hang when new added disk faulty
  raid5: block failing device if raid will be failed
parents 71f4d95b af9b926d
Loading
Loading
Loading
Loading
+5 −4
Original line number Diff line number Diff line
@@ -2288,9 +2288,9 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
			goto out;
		}
		if (mddev->pers) {
			mddev->pers->quiesce(mddev, 1);
			mddev_suspend(mddev);
			md_bitmap_destroy(mddev);
			mddev->pers->quiesce(mddev, 0);
			mddev_resume(mddev);
		}
		mddev->bitmap_info.offset = 0;
		if (mddev->bitmap_info.file) {
@@ -2327,8 +2327,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
			mddev->bitmap_info.offset = offset;
			if (mddev->pers) {
				struct bitmap *bitmap;
				mddev->pers->quiesce(mddev, 1);
				bitmap = md_bitmap_create(mddev, -1);
				mddev_suspend(mddev);
				if (IS_ERR(bitmap))
					rv = PTR_ERR(bitmap);
				else {
@@ -2337,11 +2337,12 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
					if (rv)
						mddev->bitmap_info.offset = 0;
				}
				mddev->pers->quiesce(mddev, 0);
				if (rv) {
					md_bitmap_destroy(mddev);
					mddev_resume(mddev);
					goto out;
				}
				mddev_resume(mddev);
			}
		}
	}
+158 −76
Original line number Diff line number Diff line
@@ -33,13 +33,6 @@ struct dlm_lock_resource {
	int mode;
};

struct suspend_info {
	int slot;
	sector_t lo;
	sector_t hi;
	struct list_head list;
};

struct resync_info {
	__le64 lo;
	__le64 hi;
@@ -80,7 +73,13 @@ struct md_cluster_info {
	struct dlm_lock_resource **other_bitmap_lockres;
	struct dlm_lock_resource *resync_lockres;
	struct list_head suspend_list;

	spinlock_t suspend_lock;
	/* record the region which write should be suspended */
	sector_t suspend_lo;
	sector_t suspend_hi;
	int suspend_from; /* the slot which broadcast suspend_lo/hi */

	struct md_thread *recovery_thread;
	unsigned long recovery_map;
	/* communication loc resources */
@@ -105,6 +104,7 @@ enum msg_type {
	RE_ADD,
	BITMAP_NEEDS_SYNC,
	CHANGE_CAPACITY,
	BITMAP_RESIZE,
};

struct cluster_msg {
@@ -270,25 +270,22 @@ static void add_resync_info(struct dlm_lock_resource *lockres,
	ri->hi = cpu_to_le64(hi);
}

static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
static int read_resync_info(struct mddev *mddev,
			    struct dlm_lock_resource *lockres)
{
	struct resync_info ri;
	struct suspend_info *s = NULL;
	sector_t hi = 0;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	int ret = 0;

	dlm_lock_sync(lockres, DLM_LOCK_CR);
	memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
	hi = le64_to_cpu(ri.hi);
	if (hi > 0) {
		s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
		if (!s)
			goto out;
		s->hi = hi;
		s->lo = le64_to_cpu(ri.lo);
	if (le64_to_cpu(ri.hi) > 0) {
		cinfo->suspend_hi = le64_to_cpu(ri.hi);
		cinfo->suspend_lo = le64_to_cpu(ri.lo);
		ret = 1;
	}
	dlm_unlock_sync(lockres);
out:
	return s;
	return ret;
}

static void recover_bitmaps(struct md_thread *thread)
@@ -298,7 +295,6 @@ static void recover_bitmaps(struct md_thread *thread)
	struct dlm_lock_resource *bm_lockres;
	char str[64];
	int slot, ret;
	struct suspend_info *s, *tmp;
	sector_t lo, hi;

	while (cinfo->recovery_map) {
@@ -325,13 +321,17 @@ static void recover_bitmaps(struct md_thread *thread)

		/* Clear suspend_area associated with the bitmap */
		spin_lock_irq(&cinfo->suspend_lock);
		list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
			if (slot == s->slot) {
				list_del(&s->list);
				kfree(s);
			}
		cinfo->suspend_hi = 0;
		cinfo->suspend_lo = 0;
		cinfo->suspend_from = -1;
		spin_unlock_irq(&cinfo->suspend_lock);

		/* Kick off a reshape if needed */
		if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
		    test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
		    mddev->reshape_position != MaxSector)
			md_wakeup_thread(mddev->sync_thread);

		if (hi > 0) {
			if (lo < mddev->recovery_cp)
				mddev->recovery_cp = lo;
@@ -434,34 +434,23 @@ static void ack_bast(void *arg, int mode)
	}
}

static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
{
	struct suspend_info *s, *tmp;

	list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
		if (slot == s->slot) {
			list_del(&s->list);
			kfree(s);
			break;
		}
}

static void remove_suspend_info(struct mddev *mddev, int slot)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	mddev->pers->quiesce(mddev, 1);
	spin_lock_irq(&cinfo->suspend_lock);
	__remove_suspend_info(cinfo, slot);
	cinfo->suspend_hi = 0;
	cinfo->suspend_lo = 0;
	spin_unlock_irq(&cinfo->suspend_lock);
	mddev->pers->quiesce(mddev, 0);
}


static void process_suspend_info(struct mddev *mddev,
		int slot, sector_t lo, sector_t hi)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct suspend_info *s;
	struct mdp_superblock_1 *sb = NULL;
	struct md_rdev *rdev;

	if (!hi) {
		/*
@@ -475,6 +464,12 @@ static void process_suspend_info(struct mddev *mddev,
		return;
	}

	rdev_for_each(rdev, mddev)
		if (rdev->raid_disk > -1 && !test_bit(Faulty, &rdev->flags)) {
			sb = page_address(rdev->sb_page);
			break;
		}

	/*
	 * The bitmaps are not same for different nodes
	 * if RESYNCING is happening in one node, then
@@ -487,26 +482,26 @@ static void process_suspend_info(struct mddev *mddev,
	 * sync_low/hi is used to record the region which
	 * arrived in the previous RESYNCING message,
	 *
	 * Call bitmap_sync_with_cluster to clear
	 * NEEDED_MASK and set RESYNC_MASK since
	 * resync thread is running in another node,
	 * so we don't need to do the resync again
	 * with the same section */
	md_bitmap_sync_with_cluster(mddev, cinfo->sync_low, cinfo->sync_hi, lo, hi);
	 * Call md_bitmap_sync_with_cluster to clear NEEDED_MASK
	 * and set RESYNC_MASK since  resync thread is running
	 * in another node, so we don't need to do the resync
	 * again with the same section.
	 *
	 * Skip md_bitmap_sync_with_cluster in case reshape
	 * happening, because reshaping region is small and
	 * we don't want to trigger lots of WARN.
	 */
	if (sb && !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE))
		md_bitmap_sync_with_cluster(mddev, cinfo->sync_low,
					    cinfo->sync_hi, lo, hi);
	cinfo->sync_low = lo;
	cinfo->sync_hi = hi;

	s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
	if (!s)
		return;
	s->slot = slot;
	s->lo = lo;
	s->hi = hi;
	mddev->pers->quiesce(mddev, 1);
	spin_lock_irq(&cinfo->suspend_lock);
	/* Remove existing entry (if exists) before adding */
	__remove_suspend_info(cinfo, slot);
	list_add(&s->list, &cinfo->suspend_list);
	cinfo->suspend_from = slot;
	cinfo->suspend_lo = lo;
	cinfo->suspend_hi = hi;
	spin_unlock_irq(&cinfo->suspend_lock);
	mddev->pers->quiesce(mddev, 0);
}
@@ -612,6 +607,11 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
	case BITMAP_NEEDS_SYNC:
		__recover_slot(mddev, le32_to_cpu(msg->slot));
		break;
	case BITMAP_RESIZE:
		if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0))
			ret = md_bitmap_resize(mddev->bitmap,
					    le64_to_cpu(msg->high), 0, 0);
		break;
	default:
		ret = -1;
		pr_warn("%s:%d Received unknown message from %d\n",
@@ -800,7 +800,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
	struct md_cluster_info *cinfo = mddev->cluster_info;
	int i, ret = 0;
	struct dlm_lock_resource *bm_lockres;
	struct suspend_info *s;
	char str[64];
	sector_t lo, hi;

@@ -819,16 +818,13 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots)
		bm_lockres->flags |= DLM_LKF_NOQUEUE;
		ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
		if (ret == -EAGAIN) {
			s = read_resync_info(mddev, bm_lockres);
			if (s) {
			if (read_resync_info(mddev, bm_lockres)) {
				pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
						__func__, __LINE__,
						(unsigned long long) s->lo,
						(unsigned long long) s->hi, i);
				spin_lock_irq(&cinfo->suspend_lock);
				s->slot = i;
				list_add(&s->list, &cinfo->suspend_list);
				spin_unlock_irq(&cinfo->suspend_lock);
					(unsigned long long) cinfo->suspend_lo,
					(unsigned long long) cinfo->suspend_hi,
					i);
				cinfo->suspend_from = i;
			}
			ret = 0;
			lockres_free(bm_lockres);
@@ -1001,10 +997,17 @@ static int leave(struct mddev *mddev)
	if (!cinfo)
		return 0;

	/* BITMAP_NEEDS_SYNC message should be sent when node
	/*
	 * BITMAP_NEEDS_SYNC message should be sent when node
	 * is leaving the cluster with dirty bitmap, also we
	 * can only deliver it when dlm connection is available */
	if (cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector)
	 * can only deliver it when dlm connection is available.
	 *
	 * Also, we should send BITMAP_NEEDS_SYNC message in
	 * case reshaping is interrupted.
	 */
	if ((cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector) ||
	    (mddev->reshape_position != MaxSector &&
	     test_bit(MD_CLOSING, &mddev->flags)))
		resync_bitmap(mddev);

	set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
@@ -1102,6 +1105,80 @@ static void metadata_update_cancel(struct mddev *mddev)
	unlock_comm(cinfo);
}

/*
 * Broadcast a BITMAP_RESIZE message carrying the new size so that
 * every node in the cluster resizes its bitmap to match.
 *
 * Returns 0 on success, or the error from sendmsg() on failure
 * (logged before returning).
 */
static int update_bitmap_size(struct mddev *mddev, sector_t size)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct cluster_msg cmsg = {0};
	int err;

	cmsg.type = cpu_to_le32(BITMAP_RESIZE);
	cmsg.high = cpu_to_le64(size);

	err = sendmsg(cinfo, &cmsg, 0);
	if (err)
		pr_err("%s:%d: failed to send BITMAP_RESIZE message (%d)\n",
			__func__, __LINE__, err);
	return err;
}

/*
 * Resize the bitmaps of all nodes before a reshape is started.
 *
 * First broadcast BITMAP_RESIZE so every active node grows its own
 * bitmap, then walk the bitmap slots of the other (inactive) nodes
 * and update their on-disk page counts directly, taking each node's
 * bitmap lock with NOQUEUE to prove the slot is unoccupied.
 *
 * On any failure the new size is reverted cluster-wide via a second
 * BITMAP_RESIZE message with @oldsize.
 *
 * Returns 0 on success, -1 on failure.
 */
static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize)
{
	struct bitmap_counts *counts;
	char str[64];
	struct dlm_lock_resource *bm_lockres;
	struct bitmap *bitmap = mddev->bitmap;
	unsigned long my_pages = bitmap->counts.pages;
	int i, rv;

	/*
	 * We need to ensure all the nodes can grow to a larger
	 * bitmap size before make the reshaping.
	 */
	rv = update_bitmap_size(mddev, newsize);
	if (rv)
		return rv;

	for (i = 0; i < mddev->bitmap_info.nodes; i++) {
		if (i == md_cluster_ops->slot_number(mddev))
			continue;

		bitmap = get_bitmap_from_slot(mddev, i);
		if (IS_ERR(bitmap)) {
			pr_err("can't get bitmap from slot %d\n", i);
			/*
			 * Don't hand the ERR_PTR to md_bitmap_free()
			 * in the error path below.
			 */
			bitmap = NULL;
			goto out;
		}
		counts = &bitmap->counts;

		/*
		 * If we can hold the bitmap lock of one node then
		 * the slot is not occupied, update the pages.
		 */
		snprintf(str, 64, "bitmap%04d", i);
		bm_lockres = lockres_init(mddev, str, NULL, 1);
		if (!bm_lockres) {
			pr_err("Cannot initialize %s lock\n", str);
			goto out;
		}
		bm_lockres->flags |= DLM_LKF_NOQUEUE;
		rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
		if (!rv)
			counts->pages = my_pages;
		lockres_free(bm_lockres);

		if (my_pages != counts->pages)
			/*
			 * Let's revert the bitmap size if one node
			 * can't resize bitmap
			 */
			goto out;

		/*
		 * The slot bitmap was only needed to update its page
		 * count; release it here so it doesn't leak when every
		 * node resizes successfully.
		 */
		md_bitmap_free(bitmap);
	}

	return 0;
out:
	/* md_bitmap_free() is a no-op for NULL */
	md_bitmap_free(bitmap);
	update_bitmap_size(mddev, oldsize);
	return -1;
}

/*
 * return 0 if all the bitmaps have the same sync_size
 */
@@ -1243,6 +1320,16 @@ static int resync_start(struct mddev *mddev)
	return dlm_lock_sync_interruptible(cinfo->resync_lockres, DLM_LOCK_EX, mddev);
}

/*
 * Report the region currently being resynced by the remote node,
 * as recorded from the last RESYNCING message.
 *
 * Both bounds are copied under suspend_lock so the caller sees a
 * consistent [lo, hi) pair.
 */
static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;

	spin_lock_irq(&cinfo->suspend_lock);
	*hi = cinfo->suspend_hi;
	*lo = cinfo->suspend_lo;
	spin_unlock_irq(&cinfo->suspend_lock);
}

static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -1295,21 +1382,14 @@ static int area_resyncing(struct mddev *mddev, int direction,
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	int ret = 0;
	struct suspend_info *s;

	if ((direction == READ) &&
		test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state))
		return 1;

	spin_lock_irq(&cinfo->suspend_lock);
	if (list_empty(&cinfo->suspend_list))
		goto out;
	list_for_each_entry(s, &cinfo->suspend_list, list)
		if (hi > s->lo && lo < s->hi) {
	if (hi > cinfo->suspend_lo && lo < cinfo->suspend_hi)
		ret = 1;
			break;
		}
out:
	spin_unlock_irq(&cinfo->suspend_lock);
	return ret;
}
@@ -1482,6 +1562,7 @@ static struct md_cluster_operations cluster_ops = {
	.resync_start = resync_start,
	.resync_finish = resync_finish,
	.resync_info_update = resync_info_update,
	.resync_info_get = resync_info_get,
	.metadata_update_start = metadata_update_start,
	.metadata_update_finish = metadata_update_finish,
	.metadata_update_cancel = metadata_update_cancel,
@@ -1492,6 +1573,7 @@ static struct md_cluster_operations cluster_ops = {
	.remove_disk = remove_disk,
	.load_bitmaps = load_bitmaps,
	.gather_bitmaps = gather_bitmaps,
	.resize_bitmaps = resize_bitmaps,
	.lock_all_bitmaps = lock_all_bitmaps,
	.unlock_all_bitmaps = unlock_all_bitmaps,
	.update_size = update_size,
+2 −0
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ struct md_cluster_operations {
	int (*leave)(struct mddev *mddev);
	int (*slot_number)(struct mddev *mddev);
	int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
	void (*resync_info_get)(struct mddev *mddev, sector_t *lo, sector_t *hi);
	int (*metadata_update_start)(struct mddev *mddev);
	int (*metadata_update_finish)(struct mddev *mddev);
	void (*metadata_update_cancel)(struct mddev *mddev);
@@ -26,6 +27,7 @@ struct md_cluster_operations {
	int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
	void (*load_bitmaps)(struct mddev *mddev, int total_slots);
	int (*gather_bitmaps)(struct md_rdev *rdev);
	int (*resize_bitmaps)(struct mddev *mddev, sector_t newsize, sector_t oldsize);
	int (*lock_all_bitmaps)(struct mddev *mddev);
	void (*unlock_all_bitmaps)(struct mddev *mddev);
	void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors);
+84 −29
Original line number Diff line number Diff line
@@ -452,10 +452,11 @@ static void md_end_flush(struct bio *fbio)
	rdev_dec_pending(rdev, mddev);

	if (atomic_dec_and_test(&fi->flush_pending)) {
		if (bio->bi_iter.bi_size == 0)
		if (bio->bi_iter.bi_size == 0) {
			/* an empty barrier - all done */
			bio_endio(bio);
		else {
			mempool_free(fi, mddev->flush_pool);
		} else {
			INIT_WORK(&fi->flush_work, submit_flushes);
			queue_work(md_wq, &fi->flush_work);
		}
@@ -509,10 +510,11 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
	rcu_read_unlock();

	if (atomic_dec_and_test(&fi->flush_pending)) {
		if (bio->bi_iter.bi_size == 0)
		if (bio->bi_iter.bi_size == 0) {
			/* an empty barrier - all done */
			bio_endio(bio);
		else {
			mempool_free(fi, mddev->flush_pool);
		} else {
			INIT_WORK(&fi->flush_work, submit_flushes);
			queue_work(md_wq, &fi->flush_work);
		}
@@ -5904,14 +5906,6 @@ static void __md_stop(struct mddev *mddev)
		mddev->to_remove = &md_redundancy_group;
	module_put(pers->owner);
	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}

void md_stop(struct mddev *mddev)
{
	/* stop the array and free an attached data structures.
	 * This is called from dm-raid
	 */
	__md_stop(mddev);
	if (mddev->flush_bio_pool) {
		mempool_destroy(mddev->flush_bio_pool);
		mddev->flush_bio_pool = NULL;
@@ -5920,6 +5914,14 @@ void md_stop(struct mddev *mddev)
		mempool_destroy(mddev->flush_pool);
		mddev->flush_pool = NULL;
	}
}

void md_stop(struct mddev *mddev)
{
	/* stop the array and free an attached data structures.
	 * This is called from dm-raid
	 */
	__md_stop(mddev);
	bioset_exit(&mddev->bio_set);
	bioset_exit(&mddev->sync_set);
}
@@ -8370,9 +8372,17 @@ void md_do_sync(struct md_thread *thread)
		else if (!mddev->bitmap)
			j = mddev->recovery_cp;

	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
	} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
		max_sectors = mddev->resync_max_sectors;
	else {
		/*
		 * If the original node aborts reshaping then we continue the
		 * reshaping, so set j again to avoid restart reshape from the
		 * first beginning
		 */
		if (mddev_is_clustered(mddev) &&
		    mddev->reshape_position != MaxSector)
			j = mddev->reshape_position;
	} else {
		/* recovery follows the physical size of devices */
		max_sectors = mddev->dev_sectors;
		j = MaxSector;
@@ -8623,9 +8633,11 @@ void md_do_sync(struct md_thread *thread)
		mddev_lock_nointr(mddev);
		md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
		mddev_unlock(mddev);
		if (!mddev_is_clustered(mddev)) {
			set_capacity(mddev->gendisk, mddev->array_sectors);
			revalidate_disk(mddev->gendisk);
		}
	}

	spin_lock(&mddev->lock);
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8790,6 +8802,18 @@ static void md_start_sync(struct work_struct *ws)
 */
void md_check_recovery(struct mddev *mddev)
{
	if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
		/* Write superblock - thread that called mddev_suspend()
		 * holds reconfig_mutex for us.
		 */
		set_bit(MD_UPDATING_SB, &mddev->flags);
		smp_mb__after_atomic();
		if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
			md_update_sb(mddev, 0);
		clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
		wake_up(&mddev->sb_wait);
	}

	if (mddev->suspended)
		return;

@@ -8949,16 +8973,6 @@ void md_check_recovery(struct mddev *mddev)
	unlock:
		wake_up(&mddev->sb_wait);
		mddev_unlock(mddev);
	} else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
		/* Write superblock - thread that called mddev_suspend()
		 * holds reconfig_mutex for us.
		 */
		set_bit(MD_UPDATING_SB, &mddev->flags);
		smp_mb__after_atomic();
		if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
			md_update_sb(mddev, 0);
		clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
		wake_up(&mddev->sb_wait);
	}
}
EXPORT_SYMBOL(md_check_recovery);
@@ -8966,6 +8980,8 @@ EXPORT_SYMBOL(md_check_recovery);
void md_reap_sync_thread(struct mddev *mddev)
{
	struct md_rdev *rdev;
	sector_t old_dev_sectors = mddev->dev_sectors;
	bool is_reshaped = false;

	/* resync has finished, collect result */
	md_unregister_thread(&mddev->sync_thread);
@@ -8980,8 +8996,11 @@ void md_reap_sync_thread(struct mddev *mddev)
		}
	}
	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    mddev->pers->finish_reshape)
	    mddev->pers->finish_reshape) {
		mddev->pers->finish_reshape(mddev);
		if (mddev_is_clustered(mddev))
			is_reshaped = true;
	}

	/* If array is no-longer degraded, then any saved_raid_disk
	 * information must be scrapped.
@@ -9002,6 +9021,14 @@ void md_reap_sync_thread(struct mddev *mddev)
	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	/*
	 * We call md_cluster_ops->update_size here because sync_size could
	 * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
	 * so it is time to update size across cluster.
	 */
	if (mddev_is_clustered(mddev) && is_reshaped
				      && !test_bit(MD_CLOSING, &mddev->flags))
		md_cluster_ops->update_size(mddev, old_dev_sectors);
	wake_up(&resync_wait);
	/* flag recovery needed just to double check */
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -9201,8 +9228,12 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
		}

		if (role != rdev2->raid_disk) {
			/* got activated */
			if (rdev2->raid_disk == -1 && role != 0xffff) {
			/*
			 * got activated except reshape is happening.
			 */
			if (rdev2->raid_disk == -1 && role != 0xffff &&
			    !(le32_to_cpu(sb->feature_map) &
			      MD_FEATURE_RESHAPE_ACTIVE)) {
				rdev2->saved_raid_disk = role;
				ret = remove_and_add_spares(mddev, rdev2);
				pr_info("Activated spare: %s\n",
@@ -9228,6 +9259,30 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
	if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
		update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));

	/*
	 * Since mddev->delta_disks has already updated in update_raid_disks,
	 * so it is time to check reshape.
	 */
	if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
	    (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/*
		 * reshape is happening in the remote node, we need to
		 * update reshape_position and call start_reshape.
		 */
		mddev->reshape_position = sb->reshape_position;
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
		if (mddev->pers->start_reshape)
			mddev->pers->start_reshape(mddev);
	} else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
		   mddev->reshape_position != MaxSector &&
		   !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
		/* reshape is just done in another node. */
		mddev->reshape_position = MaxSector;
		if (mddev->pers->update_reshape_pos)
			mddev->pers->update_reshape_pos(mddev);
	}

	/* Finally set the event to be up to date */
	mddev->events = le64_to_cpu(sb->events);
}
+1 −0
Original line number Diff line number Diff line
@@ -557,6 +557,7 @@ struct md_personality
	int (*check_reshape) (struct mddev *mddev);
	int (*start_reshape) (struct mddev *mddev);
	void (*finish_reshape) (struct mddev *mddev);
	void (*update_reshape_pos) (struct mddev *mddev);
	/* quiesce suspends or resumes internal processing.
	 * 1 - stop new actions and wait for action io to complete
	 * 0 - return to normal behaviour
Loading