
Commit 509e4aef authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: Fix removal of extra drives when converting RAID6 to RAID5
  md: range check slot number when manually adding a spare.
  md/raid5: handle manually-added spares in start_reshape.
  md: fix sync_completed reporting for very large drives (>2TB)
  md: allow suspend_lo and suspend_hi to decrease as well as increase.
  md: Don't let implementation detail of curr_resync leak out through sysfs.
  md: separate meta and data devs
  md-new-param-to_sync_page_io
  md-new-param-to-calc_dev_sboffset
  md: Be more careful about clearing flags bit in ->recovery
  md: md_stop_writes requires mddev_lock.
  md/raid5: use sysfs_notify_dirent_safe to avoid NULL pointer
  md: Ensure no IO request to get md device before it is properly initialised.
  md: Fix single printks with multiple KERN_<level>s
  md: fix regression resulting in delays in clearing bits in a bitmap
  md: fix regression with re-adding devices to arrays with no metadata
parents 375b6f5a bf2cb0da
drivers/md/bitmap.c  +8 −4
@@ -210,11 +210,11 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset,
 		    || test_bit(Faulty, &rdev->flags))
 			continue;
 
-		target = rdev->sb_start + offset + index * (PAGE_SIZE/512);
+		target = offset + index * (PAGE_SIZE/512);
 
 		if (sync_page_io(rdev, target,
 				 roundup(size, bdev_logical_block_size(rdev->bdev)),
-				 page, READ)) {
+				 page, READ, true)) {
 			page->index = index;
 			attach_page_buffers(page, NULL); /* so that free_buffer will
 							  * quietly no-op */
@@ -264,14 +264,18 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
 static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 {
 	mdk_rdev_t *rdev = NULL;
+	struct block_device *bdev;
 	mddev_t *mddev = bitmap->mddev;
 
 	while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
 		int size = PAGE_SIZE;
 		loff_t offset = mddev->bitmap_info.offset;
+
+		bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
+
 		if (page->index == bitmap->file_pages-1)
 			size = roundup(bitmap->last_page_size,
-				       bdev_logical_block_size(rdev->bdev));
+				       bdev_logical_block_size(bdev));
 		/* Just make sure we aren't corrupting data or
 		 * metadata
 		 */
@@ -1542,7 +1546,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
 	wait_event(bitmap->mddev->recovery_wait,
 		   atomic_read(&bitmap->mddev->recovery_active) == 0);
 
-	bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync;
+	bitmap->mddev->curr_resync_completed = sector;
 	set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
 	sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
 	s = 0;
drivers/md/md.c  +120 −77
@@ -288,10 +288,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
 	int rv;
 	int cpu;
 
-	if (mddev == NULL || mddev->pers == NULL) {
+	if (mddev == NULL || mddev->pers == NULL
+	    || !mddev->ready) {
 		bio_io_error(bio);
 		return 0;
 	}
+	smp_rmb(); /* Ensure implications of  'active' are visible */
 	rcu_read_lock();
 	if (mddev->suspended) {
 		DEFINE_WAIT(__wait);
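A note on the new ->ready test above: it pairs with the smp_wmb() and mddev->ready = 1 store added to md_run() further down in this diff. A minimal sketch of the publish/consume ordering being relied on, with illustrative helper names that are not part of the patch:

	/* Sketch only -- helper names are hypothetical; the barriers mirror the patch.
	 * Publisher side (md_run): finish all array setup before announcing it. */
	static void announce_array_ready(mddev_t *mddev)
	{
		mddev->in_sync = 1;	/* ...last piece of setup... */
		smp_wmb();		/* setup stores must not pass the 'ready' store */
		mddev->ready = 1;
	}

	/* Consumer side (md_make_request): refuse I/O until 'ready' is seen,
	 * then issue a read barrier before trusting the rest of *mddev. */
	static bool array_ready_for_io(mddev_t *mddev)
	{
		if (mddev == NULL || mddev->pers == NULL || !mddev->ready)
			return false;
		smp_rmb();		/* pairs with the smp_wmb() in the publisher */
		return true;
	}

This is the classic publish-then-flag pattern: readers that observe the flag are guaranteed to also observe everything written before it.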
@@ -703,9 +705,9 @@ static struct mdk_personality *find_pers(int level, char *clevel)
 }
 
 /* return the offset of the super block in 512byte sectors */
-static inline sector_t calc_dev_sboffset(struct block_device *bdev)
+static inline sector_t calc_dev_sboffset(mdk_rdev_t *rdev)
 {
-	sector_t num_sectors = i_size_read(bdev->bd_inode) / 512;
+	sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
 	return MD_NEW_SIZE_SECTORS(num_sectors);
 }
 
@@ -763,7 +765,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 	 */
 	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
 
-	bio->bi_bdev = rdev->bdev;
+	bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
 	bio->bi_sector = sector;
 	bio_add_page(bio, page, size, 0);
 	bio->bi_private = rdev;
@@ -793,7 +795,7 @@ static void bi_complete(struct bio *bio, int error)
 }
 
 int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
-		 struct page *page, int rw)
+		 struct page *page, int rw, bool metadata_op)
 {
 	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
 	struct completion event;
@@ -801,8 +803,12 @@ int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,


 	rw |= REQ_SYNC | REQ_UNPLUG;
 
-	bio->bi_bdev = rdev->bdev;
-	bio->bi_sector = sector;
+	bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
+		rdev->meta_bdev : rdev->bdev;
+	if (metadata_op)
+		bio->bi_sector = sector + rdev->sb_start;
+	else
+		bio->bi_sector = sector + rdev->data_offset;
 	bio_add_page(bio, page, size, 0);
 	init_completion(&event);
 	bio->bi_private = &event;
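Worth noting for the hunk above: with the extra metadata_op argument, callers now pass a sector relative to the superblock (for metadata I/O) or to the data area, and sync_page_io() adds rdev->sb_start or rdev->data_offset itself. A hedged before/after sketch of the calling convention, using the callers visible elsewhere in this diff:

	/* Old convention: the caller computed the absolute device sector itself.
	 *	sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ);
	 *	sync_page_io(rdev, sect + rdev->data_offset, s<<9, page, READ);
	 *
	 * New convention: pass a relative sector and say whether it is metadata.
	 *	sync_page_io(rdev, 0, size, rdev->sb_page, READ, true);   // + sb_start
	 *	sync_page_io(rdev, sect, s<<9, page, READ, false);        // + data_offset
	 */

Centralising the offset arithmetic is what lets metadata reads and writes be transparently redirected to meta_bdev when one is configured.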
@@ -827,7 +833,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size)
 		return 0;
 
 
-	if (!sync_page_io(rdev, rdev->sb_start, size, rdev->sb_page, READ))
+	if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
 		goto fail;
 	rdev->sb_loaded = 1;
 	return 0;
@@ -989,7 +995,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
 	 *
 	 * It also happens to be a multiple of 4Kb.
 	 */
-	rdev->sb_start = calc_dev_sboffset(rdev->bdev);
+	rdev->sb_start = calc_dev_sboffset(rdev);
 
 	ret = read_disk_sb(rdev, MD_SB_BYTES);
 	if (ret) return ret;
@@ -1330,7 +1336,7 @@ super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
 		return 0; /* component must fit device */
 	if (rdev->mddev->bitmap_info.offset)
 		return 0; /* can't move bitmap */
-	rdev->sb_start = calc_dev_sboffset(rdev->bdev);
+	rdev->sb_start = calc_dev_sboffset(rdev);
 	if (!num_sectors || num_sectors > rdev->sb_start)
 		num_sectors = rdev->sb_start;
 	md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
@@ -2465,6 +2471,10 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 			if (rdev2->raid_disk == slot)
 				return -EEXIST;
 
+		if (slot >= rdev->mddev->raid_disks &&
+		    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
+			return -ENOSPC;
+
 		rdev->raid_disk = slot;
 		if (test_bit(In_sync, &rdev->flags))
 			rdev->saved_raid_disk = slot;
@@ -2482,7 +2492,8 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 			/* failure here is OK */;
 		/* don't wakeup anyone, leave that to userspace. */
 	} else {
-		if (slot >= rdev->mddev->raid_disks)
+		if (slot >= rdev->mddev->raid_disks &&
+		    slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
 			return -ENOSPC;
 		rdev->raid_disk = slot;
 		/* assume it is working */
@@ -3107,7 +3118,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 		char nm[20];
 		if (rdev->raid_disk < 0)
 			continue;
-		if (rdev->new_raid_disk > mddev->raid_disks)
+		if (rdev->new_raid_disk >= mddev->raid_disks)
 			rdev->new_raid_disk = -1;
 		if (rdev->new_raid_disk == rdev->raid_disk)
 			continue;
@@ -3736,6 +3747,8 @@ action_show(mddev_t *mddev, char *page)
 	return sprintf(page, "%s\n", type);
 }
 
+static void reap_sync_thread(mddev_t *mddev);
+
 static ssize_t
 action_store(mddev_t *mddev, const char *page, size_t len)
 {
@@ -3750,9 +3763,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
 	if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
 		if (mddev->sync_thread) {
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			md_unregister_thread(mddev->sync_thread);
-			mddev->sync_thread = NULL;
-			mddev->recovery = 0;
+			reap_sync_thread(mddev);
 		}
 	} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
 		   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -3904,7 +3915,7 @@ static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
 static ssize_t
 sync_completed_show(mddev_t *mddev, char *page)
 {
-	unsigned long max_sectors, resync;
+	unsigned long long max_sectors, resync;
 
 	if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		return sprintf(page, "none\n");
@@ -3915,7 +3926,7 @@ sync_completed_show(mddev_t *mddev, char *page)
 		max_sectors = mddev->dev_sectors;
 
 	resync = mddev->curr_resync_completed;
-	return sprintf(page, "%lu / %lu\n", resync, max_sectors);
+	return sprintf(page, "%llu / %llu\n", resync, max_sectors);
 }
 
 static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
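The unsigned long to unsigned long long change above is what makes sync_completed correct for arrays larger than 2 TiB: on 32-bit kernels unsigned long is 32 bits, and 2 TiB expressed in 512-byte sectors is exactly 2^32, one past what it can hold. A small stand-alone illustration of the overflow (not part of the patch):

	#include <stdio.h>

	int main(void)
	{
		/* 2 TiB = 2^41 bytes; in 512-byte sectors that is 2^41 / 2^9 = 2^32. */
		unsigned long long bytes = 2ULL << 40;
		unsigned long long sectors = bytes / 512;	/* 4294967296 */

		printf("sectors for 2 TiB : %llu\n", sectors);
		/* Keeping only 32 bits, as a 32-bit unsigned long would, gives 0. */
		printf("low 32 bits only  : %llu\n", sectors & 0xffffffffULL);
		return 0;
	}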
@@ -4002,19 +4013,24 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
 {
 	char *e;
 	unsigned long long new = simple_strtoull(buf, &e, 10);
+	unsigned long long old = mddev->suspend_lo;
 
 	if (mddev->pers == NULL || 
 	    mddev->pers->quiesce == NULL)
 		return -EINVAL;
 	if (buf == e || (*e && *e != '\n'))
 		return -EINVAL;
-	if (new >= mddev->suspend_hi ||
-	    (new > mddev->suspend_lo && new < mddev->suspend_hi)) {
-		mddev->suspend_lo = new;
-		mddev->pers->quiesce(mddev, 2);
-		return len;
-	} else
-		return -EINVAL;
+
+	mddev->suspend_lo = new;
+	if (new >= old)
+		/* Shrinking suspended region */
+		mddev->pers->quiesce(mddev, 2);
+	else {
+		/* Expanding suspended region - need to wait */
+		mddev->pers->quiesce(mddev, 1);
+		mddev->pers->quiesce(mddev, 0);
+	}
+	return len;
 }
 static struct md_sysfs_entry md_suspend_lo =
 __ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
@@ -4031,20 +4047,24 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
 {
 	char *e;
 	unsigned long long new = simple_strtoull(buf, &e, 10);
+	unsigned long long old = mddev->suspend_hi;
 
 	if (mddev->pers == NULL ||
 	    mddev->pers->quiesce == NULL)
 		return -EINVAL;
 	if (buf == e || (*e && *e != '\n'))
 		return -EINVAL;
-	if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) ||
-	    (new > mddev->suspend_lo && new > mddev->suspend_hi)) {
-		mddev->suspend_hi = new;
-		mddev->pers->quiesce(mddev, 1);
-		mddev->pers->quiesce(mddev, 0);
-		return len;
-	} else
-		return -EINVAL;
+
+	mddev->suspend_hi = new;
+	if (new <= old)
+		/* Shrinking suspended region */
+		mddev->pers->quiesce(mddev, 2);
+	else {
+		/* Expanding suspended region - need to wait */
+		mddev->pers->quiesce(mddev, 1);
+		mddev->pers->quiesce(mddev, 0);
+	}
+	return len;
 }
 static struct md_sysfs_entry md_suspend_hi =
 __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
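Both stores above now follow the same rule: a shrinking suspended range only needs quiesce(mddev, 2) to wake waiters, while a growing range must drain in-flight requests with quiesce(mddev, 1) followed by quiesce(mddev, 0). A hedged sketch of that shared rule, factored into a helper that is not itself in the patch:

	/* Sketch only: the decision both suspend_lo_store() and suspend_hi_store()
	 * now make, expressed as a hypothetical helper.  'grew' is true when the
	 * [suspend_lo, suspend_hi) range became larger after the update. */
	static void quiesce_after_range_update(mddev_t *mddev, bool grew)
	{
		if (!grew)
			/* Shrinking: nothing newly blocked, just wake any waiters. */
			mddev->pers->quiesce(mddev, 2);
		else {
			/* Expanding: wait for requests now falling inside the range. */
			mddev->pers->quiesce(mddev, 1);
			mddev->pers->quiesce(mddev, 0);
		}
	}

This is what allows the suspended window to move in either direction, which the old range checks forbade.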
@@ -4422,7 +4442,9 @@ int md_run(mddev_t *mddev)
 		 * We don't want the data to overlap the metadata,
 		 * Internal Bitmap issues have been handled elsewhere.
 		 */
-		if (rdev->data_offset < rdev->sb_start) {
+		if (rdev->meta_bdev) {
+			/* Nothing to check */;
+		} else if (rdev->data_offset < rdev->sb_start) {
 			if (mddev->dev_sectors &&
 			    rdev->data_offset + mddev->dev_sectors
 			    > rdev->sb_start) {
@@ -4556,7 +4578,8 @@ int md_run(mddev_t *mddev)
 	mddev->safemode_timer.data = (unsigned long) mddev;
 	mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
 	mddev->in_sync = 1;
-
+	smp_wmb();
+	mddev->ready = 1;
 	list_for_each_entry(rdev, &mddev->disks, same_set)
 		if (rdev->raid_disk >= 0) {
 			char nm[20];
@@ -4693,13 +4716,12 @@ static void md_clean(mddev_t *mddev)
 	mddev->plug = NULL;
 }
 
-void md_stop_writes(mddev_t *mddev)
+static void __md_stop_writes(mddev_t *mddev)
 {
 	if (mddev->sync_thread) {
 		set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-		md_unregister_thread(mddev->sync_thread);
-		mddev->sync_thread = NULL;
+		reap_sync_thread(mddev);
 	}
 
 	del_timer_sync(&mddev->safemode_timer);
@@ -4713,10 +4735,18 @@ void md_stop_writes(mddev_t *mddev)
 		md_update_sb(mddev, 1);
 	}
 }
+
+void md_stop_writes(mddev_t *mddev)
+{
+	mddev_lock(mddev);
+	__md_stop_writes(mddev);
+	mddev_unlock(mddev);
+}
 EXPORT_SYMBOL_GPL(md_stop_writes);
 
 void md_stop(mddev_t *mddev)
 {
+	mddev->ready = 0;
 	mddev->pers->stop(mddev);
 	if (mddev->pers->sync_request && mddev->to_remove == NULL)
 		mddev->to_remove = &md_redundancy_group;
@@ -4736,7 +4766,7 @@ static int md_set_readonly(mddev_t *mddev, int is_open)
 		goto out;
 	}
 	if (mddev->pers) {
-		md_stop_writes(mddev);
+		__md_stop_writes(mddev);
 
 		err  = -ENXIO;
 		if (mddev->ro==1)
@@ -4773,7 +4803,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 		if (mddev->ro)
 			set_disk_ro(disk, 0);
 
-		md_stop_writes(mddev);
+		__md_stop_writes(mddev);
 		md_stop(mddev);
 		mddev->queue->merge_bvec_fn = NULL;
 		mddev->queue->unplug_fn = NULL;
@@ -5151,9 +5181,10 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 		/* set saved_raid_disk if appropriate */
 		if (!mddev->persistent) {
 			if (info->state & (1<<MD_DISK_SYNC)  &&
-			    info->raid_disk < mddev->raid_disks)
+			    info->raid_disk < mddev->raid_disks) {
 				rdev->raid_disk = info->raid_disk;
-			else
+				set_bit(In_sync, &rdev->flags);
+			} else
 				rdev->raid_disk = -1;
 		} else
 			super_types[mddev->major_version].
@@ -5230,7 +5261,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
 			printk(KERN_INFO "md: nonpersistent superblock ...\n");
 			rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
 		} else
-			rdev->sb_start = calc_dev_sboffset(rdev->bdev);
+			rdev->sb_start = calc_dev_sboffset(rdev);
 		rdev->sectors = rdev->sb_start;
 
 		err = bind_rdev_to_array(rdev, mddev);
@@ -5297,7 +5328,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
 	}
 
 	if (mddev->persistent)
-		rdev->sb_start = calc_dev_sboffset(rdev->bdev);
+		rdev->sb_start = calc_dev_sboffset(rdev);
 	else
 		rdev->sb_start = i_size_read(rdev->bdev->bd_inode) / 512;
 
@@ -5510,7 +5541,6 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
 	 * sb_start or, if that is <data_offset, it must fit before the size
 	 * of each device.  If num_sectors is zero, we find the largest size
 	 * that fits.
-
 	 */
 	if (mddev->sync_thread)
 		return -EBUSY;
@@ -6033,7 +6063,8 @@ static int md_thread(void * arg)
 			 || kthread_should_stop(),
 			 thread->timeout);
 
-		if (test_and_clear_bit(THREAD_WAKEUP, &thread->flags))
+		clear_bit(THREAD_WAKEUP, &thread->flags);
+		if (!kthread_should_stop())
 			thread->run(thread->mddev);
 	}
 
@@ -6799,7 +6830,7 @@ void md_do_sync(mddev_t *mddev)
 		       desc, mdname(mddev));
 		mddev->curr_resync = j;
 	}
-	mddev->curr_resync_completed = mddev->curr_resync;
+	mddev->curr_resync_completed = j;
 
 	while (j < max_sectors) {
 		sector_t sectors;
@@ -6817,8 +6848,7 @@ void md_do_sync(mddev_t *mddev)
 			md_unplug(mddev);
 			wait_event(mddev->recovery_wait,
 				   atomic_read(&mddev->recovery_active) == 0);
-			mddev->curr_resync_completed =
-				mddev->curr_resync;
+			mddev->curr_resync_completed = j;
 			set_bit(MD_CHANGE_CLEAN, &mddev->flags);
 			sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 		}
@@ -7023,6 +7053,45 @@ static int remove_and_add_spares(mddev_t *mddev)
 	}
 	return spares;
 }
+
+static void reap_sync_thread(mddev_t *mddev)
+{
+	mdk_rdev_t *rdev;
+
+	/* resync has finished, collect result */
+	md_unregister_thread(mddev->sync_thread);
+	mddev->sync_thread = NULL;
+	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+		/* success...*/
+		/* activate any spares */
+		if (mddev->pers->spare_active(mddev))
+			sysfs_notify(&mddev->kobj, NULL,
+				     "degraded");
+	}
+	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+	    mddev->pers->finish_reshape)
+		mddev->pers->finish_reshape(mddev);
+	md_update_sb(mddev, 1);
+
+	/* if array is no-longer degraded, then any saved_raid_disk
+	 * information must be scrapped
+	 */
+	if (!mddev->degraded)
+		list_for_each_entry(rdev, &mddev->disks, same_set)
+			rdev->saved_raid_disk = -1;
+
+	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+	clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+	clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+	/* flag recovery needed just to double check */
+	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+	sysfs_notify_dirent_safe(mddev->sysfs_action);
+	md_new_event(mddev);
+}
+
 /*
  * This routine is regularly called by all per-raid-array threads to
  * deal with generic issues like resync and super-block update.
@@ -7047,9 +7116,6 @@ static int remove_and_add_spares(mddev_t *mddev)
  */
 void md_check_recovery(mddev_t *mddev)
 {
-	mdk_rdev_t *rdev;
-
-
 	if (mddev->bitmap)
 		bitmap_daemon_work(mddev);
 
@@ -7117,34 +7183,7 @@ void md_check_recovery(mddev_t *mddev)
 			goto unlock;
 		}
 		if (mddev->sync_thread) {
-			/* resync has finished, collect result */
-			md_unregister_thread(mddev->sync_thread);
-			mddev->sync_thread = NULL;
-			if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-			    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
-				/* success...*/
-				/* activate any spares */
-				if (mddev->pers->spare_active(mddev))
-					sysfs_notify(&mddev->kobj, NULL,
-						     "degraded");
-			}
-			if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-			    mddev->pers->finish_reshape)
-				mddev->pers->finish_reshape(mddev);
-			md_update_sb(mddev, 1);
-
-			/* if array is no-longer degraded, then any saved_raid_disk
-			 * information must be scrapped
-			 */
-			if (!mddev->degraded)
-				list_for_each_entry(rdev, &mddev->disks, same_set)
-					rdev->saved_raid_disk = -1;
-
-			mddev->recovery = 0;
-			/* flag recovery needed just to double check */
-			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-			sysfs_notify_dirent_safe(mddev->sysfs_action);
-			md_new_event(mddev);
+			reap_sync_thread(mddev);
 			goto unlock;
 		}
 		/* Set RUNNING before clearing NEEDED to avoid
@@ -7202,7 +7241,11 @@ void md_check_recovery(mddev_t *mddev)
 					" thread...\n", 
 					mdname(mddev));
 				/* leave the spares where they are, it shouldn't hurt */
-				mddev->recovery = 0;
+				clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+				clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+				clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+				clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+				clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
 			} else
 				md_wakeup_thread(mddev->sync_thread);
 			sysfs_notify_dirent_safe(mddev->sysfs_action);
drivers/md/md.h  +10 −3
@@ -60,6 +60,12 @@ struct mdk_rdev_s
 	mddev_t *mddev;			/* RAID array if running */
 	int last_events;		/* IO event timestamp */
 
+	/*
+	 * If meta_bdev is non-NULL, it means that a separate device is
+	 * being used to store the metadata (superblock/bitmap) which
+	 * would otherwise be contained on the same device as the data (bdev).
+	 */
+	struct block_device *meta_bdev;
 	struct block_device *bdev;	/* block device handle */
 
 	struct page	*sb_page;
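Every metadata I/O path touched by this commit picks its target device with the same one-liner, as the bitmap.c and md.c hunks above show; for reference, the selection idiom is simply:

	/* Prefer the dedicated metadata device when one has been configured. */
	struct block_device *bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;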
@@ -148,7 +154,8 @@ struct mddev_s
 						       * are happening, so run/
 						       * takeover/stop are not safe
 						       */
-
+	int				ready; /* See when safe to pass 
+						* IO requests down */
 	struct gendisk			*gendisk;
 
 	struct kobject			kobj;
@@ -498,7 +505,7 @@ extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
 			   sector_t sector, int size, struct page *page);
 extern void md_super_wait(mddev_t *mddev);
 extern int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, 
-			struct page *page, int rw);
+			struct page *page, int rw, bool metadata_op);
 extern void md_do_sync(mddev_t *mddev);
 extern void md_new_event(mddev_t *mddev);
 extern int md_allow_write(mddev_t *mddev);
drivers/md/raid1.c  +15 −18
@@ -1027,8 +1027,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 	} else
 		set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n"
-	       KERN_ALERT "md/raid1:%s: Operation continuing on %d devices.\n",
+	printk(KERN_ALERT
+	       "md/raid1:%s: Disk failure on %s, disabling device.\n"
+	       "md/raid1:%s: Operation continuing on %d devices.\n",
 	       mdname(mddev), bdevname(rdev->bdev, b),
 	       mdname(mddev), conf->raid_disks - mddev->degraded);
 }
@@ -1364,10 +1365,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 					 */
 					rdev = conf->mirrors[d].rdev;
 					if (sync_page_io(rdev,
-							 sect + rdev->data_offset,
+							 sect,
 							 s<<9,
 							 bio->bi_io_vec[idx].bv_page,
-							 READ)) {
+							 READ, false)) {
 						success = 1;
 						break;
 					}
@@ -1390,10 +1391,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 					rdev = conf->mirrors[d].rdev;
 					atomic_add(s, &rdev->corrected_errors);
 					if (sync_page_io(rdev,
-							 sect + rdev->data_offset,
+							 sect,
 							 s<<9,
 							 bio->bi_io_vec[idx].bv_page,
-							 WRITE) == 0)
+							 WRITE, false) == 0)
 						md_error(mddev, rdev);
 				}
 				d = start;
@@ -1405,10 +1406,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 						continue;
 					rdev = conf->mirrors[d].rdev;
 					if (sync_page_io(rdev,
-							 sect + rdev->data_offset,
+							 sect,
 							 s<<9,
 							 bio->bi_io_vec[idx].bv_page,
-							 READ) == 0)
+							 READ, false) == 0)
 						md_error(mddev, rdev);
 				}
 			} else {
@@ -1488,10 +1489,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags) &&
-			    sync_page_io(rdev,
-					 sect + rdev->data_offset,
-					 s<<9,
-					 conf->tmppage, READ))
+			    sync_page_io(rdev, sect, s<<9,
+					 conf->tmppage, READ, false))
 				success = 1;
 			else {
 				d++;
@@ -1514,9 +1513,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags)) {
-				if (sync_page_io(rdev,
-						 sect + rdev->data_offset,
-						 s<<9, conf->tmppage, WRITE)
+				if (sync_page_io(rdev, sect, s<<9,
+						 conf->tmppage, WRITE, false)
 				    == 0)
 					/* Well, this device is dead */
 					md_error(mddev, rdev);
@@ -1531,9 +1529,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
 			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags)) {
-				if (sync_page_io(rdev,
-						 sect + rdev->data_offset,
-						 s<<9, conf->tmppage, READ)
+				if (sync_page_io(rdev, sect, s<<9,
+						 conf->tmppage, READ, false)
 				    == 0)
 					/* Well, this device is dead */
 					md_error(mddev, rdev);
drivers/md/raid10.c  +9 −8
@@ -1051,8 +1051,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 	}
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-	printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n"
-	       KERN_ALERT "md/raid10:%s: Operation continuing on %d devices.\n",
+	printk(KERN_ALERT
+	       "md/raid10:%s: Disk failure on %s, disabling device.\n"
+	       "md/raid10:%s: Operation continuing on %d devices.\n",
 	       mdname(mddev), bdevname(rdev->bdev, b),
 	       mdname(mddev), conf->raid_disks - mddev->degraded);
 }
@@ -1559,9 +1560,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 				rcu_read_unlock();
 				success = sync_page_io(rdev,
 						       r10_bio->devs[sl].addr +
-						       sect + rdev->data_offset,
+						       sect,
 						       s<<9,
-						       conf->tmppage, READ);
+						       conf->tmppage, READ, false);
 				rdev_dec_pending(rdev, mddev);
 				rcu_read_lock();
 				if (success)
@@ -1598,8 +1599,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 				atomic_add(s, &rdev->corrected_errors);
 				if (sync_page_io(rdev,
 						 r10_bio->devs[sl].addr +
-						 sect + rdev->data_offset,
-						 s<<9, conf->tmppage, WRITE)
+						 sect,
+						 s<<9, conf->tmppage, WRITE, false)
 				    == 0) {
 					/* Well, this device is dead */
 					printk(KERN_NOTICE
@@ -1635,9 +1636,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
 				rcu_read_unlock();
 				if (sync_page_io(rdev,
 						 r10_bio->devs[sl].addr +
-						 sect + rdev->data_offset,
+						 sect,
 						 s<<9, conf->tmppage,
-						 READ) == 0) {
+						 READ, false) == 0) {
 					/* Well, this device is dead */
 					printk(KERN_NOTICE
 					       "md/raid10:%s: unable to read back "