Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 3f35e210 authored by Shaohua Li's avatar Shaohua Li
Browse files

Merge branch 'mymd/for-next' into mymd/for-linus

parents 194dc870 5d881783
Loading
Loading
Loading
Loading
+54 −20
Original line number Diff line number Diff line
@@ -2482,8 +2482,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
		if (add_journal)
			mddev_resume(mddev);
		if (err) {
			unbind_rdev_from_array(rdev);
			export_rdev(rdev);
			md_kick_rdev_from_array(rdev);
			return err;
		}
	}
@@ -2600,6 +2599,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
		else
			err = -EBUSY;
	} else if (cmd_match(buf, "remove")) {
		if (rdev->mddev->pers) {
			clear_bit(Blocked, &rdev->flags);
			remove_and_add_spares(rdev->mddev, rdev);
		}
		if (rdev->raid_disk >= 0)
			err = -EBUSY;
		else {
@@ -3176,8 +3179,7 @@ int md_rdev_init(struct md_rdev *rdev)
	rdev->data_offset = 0;
	rdev->new_data_offset = 0;
	rdev->sb_events = 0;
	rdev->last_read_error.tv_sec  = 0;
	rdev->last_read_error.tv_nsec = 0;
	rdev->last_read_error = 0;
	rdev->sb_loaded = 0;
	rdev->bb_page = NULL;
	atomic_set(&rdev->nr_pending, 0);
@@ -3583,6 +3585,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
			mddev->to_remove = &md_redundancy_group;
	}

	module_put(oldpers->owner);

	rdev_for_each(rdev, mddev) {
		if (rdev->raid_disk < 0)
			continue;
@@ -3940,6 +3944,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
			} else
				err = -EBUSY;
		}
		if (!err)
			sysfs_notify_dirent_safe(mddev->sysfs_state);
		spin_unlock(&mddev->lock);
		return err ?: len;
	}
@@ -4191,6 +4197,7 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
		return err;
	if (mddev->pers) {
		err = update_size(mddev, sectors);
		if (err == 0)
			md_update_sb(mddev, 1);
	} else {
		if (mddev->dev_sectors == 0 ||
@@ -7813,6 +7820,7 @@ void md_do_sync(struct md_thread *thread)
		if (ret)
			goto skip;

		set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
		if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
			test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
			test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
@@ -8151,18 +8159,11 @@ void md_do_sync(struct md_thread *thread)
		}
	}
 skip:
	if (mddev_is_clustered(mddev) &&
	    ret == 0) {
		/* set CHANGE_PENDING here since maybe another
		 * update is needed, so other nodes are informed */
	/* set CHANGE_PENDING here since maybe another update is needed,
	 * so other nodes are informed. It should be harmless for normal
	 * raid */
	set_mask_bits(&mddev->flags, 0,
		      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
		md_wakeup_thread(mddev->thread);
		wait_event(mddev->sb_wait,
			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
		md_cluster_ops->resync_finish(mddev);
	} else
		set_bit(MD_CHANGE_DEVS, &mddev->flags);

	spin_lock(&mddev->lock);
	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8188,15 +8189,34 @@ static int remove_and_add_spares(struct mddev *mddev,
	struct md_rdev *rdev;
	int spares = 0;
	int removed = 0;
	bool remove_some = false;

	rdev_for_each(rdev, mddev)
	rdev_for_each(rdev, mddev) {
		if ((this == NULL || rdev == this) &&
		    rdev->raid_disk >= 0 &&
		    !test_bit(Blocked, &rdev->flags) &&
		    test_bit(Faulty, &rdev->flags) &&
		    atomic_read(&rdev->nr_pending)==0) {
			/* Faulty non-Blocked devices with nr_pending == 0
			 * never get nr_pending incremented,
			 * never get Faulty cleared, and never get Blocked set.
			 * So we can synchronize_rcu now rather than once per device
			 */
			remove_some = true;
			set_bit(RemoveSynchronized, &rdev->flags);
		}
	}

	if (remove_some)
		synchronize_rcu();
	rdev_for_each(rdev, mddev) {
		if ((this == NULL || rdev == this) &&
		    rdev->raid_disk >= 0 &&
		    !test_bit(Blocked, &rdev->flags) &&
		    (test_bit(Faulty, &rdev->flags) ||
		    ((test_bit(RemoveSynchronized, &rdev->flags) ||
		     (!test_bit(In_sync, &rdev->flags) &&
		      !test_bit(Journal, &rdev->flags))) &&
		    atomic_read(&rdev->nr_pending)==0) {
		    atomic_read(&rdev->nr_pending)==0)) {
			if (mddev->pers->hot_remove_disk(
				    mddev, rdev) == 0) {
				sysfs_unlink_rdev(mddev, rdev);
@@ -8204,6 +8224,10 @@ static int remove_and_add_spares(struct mddev *mddev,
				removed++;
			}
		}
		if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
			clear_bit(RemoveSynchronized, &rdev->flags);
	}

	if (removed && mddev->kobj.sd)
		sysfs_notify(&mddev->kobj, NULL, "degraded");

@@ -8506,6 +8530,11 @@ void md_reap_sync_thread(struct mddev *mddev)
			rdev->saved_raid_disk = -1;

	md_update_sb(mddev, 1);
	/* MD_CHANGE_PENDING should be cleared by md_update_sb, so we can
	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
	 * clustered raid */
	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
		md_cluster_ops->resync_finish(mddev);
	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -8803,6 +8832,7 @@ EXPORT_SYMBOL(md_reload_sb);
 * at boot time.
 */

static DEFINE_MUTEX(detected_devices_mutex);
static LIST_HEAD(all_detected_devices);
struct detected_devices_node {
	struct list_head list;
@@ -8816,7 +8846,9 @@ void md_autodetect_dev(dev_t dev)
	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
	if (node_detected_dev) {
		node_detected_dev->dev = dev;
		mutex_lock(&detected_devices_mutex);
		list_add_tail(&node_detected_dev->list, &all_detected_devices);
		mutex_unlock(&detected_devices_mutex);
	} else {
		printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
			", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
@@ -8835,6 +8867,7 @@ static void autostart_arrays(int part)

	printk(KERN_INFO "md: Autodetecting RAID arrays.\n");

	mutex_lock(&detected_devices_mutex);
	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
		i_scanned++;
		node_detected_dev = list_entry(all_detected_devices.next,
@@ -8853,6 +8886,7 @@ static void autostart_arrays(int part)
		list_add(&rdev->same_set, &pending_raid_disks);
		i_passed++;
	}
	mutex_unlock(&detected_devices_mutex);

	printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
						i_scanned, i_passed);
+9 −1
Original line number Diff line number Diff line
@@ -99,7 +99,7 @@ struct md_rdev {
	atomic_t	read_errors;	/* number of consecutive read errors that
					 * we have tried to ignore.
					 */
	struct timespec last_read_error;	/* monotonic time since our
	time64_t	last_read_error;	/* monotonic time since our
						 * last read error
						 */
	atomic_t	corrected_errors; /* number of corrected read errors,
@@ -163,6 +163,11 @@ enum flag_bits {
				 * than other devices in the array
				 */
	ClusterRemove,
	RemoveSynchronized,	/* synchronize_rcu() was called after
				 * this device was known to be faulty,
				 * so it is safe to remove without
				 * another synchronize_rcu() call.
				 */
};

static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
@@ -204,6 +209,9 @@ struct mddev {
#define MD_RELOAD_SB	7	/* Reload the superblock because another node
				 * updated it.
				 */
#define MD_CLUSTER_RESYNC_LOCKED 8 /* cluster raid only, which means node
				    * already took resync lock, need to
				    * release the lock */

	int				suspended;
	atomic_t			active_io;
+17 −12
Original line number Diff line number Diff line
@@ -43,7 +43,8 @@ static int multipath_map (struct mpconf *conf)
	rcu_read_lock();
	for (i = 0; i < disks; i++) {
		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
		if (rdev && test_bit(In_sync, &rdev->flags)) {
		if (rdev && test_bit(In_sync, &rdev->flags) &&
		    !test_bit(Faulty, &rdev->flags)) {
			atomic_inc(&rdev->nr_pending);
			rcu_read_unlock();
			return i;
@@ -148,10 +149,12 @@ static void multipath_status (struct seq_file *seq, struct mddev *mddev)

	seq_printf (seq, " [%d/%d] [", conf->raid_disks,
		    conf->raid_disks - mddev->degraded);
	for (i = 0; i < conf->raid_disks; i++)
		seq_printf (seq, "%s",
			       conf->multipaths[i].rdev &&
			       test_bit(In_sync, &conf->multipaths[i].rdev->flags) ? "U" : "_");
	rcu_read_lock();
	for (i = 0; i < conf->raid_disks; i++) {
		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
	}
	rcu_read_unlock();
	seq_printf (seq, "]");
}

@@ -295,6 +298,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
			goto abort;
		}
		p->rdev = NULL;
		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
			synchronize_rcu();
			if (atomic_read(&rdev->nr_pending)) {
				/* lost the race, try later */
@@ -302,6 +306,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
				p->rdev = rdev;
				goto abort;
			}
		}
		err = md_integrity_register(mddev);
	}
abort:
+67 −63
Original line number Diff line number Diff line
@@ -319,14 +319,13 @@ static void raid1_end_read_request(struct bio *bio)
{
	int uptodate = !bio->bi_error;
	struct r1bio *r1_bio = bio->bi_private;
	int mirror;
	struct r1conf *conf = r1_bio->mddev->private;
	struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;

	mirror = r1_bio->read_disk;
	/*
	 * this branch is our 'one mirror IO has finished' event handler:
	 */
	update_head_pos(mirror, r1_bio);
	update_head_pos(r1_bio->read_disk, r1_bio);

	if (uptodate)
		set_bit(R1BIO_Uptodate, &r1_bio->state);
@@ -339,14 +338,14 @@ static void raid1_end_read_request(struct bio *bio)
		spin_lock_irqsave(&conf->device_lock, flags);
		if (r1_bio->mddev->degraded == conf->raid_disks ||
		    (r1_bio->mddev->degraded == conf->raid_disks-1 &&
		     test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
		     test_bit(In_sync, &rdev->flags)))
			uptodate = 1;
		spin_unlock_irqrestore(&conf->device_lock, flags);
	}

	if (uptodate) {
		raid_end_bio_io(r1_bio);
		rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
		rdev_dec_pending(rdev, conf->mddev);
	} else {
		/*
		 * oops, read error:
@@ -356,7 +355,7 @@ static void raid1_end_read_request(struct bio *bio)
			KERN_ERR "md/raid1:%s: %s: "
			"rescheduling sector %llu\n",
			mdname(conf->mddev),
			bdevname(conf->mirrors[mirror].rdev->bdev,
			bdevname(rdev->bdev,
				 b),
			(unsigned long long)r1_bio->sector);
		set_bit(R1BIO_ReadError, &r1_bio->state);
@@ -403,20 +402,18 @@ static void r1_bio_write_done(struct r1bio *r1_bio)
static void raid1_end_write_request(struct bio *bio)
{
	struct r1bio *r1_bio = bio->bi_private;
	int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
	int behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
	struct r1conf *conf = r1_bio->mddev->private;
	struct bio *to_put = NULL;

	mirror = find_bio_disk(r1_bio, bio);
	int mirror = find_bio_disk(r1_bio, bio);
	struct md_rdev *rdev = conf->mirrors[mirror].rdev;

	/*
	 * 'one mirror IO has finished' event handler:
	 */
	if (bio->bi_error) {
		set_bit(WriteErrorSeen,
			&conf->mirrors[mirror].rdev->flags);
		if (!test_and_set_bit(WantReplacement,
				      &conf->mirrors[mirror].rdev->flags))
		set_bit(WriteErrorSeen,	&rdev->flags);
		if (!test_and_set_bit(WantReplacement, &rdev->flags))
			set_bit(MD_RECOVERY_NEEDED, &
				conf->mddev->recovery);

@@ -445,13 +442,12 @@ static void raid1_end_write_request(struct bio *bio)
		 * before rdev->recovery_offset, but for simplicity we don't
		 * check this here.
		 */
		if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
		    !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
		if (test_bit(In_sync, &rdev->flags) &&
		    !test_bit(Faulty, &rdev->flags))
			set_bit(R1BIO_Uptodate, &r1_bio->state);

		/* Maybe we can clear some bad blocks. */
		if (is_badblock(conf->mirrors[mirror].rdev,
				r1_bio->sector, r1_bio->sectors,
		if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
				&first_bad, &bad_sectors)) {
			r1_bio->bios[mirror] = IO_MADE_GOOD;
			set_bit(R1BIO_MadeGood, &r1_bio->state);
@@ -459,7 +455,7 @@ static void raid1_end_write_request(struct bio *bio)
	}

	if (behind) {
		if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
		if (test_bit(WriteMostly, &rdev->flags))
			atomic_dec(&r1_bio->behind_remaining);

		/*
@@ -483,8 +479,7 @@ static void raid1_end_write_request(struct bio *bio)
		}
	}
	if (r1_bio->bios[mirror] == NULL)
		rdev_dec_pending(conf->mirrors[mirror].rdev,
				 conf->mddev);
		rdev_dec_pending(rdev, conf->mddev);

	/*
	 * Let's see if all mirrored write operations have finished
@@ -689,13 +684,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
		if (!rdev)
			goto retry;
		atomic_inc(&rdev->nr_pending);
		if (test_bit(Faulty, &rdev->flags)) {
			/* cannot risk returning a device that failed
			 * before we inc'ed nr_pending
			 */
			rdev_dec_pending(rdev, conf->mddev);
			goto retry;
		}
		sectors = best_good_sectors;

		if (conf->mirrors[best_disk].next_seq_sect != this_sector)
@@ -1666,13 +1654,16 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
			goto abort;
		}
		p->rdev = NULL;
		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
			synchronize_rcu();
			if (atomic_read(&rdev->nr_pending)) {
				/* lost the race, try later */
				err = -EBUSY;
				p->rdev = rdev;
				goto abort;
		} else if (conf->mirrors[conf->raid_disks + number].rdev) {
			}
		}
		if (conf->mirrors[conf->raid_disks + number].rdev) {
			/* We just removed a device that is being replaced.
			 * Move down the replacement.  We drain all IO before
			 * doing this to avoid confusion.
@@ -1719,11 +1710,9 @@ static void end_sync_write(struct bio *bio)
	struct r1bio *r1_bio = bio->bi_private;
	struct mddev *mddev = r1_bio->mddev;
	struct r1conf *conf = mddev->private;
	int mirror=0;
	sector_t first_bad;
	int bad_sectors;

	mirror = find_bio_disk(r1_bio, bio);
	struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev;

	if (!uptodate) {
		sector_t sync_blocks = 0;
@@ -1736,16 +1725,12 @@ static void end_sync_write(struct bio *bio)
			s += sync_blocks;
			sectors_to_go -= sync_blocks;
		} while (sectors_to_go > 0);
		set_bit(WriteErrorSeen,
			&conf->mirrors[mirror].rdev->flags);
		if (!test_and_set_bit(WantReplacement,
				      &conf->mirrors[mirror].rdev->flags))
		set_bit(WriteErrorSeen, &rdev->flags);
		if (!test_and_set_bit(WantReplacement, &rdev->flags))
			set_bit(MD_RECOVERY_NEEDED, &
				mddev->recovery);
		set_bit(R1BIO_WriteError, &r1_bio->state);
	} else if (is_badblock(conf->mirrors[mirror].rdev,
			       r1_bio->sector,
			       r1_bio->sectors,
	} else if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
			       &first_bad, &bad_sectors) &&
		   !is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
				r1_bio->sector,
@@ -2072,29 +2057,30 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
			s = PAGE_SIZE >> 9;

		do {
			/* Note: no rcu protection needed here
			 * as this is synchronous in the raid1d thread
			 * which is the thread that might remove
			 * a device.  If raid1d ever becomes multi-threaded....
			 */
			sector_t first_bad;
			int bad_sectors;

			rdev = conf->mirrors[d].rdev;
			rcu_read_lock();
			rdev = rcu_dereference(conf->mirrors[d].rdev);
			if (rdev &&
			    (test_bit(In_sync, &rdev->flags) ||
			     (!test_bit(Faulty, &rdev->flags) &&
			      rdev->recovery_offset >= sect + s)) &&
			    is_badblock(rdev, sect, s,
					&first_bad, &bad_sectors) == 0 &&
			    sync_page_io(rdev, sect, s<<9,
					&first_bad, &bad_sectors) == 0) {
				atomic_inc(&rdev->nr_pending);
				rcu_read_unlock();
				if (sync_page_io(rdev, sect, s<<9,
					 conf->tmppage, REQ_OP_READ, 0, false))
					success = 1;
			else {
				rdev_dec_pending(rdev, mddev);
				if (success)
					break;
			} else
				rcu_read_unlock();
			d++;
			if (d == conf->raid_disks * 2)
				d = 0;
			}
		} while (!success && d != read_disk);

		if (!success) {
@@ -2110,11 +2096,17 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
			if (d==0)
				d = conf->raid_disks * 2;
			d--;
			rdev = conf->mirrors[d].rdev;
			rcu_read_lock();
			rdev = rcu_dereference(conf->mirrors[d].rdev);
			if (rdev &&
			    !test_bit(Faulty, &rdev->flags))
			    !test_bit(Faulty, &rdev->flags)) {
				atomic_inc(&rdev->nr_pending);
				rcu_read_unlock();
				r1_sync_page_io(rdev, sect, s,
						conf->tmppage, WRITE);
				rdev_dec_pending(rdev, mddev);
			} else
				rcu_read_unlock();
		}
		d = start;
		while (d != read_disk) {
@@ -2122,9 +2114,12 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
			if (d==0)
				d = conf->raid_disks * 2;
			d--;
			rdev = conf->mirrors[d].rdev;
			rcu_read_lock();
			rdev = rcu_dereference(conf->mirrors[d].rdev);
			if (rdev &&
			    !test_bit(Faulty, &rdev->flags)) {
				atomic_inc(&rdev->nr_pending);
				rcu_read_unlock();
				if (r1_sync_page_io(rdev, sect, s,
						    conf->tmppage, READ)) {
					atomic_add(s, &rdev->corrected_errors);
@@ -2136,7 +2131,9 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
								    rdev->data_offset),
					       bdevname(rdev->bdev, b));
				}
			}
				rdev_dec_pending(rdev, mddev);
			} else
				rcu_read_unlock();
		}
		sectors -= s;
		sect += s;
@@ -2534,6 +2531,13 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
		return sync_blocks;
	}

	/*
	 * If there is non-resync activity waiting for a turn, then let it
	 * though before starting on this new sync request.
	 */
	if (conf->nr_waiting)
		schedule_timeout_uninterruptible(1);

	/* we are incrementing sector_nr below. To be safe, we check against
	 * sector_nr + two times RESYNC_SECTORS
	 */
+152 −98

File changed.

Preview size limit exceeded, changes collapsed.

Loading