Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0e70613b authored by Linus Torvalds
Browse files

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md/raid5: Allow dirty-degraded arrays to be assembled when only parity is degraded.
  Don't unconditionally set in_sync on newly added device in raid5_reshape
  md: allow v0.91 metadata to record devices as being active but not in-sync.
  md: factor out updating of 'recovery_offset'.
parents e0a2af1e c148ffdc
Loading
Loading
Loading
Loading
+33 −8
Original line number Original line Diff line number Diff line
@@ -944,6 +944,14 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
			    desc->raid_disk < mddev->raid_disks */) {
			    desc->raid_disk < mddev->raid_disks */) {
			set_bit(In_sync, &rdev->flags);
			set_bit(In_sync, &rdev->flags);
			rdev->raid_disk = desc->raid_disk;
			rdev->raid_disk = desc->raid_disk;
		} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
			/* active but not in sync implies recovery up to
			 * reshape position.  We don't know exactly where
			 * that is, so set to zero for now */
			if (mddev->minor_version >= 91) {
				rdev->recovery_offset = 0;
				rdev->raid_disk = desc->raid_disk;
			}
		}
		}
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
		if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
			set_bit(WriteMostly, &rdev->flags);
			set_bit(WriteMostly, &rdev->flags);
@@ -1032,8 +1040,19 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
	list_for_each_entry(rdev2, &mddev->disks, same_set) {
	list_for_each_entry(rdev2, &mddev->disks, same_set) {
		mdp_disk_t *d;
		mdp_disk_t *d;
		int desc_nr;
		int desc_nr;
		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
		int is_active = test_bit(In_sync, &rdev2->flags);
		    && !test_bit(Faulty, &rdev2->flags))

		if (rdev2->raid_disk >= 0 &&
		    sb->minor_version >= 91)
			/* we have nowhere to store the recovery_offset,
			 * but if it is not below the reshape_position,
			 * we can piggy-back on that.
			 */
			is_active = 1;
		if (rdev2->raid_disk < 0 ||
		    test_bit(Faulty, &rdev2->flags))
			is_active = 0;
		if (is_active)
			desc_nr = rdev2->raid_disk;
			desc_nr = rdev2->raid_disk;
		else
		else
			desc_nr = next_spare++;
			desc_nr = next_spare++;
@@ -1043,15 +1062,15 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
		d->number = rdev2->desc_nr;
		d->number = rdev2->desc_nr;
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->major = MAJOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		d->minor = MINOR(rdev2->bdev->bd_dev);
		if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
		if (is_active)
		    && !test_bit(Faulty, &rdev2->flags))
			d->raid_disk = rdev2->raid_disk;
			d->raid_disk = rdev2->raid_disk;
		else
		else
			d->raid_disk = rdev2->desc_nr; /* compatibility */
			d->raid_disk = rdev2->desc_nr; /* compatibility */
		if (test_bit(Faulty, &rdev2->flags))
		if (test_bit(Faulty, &rdev2->flags))
			d->state = (1<<MD_DISK_FAULTY);
			d->state = (1<<MD_DISK_FAULTY);
		else if (test_bit(In_sync, &rdev2->flags)) {
		else if (is_active) {
			d->state = (1<<MD_DISK_ACTIVE);
			d->state = (1<<MD_DISK_ACTIVE);
			if (test_bit(In_sync, &rdev2->flags))
				d->state |= (1<<MD_DISK_SYNC);
				d->state |= (1<<MD_DISK_SYNC);
			active++;
			active++;
			working++;
			working++;
@@ -1382,8 +1401,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)


	if (rdev->raid_disk >= 0 &&
	if (rdev->raid_disk >= 0 &&
	    !test_bit(In_sync, &rdev->flags)) {
	    !test_bit(In_sync, &rdev->flags)) {
		if (mddev->curr_resync_completed > rdev->recovery_offset)
			rdev->recovery_offset = mddev->curr_resync_completed;
		if (rdev->recovery_offset > 0) {
		if (rdev->recovery_offset > 0) {
			sb->feature_map |=
			sb->feature_map |=
				cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
				cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
@@ -1917,6 +1934,14 @@ static void sync_sbs(mddev_t * mddev, int nospares)
	 */
	 */
	mdk_rdev_t *rdev;
	mdk_rdev_t *rdev;


	/* First make sure individual recovery_offsets are correct */
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		if (rdev->raid_disk >= 0 &&
		    !test_bit(In_sync, &rdev->flags) &&
		    mddev->curr_resync_completed > rdev->recovery_offset)
				rdev->recovery_offset = mddev->curr_resync_completed;

	}	
	list_for_each_entry(rdev, &mddev->disks, same_set) {
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		if (rdev->sb_events == mddev->events ||
		if (rdev->sb_events == mddev->events ||
		    (nospares &&
		    (nospares &&
+79 −6
Original line number Original line Diff line number Diff line
@@ -4823,11 +4823,40 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
		return ERR_PTR(-ENOMEM);
		return ERR_PTR(-ENOMEM);
}
}



/*
 * only_parity - test a device slot against the parity placement of a layout.
 * @raid_disk:    slot index of the device being examined
 * @algo:         layout algorithm identifier (one of the ALGORITHM_* values)
 * @raid_disks:   number of device slots in the layout being checked
 * @max_degraded: number of slots compared against for the PARITY_0 and
 *                PARITY_N layouts
 *
 * Returns 1 when @raid_disk matches the slot test for @algo, and 0
 * otherwise, including for any @algo not listed below.  run() uses a
 * non-zero result to count a not-in-sync device as a "dirty parity" disk.
 */
static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
{
	int matches = 0;

	switch (algo) {
	case ALGORITHM_PARITY_0:
		/* the leading max_degraded slots */
		matches = (raid_disk < max_degraded);
		break;

	case ALGORITHM_PARITY_N:
		/* the trailing max_degraded slots */
		matches = (raid_disk >= raid_disks - max_degraded);
		break;

	case ALGORITHM_PARITY_0_6:
		/* the very first slot, or the very last one */
		if (raid_disk == 0)
			matches = 1;
		else
			matches = (raid_disk == raid_disks - 1);
		break;

	case ALGORITHM_LEFT_ASYMMETRIC_6:
	case ALGORITHM_RIGHT_ASYMMETRIC_6:
	case ALGORITHM_LEFT_SYMMETRIC_6:
	case ALGORITHM_RIGHT_SYMMETRIC_6:
		/* only the last slot */
		matches = (raid_disk == raid_disks - 1);
		break;

	default:
		/* every other layout: no slot matches */
		break;
	}

	return matches ? 1 : 0;
}

static int run(mddev_t *mddev)
static int run(mddev_t *mddev)
{
{
	raid5_conf_t *conf;
	raid5_conf_t *conf;
	int working_disks = 0, chunk_size;
	int working_disks = 0, chunk_size;
	int dirty_parity_disks = 0;
	mdk_rdev_t *rdev;
	mdk_rdev_t *rdev;
	sector_t reshape_offset = 0;


	if (mddev->recovery_cp != MaxSector)
	if (mddev->recovery_cp != MaxSector)
		printk(KERN_NOTICE "raid5: %s is not clean"
		printk(KERN_NOTICE "raid5: %s is not clean"
@@ -4861,6 +4890,7 @@ static int run(mddev_t *mddev)
			       "on a stripe boundary\n");
			       "on a stripe boundary\n");
			return -EINVAL;
			return -EINVAL;
		}
		}
		reshape_offset = here_new * mddev->new_chunk_sectors;
		/* here_new is the stripe we will write to */
		/* here_new is the stripe we will write to */
		here_old = mddev->reshape_position;
		here_old = mddev->reshape_position;
		sector_div(here_old, mddev->chunk_sectors *
		sector_div(here_old, mddev->chunk_sectors *
@@ -4916,10 +4946,51 @@ static int run(mddev_t *mddev)
	/*
	/*
	 * 0 for a fully functional array, 1 or 2 for a degraded array.
	 * 0 for a fully functional array, 1 or 2 for a degraded array.
	 */
	 */
	list_for_each_entry(rdev, &mddev->disks, same_set)
	list_for_each_entry(rdev, &mddev->disks, same_set) {
		if (rdev->raid_disk >= 0 &&
		if (rdev->raid_disk < 0)
		    test_bit(In_sync, &rdev->flags))
			continue;
		if (test_bit(In_sync, &rdev->flags))
			working_disks++;
			working_disks++;
		/* This disc is not fully in-sync.  However if it
		 * just stored parity (beyond the recovery_offset),
		 * then we don't need to be concerned about the
		 * array being dirty.
		 * When reshape goes 'backwards', we never have
		 * partially completed devices, so we only need
		 * to worry about reshape going forwards.
		 */
		/* Hack because v0.91 doesn't store recovery_offset properly. */
		if (mddev->major_version == 0 &&
		    mddev->minor_version > 90)
			rdev->recovery_offset = reshape_offset;
			
		printk("%d: w=%d pa=%d pr=%d m=%d a=%d r=%d op1=%d op2=%d\n",
		       rdev->raid_disk, working_disks, conf->prev_algo,
		       conf->previous_raid_disks, conf->max_degraded,
		       conf->algorithm, conf->raid_disks, 
		       only_parity(rdev->raid_disk,
				   conf->prev_algo,
				   conf->previous_raid_disks,
				   conf->max_degraded),
		       only_parity(rdev->raid_disk,
				   conf->algorithm,
				   conf->raid_disks,
				   conf->max_degraded));
		if (rdev->recovery_offset < reshape_offset) {
			/* We need to check old and new layout */
			if (!only_parity(rdev->raid_disk,
					 conf->algorithm,
					 conf->raid_disks,
					 conf->max_degraded))
				continue;
		}
		if (!only_parity(rdev->raid_disk,
				 conf->prev_algo,
				 conf->previous_raid_disks,
				 conf->max_degraded))
			continue;
		dirty_parity_disks++;
	}


	mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
	mddev->degraded = (max(conf->raid_disks, conf->previous_raid_disks)
			   - working_disks);
			   - working_disks);
@@ -4935,7 +5006,7 @@ static int run(mddev_t *mddev)
	mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
	mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
	mddev->resync_max_sectors = mddev->dev_sectors;
	mddev->resync_max_sectors = mddev->dev_sectors;


	if (mddev->degraded > 0 &&
	if (mddev->degraded > dirty_parity_disks &&
	    mddev->recovery_cp != MaxSector) {
	    mddev->recovery_cp != MaxSector) {
		if (mddev->ok_start_degraded)
		if (mddev->ok_start_degraded)
			printk(KERN_WARNING
			printk(KERN_WARNING
@@ -5361,9 +5432,11 @@ static int raid5_start_reshape(mddev_t *mddev)
		    !test_bit(Faulty, &rdev->flags)) {
		    !test_bit(Faulty, &rdev->flags)) {
			if (raid5_add_disk(mddev, rdev) == 0) {
			if (raid5_add_disk(mddev, rdev) == 0) {
				char nm[20];
				char nm[20];
				if (rdev->raid_disk >= conf->previous_raid_disks)
					set_bit(In_sync, &rdev->flags);
					set_bit(In_sync, &rdev->flags);
				added_devices++;
				else
					rdev->recovery_offset = 0;
					rdev->recovery_offset = 0;
				added_devices++;
				sprintf(nm, "rd%d", rdev->raid_disk);
				sprintf(nm, "rd%d", rdev->raid_disk);
				if (sysfs_create_link(&mddev->kobj,
				if (sysfs_create_link(&mddev->kobj,
						      &rdev->kobj, nm))
						      &rdev->kobj, nm))