Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 43f4d36c authored by Linus Torvalds
Browse files
Pull device mapper fixes from Mike Snitzer:

 - a stable fix for DM round robin multipath path selector to disable
   preemption before using this_cpu_ptr()

 - a slight increase in DM crypt's mempool reserves to make swap on top
   of DM crypt more performant

 - a few DM raid fixes for issues found while testing changes that were
   merged in v4.8-rc1

* tag 'dm-4.8-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm raid: support raid0 with missing metadata devices
  dm raid: enhance attempt_restore_of_faulty_devices() to support more devices
  dm raid: fix restoring of failed devices regression
  dm raid: fix frozen recovery regression
  dm crypt: increase mempool reserve to better support swapping
  dm round robin: do not use this_cpu_ptr() without having preemption disabled
parents b2848792 9e7d9367
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -181,7 +181,7 @@ struct crypt_config {
	u8 key[0];
};

#define MIN_IOS        16
#define MIN_IOS        64

static void clone_init(struct dm_crypt_io *, struct bio *);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
+47 −35
Original line number Diff line number Diff line
@@ -191,7 +191,6 @@ struct raid_dev {
#define RT_FLAG_RS_BITMAP_LOADED	2
#define RT_FLAG_UPDATE_SBS		3
#define RT_FLAG_RESHAPE_RS		4
#define RT_FLAG_KEEP_RS_FROZEN		5

/* Array elements of 64 bit needed for rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -861,6 +860,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
{
	unsigned long min_region_size = rs->ti->len / (1 << 21);

	if (rs_is_raid0(rs))
		return 0;

	if (!region_size) {
		/*
		 * Choose a reasonable default.	 All figures in sectors.
@@ -930,6 +932,8 @@ static int validate_raid_redundancy(struct raid_set *rs)
			rebuild_cnt++;

	switch (rs->raid_type->level) {
	case 0:
		break;
	case 1:
		if (rebuild_cnt >= rs->md.raid_disks)
			goto too_many;
@@ -2335,6 +2339,13 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
		case 0:
			break;
		default:
			/*
			 * We have to keep any raid0 data/metadata device pairs or
			 * the MD raid0 personality will fail to start the array.
			 */
			if (rs_is_raid0(rs))
				continue;

			dev = container_of(rdev, struct raid_dev, rdev);
			if (dev->meta_dev)
				dm_put_device(ti, dev->meta_dev);
@@ -2579,7 +2590,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
		} else {
			/* Process raid1 without delta_disks */
			mddev->raid_disks = rs->raid_disks;
			set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
			reshape = false;
		}
	} else {
@@ -2590,7 +2600,6 @@ static int rs_prepare_reshape(struct raid_set *rs)
	if (reshape) {
		set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags);
		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
		set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
	} else if (mddev->raid_disks < rs->raid_disks)
		/* Create new superblocks and bitmaps, if any new disks */
		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
@@ -2902,7 +2911,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
			goto bad;

		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
		set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
		/* Takeover ain't recovery, so disable recovery */
		rs_setup_recovery(rs, MaxSector);
		rs_set_new(rs);
@@ -3386,21 +3394,28 @@ static void raid_postsuspend(struct dm_target *ti)
{
	struct raid_set *rs = ti->private;

	if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
	if (!rs->md.suspended)
		mddev_suspend(&rs->md);

	rs->md.ro = 1;
}
}

static void attempt_restore_of_faulty_devices(struct raid_set *rs)
{
	int i;
	uint64_t failed_devices, cleared_failed_devices = 0;
	uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
	unsigned long flags;
	bool cleared = false;
	struct dm_raid_superblock *sb;
	struct mddev *mddev = &rs->md;
	struct md_rdev *r;

	/* RAID personalities have to provide hot add/remove methods or we need to bail out. */
	if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk)
		return;

	memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));

	for (i = 0; i < rs->md.raid_disks; i++) {
		r = &rs->dev[i].rdev;
		if (test_bit(Faulty, &r->flags) && r->sb_page &&
@@ -3420,7 +3435,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
			 * ourselves.
			 */
			if ((r->raid_disk >= 0) &&
			    (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
			    (mddev->pers->hot_remove_disk(mddev, r) != 0))
				/* Failed to revive this device, try next */
				continue;

@@ -3430,22 +3445,30 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
			clear_bit(Faulty, &r->flags);
			clear_bit(WriteErrorSeen, &r->flags);
			clear_bit(In_sync, &r->flags);
			if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
			if (mddev->pers->hot_add_disk(mddev, r)) {
				r->raid_disk = -1;
				r->saved_raid_disk = -1;
				r->flags = flags;
			} else {
				r->recovery_offset = 0;
				cleared_failed_devices |= 1 << i;
				set_bit(i, (void *) cleared_failed_devices);
				cleared = true;
			}
		}
	}
	if (cleared_failed_devices) {

	/* If any failed devices could be cleared, update all sbs failed_devices bits */
	if (cleared) {
		uint64_t failed_devices[DISKS_ARRAY_ELEMS];

		rdev_for_each(r, &rs->md) {
			sb = page_address(r->sb_page);
			failed_devices = le64_to_cpu(sb->failed_devices);
			failed_devices &= ~cleared_failed_devices;
			sb->failed_devices = cpu_to_le64(failed_devices);
			sb_retrieve_failed_devices(sb, failed_devices);

			for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
				failed_devices[i] &= ~cleared_failed_devices[i];

			sb_update_failed_devices(sb, failed_devices);
		}
	}
}
@@ -3610,27 +3633,16 @@ static void raid_resume(struct dm_target *ti)
		 * devices are reachable again.
		 */
		attempt_restore_of_faulty_devices(rs);
	} else {
	}

	mddev->ro = 0;
	mddev->in_sync = 0;

		/*
		 * When passing in flags to the ctr, we expect userspace
		 * to reset them because they made it to the superblocks
		 * and reload the mapping anyway.
		 *
		 * -> only unfreeze recovery in case of a table reload or
		 *    we'll have a bogus recovery/reshape position
		 *    retrieved from the superblock by the ctr because
		 *    the ongoing recovery/reshape will change it after read.
		 */
		if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags))
	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);

	if (mddev->suspended)
		mddev_resume(mddev);
}
}

static struct target_type raid_target = {
	.name = "raid",
+5 −2
Original line number Diff line number Diff line
@@ -210,14 +210,17 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes)
	struct path_info *pi = NULL;
	struct dm_path *current_path = NULL;

	local_irq_save(flags);
	current_path = *this_cpu_ptr(s->current_path);
	if (current_path) {
		percpu_counter_dec(&s->repeat_count);
		if (percpu_counter_read_positive(&s->repeat_count) > 0)
		if (percpu_counter_read_positive(&s->repeat_count) > 0) {
			local_irq_restore(flags);
			return current_path;
		}
	}

	spin_lock_irqsave(&s->lock, flags);
	spin_lock(&s->lock);
	if (!list_empty(&s->valid_paths)) {
		pi = list_entry(s->valid_paths.next, struct path_info, list);
		list_move_tail(&pi->list, &s->valid_paths);