
Commit 8b4822de authored by Linus Torvalds
Pull MD fixes from Shaohua Li:

 - Several bug fixes for raid5-cache from Song Liu, mainly handling
   journal disk errors

 - Fix bad block handling in choosing raid1 disk from Tomasz Majchrzak

 - Simplify external metadata array sysfs handling from Artur
   Paszkiewicz

 - Optimize raid0 discard handling, from me: raid0 now dispatches large
   discard IO directly to the underlying disks (see the sketch right
   after this list).
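The raid0 optimization is easiest to see outside the kernel, so here is a userspace model of the per-disk range computation that the new raid0_handle_discard() performs (the real code is in the raid0.c diff below). This is a sketch only: it assumes a single strip zone, the chunk size and disk count are invented example values, and printf() stands in for __blkdev_issue_discard().

/* Userspace model of the new raid0 discard path; not the kernel code.
 * Rather than splitting a large discard into chunk-sized pieces, compute
 * one contiguous [dev_start, dev_end) sector range per member disk and
 * issue a single ranged discard to each. */
#include <stdio.h>

#define CHUNK_SECTORS	128ULL	/* invented example: 64 KiB chunks   */
#define NDISKS		4ULL	/* invented example: 4 member disks  */

static void handle_discard(unsigned long long start, unsigned long long end)
{
	unsigned long long stripe_size = NDISKS * CHUNK_SECTORS;
	unsigned long long first_stripe = start / stripe_size;
	unsigned long long last_stripe = end / stripe_size;
	unsigned long long start_disk = start % stripe_size / CHUNK_SECTORS;
	unsigned long long end_disk = end % stripe_size / CHUNK_SECTORS;
	unsigned long long start_off = start % CHUNK_SECTORS +
				       first_stripe * CHUNK_SECTORS;
	unsigned long long end_off = end % CHUNK_SECTORS +
				     last_stripe * CHUNK_SECTORS;
	unsigned long long disk;

	for (disk = 0; disk < NDISKS; disk++) {
		/* Disks before the start disk begin one chunk further in;
		 * disks after it start at the first stripe's chunk. */
		unsigned long long dev_start =
			disk < start_disk ? (first_stripe + 1) * CHUNK_SECTORS :
			disk > start_disk ? first_stripe * CHUNK_SECTORS :
			start_off;
		unsigned long long dev_end =
			disk < end_disk ? (last_stripe + 1) * CHUNK_SECTORS :
			disk > end_disk ? last_stripe * CHUNK_SECTORS :
			end_off;

		if (dev_end <= dev_start)	/* nothing to trim here */
			continue;
		/* kernel: __blkdev_issue_discard(rdev->bdev, ...) */
		printf("disk %llu: discard sectors [%llu, %llu)\n",
		       disk, dev_start, dev_end);
	}
}

int main(void)
{
	handle_discard(100, 1500);	/* discard array sectors [100, 1500) */
	return 0;
}

For this example the two middle disks each get a single [0, 384) discard and the edge disks get trimmed ranges; the per-disk totals still add up to the 1400 discarded sectors.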

* tag 'md/4.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid1: prefer disk without bad blocks
  md/r5cache: handle sync with data in write back cache
  md/r5cache: gracefully handle journal device errors for writeback mode
  md/raid1/10: avoid unnecessary locking
  md/raid5-cache: in r5l_do_submit_io(), submit io->split_bio first
  md/md0: optimize raid0 discard handling
  md: don't return -EAGAIN in md_allow_write for external metadata arrays
  md/raid5: make use of spin_lock_irq over local_irq_disable + spin_lock
parents 667f867c d82dd0e3
drivers/md/md.c +8 −12
@@ -8022,18 +8022,15 @@ EXPORT_SYMBOL(md_write_end);
  * may proceed without blocking.  It is important to call this before
  * attempting a GFP_KERNEL allocation while holding the mddev lock.
  * Must be called with mddev_lock held.
- *
- * In the ->external case MD_SB_CHANGE_PENDING can not be cleared until mddev->lock
- * is dropped, so return -EAGAIN after notifying userspace.
  */
-int md_allow_write(struct mddev *mddev)
+void md_allow_write(struct mddev *mddev)
 {
 	if (!mddev->pers)
-		return 0;
+		return;
 	if (mddev->ro)
-		return 0;
+		return;
 	if (!mddev->pers->sync_request)
-		return 0;
+		return;
 
 	spin_lock(&mddev->lock);
 	if (mddev->in_sync) {
@@ -8046,13 +8043,12 @@ int md_allow_write(struct mddev *mddev)
 		spin_unlock(&mddev->lock);
 		md_update_sb(mddev, 0);
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
+		/* wait for the dirty state to be recorded in the metadata */
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) &&
+			   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 	} else
 		spin_unlock(&mddev->lock);
-
-	if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
-		return -EAGAIN;
-	else
-		return 0;
 }
 EXPORT_SYMBOL_GPL(md_allow_write);
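The visible change in this hunk is the contract: md_allow_write() used to hand the ->external MD_SB_CHANGE_PENDING case back to every caller as -EAGAIN, and now waits for the dirty state to be recorded itself, so the error handling disappears from the call sites (the raid1_reshape() hunk further down shows one). A minimal userspace sketch of the old versus new shape, with stand-in names rather than kernel code:

/* Sketch of the md_allow_write() contract change; all names are
 * stand-ins, not kernel code.  Old: could return -EAGAIN and every
 * caller had to check it.  New: returns void and only comes back once
 * the dirty state is recorded (wait_event(mddev->sb_wait, ...) in the
 * kernel). */
#include <stdio.h>

static int sb_change_pending = 1;

static void record_dirty_state(void)	/* stands in for the metadata update */
{
	sb_change_pending = 0;
}

static void allow_write(void)		/* new-style md_allow_write() */
{
	while (sb_change_pending)
		record_dirty_state();	/* kernel: sleep until sb_wait fires */
}

int main(void)
{
	/* Callers shrink from "err = md_allow_write(mddev); if (err) ..."
	 * to a bare call before a GFP_KERNEL allocation under mddev_lock. */
	allow_write();
	printf("superblock marked dirty; writes may proceed\n");
	return 0;
}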

drivers/md/md.h +1 −1
@@ -665,7 +665,7 @@ extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
 			bool metadata_op);
 extern void md_do_sync(struct md_thread *thread);
 extern void md_new_event(struct mddev *mddev);
-extern int md_allow_write(struct mddev *mddev);
+extern void md_allow_write(struct mddev *mddev);
 extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
 extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
 extern int md_check_no_bitmap(struct mddev *mddev);
drivers/md/raid0.c +102 −14
@@ -385,7 +385,7 @@ static int raid0_run(struct mddev *mddev)
 		blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
 		blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors);
-		blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+		blk_queue_max_discard_sectors(mddev->queue, UINT_MAX);
 
 		blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
 		blk_queue_io_opt(mddev->queue,
@@ -459,6 +459,95 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
 	}
 }
 
+static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
+{
+	struct r0conf *conf = mddev->private;
+	struct strip_zone *zone;
+	sector_t start = bio->bi_iter.bi_sector;
+	sector_t end;
+	unsigned int stripe_size;
+	sector_t first_stripe_index, last_stripe_index;
+	sector_t start_disk_offset;
+	unsigned int start_disk_index;
+	sector_t end_disk_offset;
+	unsigned int end_disk_index;
+	unsigned int disk;
+
+	zone = find_zone(conf, &start);
+
+	if (bio_end_sector(bio) > zone->zone_end) {
+		struct bio *split = bio_split(bio,
+			zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO,
+			mddev->bio_set);
+		bio_chain(split, bio);
+		generic_make_request(bio);
+		bio = split;
+		end = zone->zone_end;
+	} else
+		end = bio_end_sector(bio);
+
+	if (zone != conf->strip_zone)
+		end = end - zone[-1].zone_end;
+
+	/* Now start and end is the offset in zone */
+	stripe_size = zone->nb_dev * mddev->chunk_sectors;
+
+	first_stripe_index = start;
+	sector_div(first_stripe_index, stripe_size);
+	last_stripe_index = end;
+	sector_div(last_stripe_index, stripe_size);
+
+	start_disk_index = (int)(start - first_stripe_index * stripe_size) /
+		mddev->chunk_sectors;
+	start_disk_offset = ((int)(start - first_stripe_index * stripe_size) %
+		mddev->chunk_sectors) +
+		first_stripe_index * mddev->chunk_sectors;
+	end_disk_index = (int)(end - last_stripe_index * stripe_size) /
+		mddev->chunk_sectors;
+	end_disk_offset = ((int)(end - last_stripe_index * stripe_size) %
+		mddev->chunk_sectors) +
+		last_stripe_index * mddev->chunk_sectors;
+
+	for (disk = 0; disk < zone->nb_dev; disk++) {
+		sector_t dev_start, dev_end;
+		struct bio *discard_bio = NULL;
+		struct md_rdev *rdev;
+
+		if (disk < start_disk_index)
+			dev_start = (first_stripe_index + 1) *
+				mddev->chunk_sectors;
+		else if (disk > start_disk_index)
+			dev_start = first_stripe_index * mddev->chunk_sectors;
+		else
+			dev_start = start_disk_offset;
+
+		if (disk < end_disk_index)
+			dev_end = (last_stripe_index + 1) * mddev->chunk_sectors;
+		else if (disk > end_disk_index)
+			dev_end = last_stripe_index * mddev->chunk_sectors;
+		else
+			dev_end = end_disk_offset;
+
+		if (dev_end <= dev_start)
+			continue;
+
+		rdev = conf->devlist[(zone - conf->strip_zone) *
+			conf->strip_zone[0].nb_dev + disk];
+		if (__blkdev_issue_discard(rdev->bdev,
+			dev_start + zone->dev_start + rdev->data_offset,
+			dev_end - dev_start, GFP_NOIO, 0, &discard_bio) ||
+		    !discard_bio)
+			continue;
+		bio_chain(discard_bio, bio);
+		if (mddev->gendisk)
+			trace_block_bio_remap(bdev_get_queue(rdev->bdev),
+				discard_bio, disk_devt(mddev->gendisk),
+				bio->bi_iter.bi_sector);
+		generic_make_request(discard_bio);
+	}
+	bio_endio(bio);
+}
+
 static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct strip_zone *zone;
@@ -473,6 +562,11 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 		return;
 	}
 
+	if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
+		raid0_handle_discard(mddev, bio);
+		return;
+	}
+
 	bio_sector = bio->bi_iter.bi_sector;
 	sector = bio_sector;
 	chunk_sects = mddev->chunk_sectors;
@@ -498,11 +592,6 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 	bio->bi_iter.bi_sector = sector + zone->dev_start +
 		tmp_dev->data_offset;
 
-	if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
-		     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
-		/* Just ignore it */
-		bio_endio(bio);
-	} else {
 	if (mddev->gendisk)
 		trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
 				      bio, disk_devt(mddev->gendisk),
@@ -511,7 +600,6 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 	mddev_check_write_zeroes(mddev, bio);
 	generic_make_request(bio);
-	}
 }
 
 static void raid0_status(struct seq_file *seq, struct mddev *mddev)
 {
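A back-of-the-envelope comparison shows what the UINT_MAX queue limit plus raid0_handle_discard() buy; the numbers below (4 disks, 512 KiB chunks, 1 GiB discard) are invented for illustration, not taken from the commit:

/* Rough bio-count comparison for one large discard on raid0; the
 * array geometry here is an invented example. */
#include <stdio.h>

int main(void)
{
	unsigned long long discard_bytes = 1ULL << 30;	/* 1 GiB   */
	unsigned long long chunk_bytes = 512ULL << 10;	/* 512 KiB */
	unsigned long long ndisks = 4;

	/* Before: max_discard_sectors == chunk_sectors, so the block
	 * layer split the discard into one bio per chunk. */
	printf("before: %llu bios\n", discard_bytes / chunk_bytes);
	/* After: the bio is split only at zone boundaries, then fans
	 * out as one ranged discard per member disk. */
	printf("after:  %llu bios\n", ndisks);
	return 0;
}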
drivers/md/raid1.c +10 −11
@@ -666,8 +666,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 					break;
 			}
 			continue;
-		} else
+		} else {
+			if ((sectors > best_good_sectors) && (best_disk >= 0))
+				best_disk = -1;
 			best_good_sectors = sectors;
+		}
 
 		if (best_disk >= 0)
 			/* At least two disks to choose from so failfast is OK */
@@ -1529,18 +1532,17 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			plug = container_of(cb, struct raid1_plug_cb, cb);
 		else
 			plug = NULL;
-		spin_lock_irqsave(&conf->device_lock, flags);
 		if (plug) {
 			bio_list_add(&plug->pending, mbio);
 			plug->pending_cnt++;
 		} else {
+			spin_lock_irqsave(&conf->device_lock, flags);
 			bio_list_add(&conf->pending_bio_list, mbio);
 			conf->pending_count++;
-		}
 			spin_unlock_irqrestore(&conf->device_lock, flags);
-		if (!plug)
 			md_wakeup_thread(mddev->thread);
+		}
 	}
 
 	r1_bio_write_done(r1_bio);
 
@@ -3197,7 +3199,7 @@ static int raid1_reshape(struct mddev *mddev)
 	struct r1conf *conf = mddev->private;
 	int cnt, raid_disks;
 	unsigned long flags;
-	int d, d2, err;
+	int d, d2;
 
 	/* Cannot change chunk_size, layout, or level */
 	if (mddev->chunk_sectors != mddev->new_chunk_sectors ||
@@ -3209,11 +3211,8 @@ static int raid1_reshape(struct mddev *mddev)
 		return -EINVAL;
 	}
 
-	if (!mddev_is_clustered(mddev)) {
-		err = md_allow_write(mddev);
-		if (err)
-			return err;
-	}
+	if (!mddev_is_clustered(mddev))
+		md_allow_write(mddev);
 
 	raid_disks = mddev->raid_disks + mddev->delta_disks;
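The raid1 plug hunk above and the raid10 hunk below apply the same observation: the blk-plug list is per-task, so it can be appended to without conf->device_lock, and the lock is only needed on the shared pending_bio_list path. A userspace sketch of that pattern with a pthread mutex (names invented, not the kernel code):

/* Userspace sketch of the raid1/raid10 locking change; names are
 * invented.  The plugged path touches only state private to the
 * submitting task, so it runs lock-free; the shared queue still
 * takes the lock. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER;
static int shared_pending;	/* stands in for conf->pending_bio_list */
static int plug_pending;	/* stands in for plug->pending (per-task) */

static void queue_write(int plugged)
{
	if (plugged) {
		/* Per-task plug list: no other thread can see it, so no
		 * lock is taken (the old code locked here too). */
		plug_pending++;
	} else {
		/* Shared with the md daemon thread: lock, queue, then
		 * wake the thread (kernel: md_wakeup_thread()). */
		pthread_mutex_lock(&device_lock);
		shared_pending++;
		pthread_mutex_unlock(&device_lock);
	}
}

int main(void)
{
	queue_write(1);
	queue_write(0);
	printf("plugged=%d shared=%d\n", plug_pending, shared_pending);
	return 0;
}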

drivers/md/raid10.c +3 −4
@@ -1282,18 +1282,17 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 		plug = container_of(cb, struct raid10_plug_cb, cb);
 	else
 		plug = NULL;
-	spin_lock_irqsave(&conf->device_lock, flags);
 	if (plug) {
 		bio_list_add(&plug->pending, mbio);
 		plug->pending_cnt++;
 	} else {
+		spin_lock_irqsave(&conf->device_lock, flags);
 		bio_list_add(&conf->pending_bio_list, mbio);
 		conf->pending_count++;
-	}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
-	if (!plug)
 		md_wakeup_thread(mddev->thread);
+	}
 }
 
 static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 				 struct r10bio *r10_bio)