Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 607ae5f2 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull md fixes from Shaohua Li:
 "Basically one fix for raid5 cache which is merged in this cycle,
  others are trival fixes"

* tag 'md/4.10-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/raid5: Use correct IS_ERR() variation on pointer check
  md: cleanup mddev flag clear for takeover
  md/r5cache: fix spelling mistake on "recoverying"
  md/r5cache: assign conf->log before r5l_load_log()
  md/r5cache: simplify handling of sh->log_start in recovery
  md/raid5-cache: removes unnecessary write-through mode judgments
  md/raid10: Refactor raid10_make_request
  md/raid1: Refactor raid1_make_request
parents ba836a6f 32cd7cbb
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -212,6 +212,7 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;

/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, do not open
@@ -702,4 +703,11 @@ static inline int mddev_is_clustered(struct mddev *mddev)
{
	return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
}

/* clear unsupported mddev_flags */
static inline void mddev_clear_unsupported_flags(struct mddev *mddev,
	unsigned long unsupported_flags)
{
	mddev->flags &= ~unsupported_flags;
}
#endif /* _MD_MD_H */
+8 −4
Original line number Diff line number Diff line
@@ -26,6 +26,11 @@
#include "raid0.h"
#include "raid5.h"

#define UNSUPPORTED_MDDEV_FLAGS		\
	((1L << MD_HAS_JOURNAL) |	\
	 (1L << MD_JOURNAL_CLEAN) |	\
	 (1L << MD_FAILFAST_SUPPORTED))

static int raid0_congested(struct mddev *mddev, int bits)
{
	struct r0conf *conf = mddev->private;
@@ -539,8 +544,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
	mddev->delta_disks = -1;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;
	clear_bit(MD_HAS_JOURNAL, &mddev->flags);
	clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);

	create_strip_zones(mddev, &priv_conf);

@@ -583,7 +587,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
	mddev->degraded = 0;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;
	clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);

	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
@@ -626,7 +630,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
	mddev->raid_disks = 1;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;
	clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
	mddev_clear_unsupported_flags(mddev, UNSUPPORTED_MDDEV_FLAGS);

	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
+145 −130
Original line number Diff line number Diff line
@@ -42,6 +42,10 @@
#include "raid1.h"
#include "bitmap.h"

#define UNSUPPORTED_MDDEV_FLAGS		\
	((1L << MD_HAS_JOURNAL) |	\
	 (1L << MD_JOURNAL_CLEAN))

/*
 * Number of guaranteed r1bios in case of extreme VM load:
 */
@@ -1066,95 +1070,21 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
	kfree(plug);
}

static void raid1_make_request(struct mddev *mddev, struct bio * bio)
static void raid1_read_request(struct mddev *mddev, struct bio *bio,
				 struct r1bio *r1_bio)
{
	struct r1conf *conf = mddev->private;
	struct raid1_info *mirror;
	struct r1bio *r1_bio;
	struct bio *read_bio;
	int i, disks;
	struct bitmap *bitmap;
	unsigned long flags;
	struct bitmap *bitmap = mddev->bitmap;
	const int op = bio_op(bio);
	const int rw = bio_data_dir(bio);
	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
	const unsigned long do_flush_fua = (bio->bi_opf &
						(REQ_PREFLUSH | REQ_FUA));
	struct md_rdev *blocked_rdev;
	struct blk_plug_cb *cb;
	struct raid1_plug_cb *plug = NULL;
	int first_clone;
	int sectors_handled;
	int max_sectors;
	sector_t start_next_window;

	/*
	 * Register the new request and wait if the reconstruction
	 * thread has put up a bar for new requests.
	 * Continue immediately if no resync is active currently.
	 */

	md_write_start(mddev, bio); /* wait on superblock update early */

	if (bio_data_dir(bio) == WRITE &&
	    ((bio_end_sector(bio) > mddev->suspend_lo &&
	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
	    (mddev_is_clustered(mddev) &&
	     md_cluster_ops->area_resyncing(mddev, WRITE,
		     bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
		/* As the suspend_* range is controlled by
		 * userspace, we want an interruptible
		 * wait.
		 */
		DEFINE_WAIT(w);
		for (;;) {
			flush_signals(current);
			prepare_to_wait(&conf->wait_barrier,
					&w, TASK_INTERRUPTIBLE);
			if (bio_end_sector(bio) <= mddev->suspend_lo ||
			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
			    (mddev_is_clustered(mddev) &&
			     !md_cluster_ops->area_resyncing(mddev, WRITE,
				     bio->bi_iter.bi_sector, bio_end_sector(bio))))
				break;
			schedule();
		}
		finish_wait(&conf->wait_barrier, &w);
	}

	start_next_window = wait_barrier(conf, bio);

	bitmap = mddev->bitmap;

	/*
	 * make_request() can abort the operation when read-ahead is being
	 * used and no empty request is available.
	 *
	 */
	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);

	r1_bio->master_bio = bio;
	r1_bio->sectors = bio_sectors(bio);
	r1_bio->state = 0;
	r1_bio->mddev = mddev;
	r1_bio->sector = bio->bi_iter.bi_sector;

	/* We might need to issue multiple reads to different
	 * devices if there are bad blocks around, so we keep
	 * track of the number of reads in bio->bi_phys_segments.
	 * If this is 0, there is only one r1_bio and no locking
	 * will be needed when requests complete.  If it is
	 * non-zero, then it is the number of not-completed requests.
	 */
	bio->bi_phys_segments = 0;
	bio_clear_flag(bio, BIO_SEG_VALID);

	if (rw == READ) {
		/*
		 * read balancing logic:
		 */
	int rdisk;

	wait_barrier(conf, bio);

read_again:
	rdisk = read_balance(conf, r1_bio, &max_sectors);

@@ -1167,9 +1097,9 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)

	if (test_bit(WriteMostly, &mirror->rdev->flags) &&
	    bitmap) {
			/* Reading from a write-mostly device must
			 * take care not to over-take any writes
			 * that are 'behind'
		/*
		 * Reading from a write-mostly device must take care not to
		 * over-take any writes that are 'behind'
		 */
		raid1_log(mddev, "wait behind writes");
		wait_event(bitmap->behind_wait,
@@ -1200,10 +1130,10 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
	                              r1_bio->sector);

	if (max_sectors < r1_bio->sectors) {
			/* could not read all from this device, so we will
			 * need another r1_bio.
		/*
		 * could not read all from this device, so we will need another
		 * r1_bio.
		 */

		sectors_handled = (r1_bio->sector + max_sectors
				   - bio->bi_iter.bi_sector);
		r1_bio->sectors = max_sectors;
@@ -1213,10 +1143,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
		else
			bio->bi_phys_segments++;
		spin_unlock_irq(&conf->device_lock);
			/* Cannot call generic_make_request directly
			 * as that will be queued in __make_request
			 * and subsequent mempool_alloc might block waiting
			 * for it.  So hand bio over to raid1d.

		/*
		 * Cannot call generic_make_request directly as that will be
		 * queued in __make_request and subsequent mempool_alloc might
		 * block waiting for it.  So hand bio over to raid1d.
		 */
		reschedule_retry(r1_bio);

@@ -1226,17 +1157,67 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
		r1_bio->sectors = bio_sectors(bio) - sectors_handled;
		r1_bio->state = 0;
		r1_bio->mddev = mddev;
			r1_bio->sector = bio->bi_iter.bi_sector +
				sectors_handled;
		r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
		goto read_again;
	} else
		generic_make_request(read_bio);
		return;
}

static void raid1_write_request(struct mddev *mddev, struct bio *bio,
				struct r1bio *r1_bio)
{
	struct r1conf *conf = mddev->private;
	int i, disks;
	struct bitmap *bitmap = mddev->bitmap;
	unsigned long flags;
	const int op = bio_op(bio);
	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
	const unsigned long do_flush_fua = (bio->bi_opf &
						(REQ_PREFLUSH | REQ_FUA));
	struct md_rdev *blocked_rdev;
	struct blk_plug_cb *cb;
	struct raid1_plug_cb *plug = NULL;
	int first_clone;
	int sectors_handled;
	int max_sectors;
	sector_t start_next_window;

	/*
	 * Register the new request and wait if the reconstruction
	 * thread has put up a bar for new requests.
	 * Continue immediately if no resync is active currently.
	 */

	md_write_start(mddev, bio); /* wait on superblock update early */

	if ((bio_end_sector(bio) > mddev->suspend_lo &&
	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
	    (mddev_is_clustered(mddev) &&
	     md_cluster_ops->area_resyncing(mddev, WRITE,
		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {

		/*
	 * WRITE:
		 * As the suspend_* range is controlled by userspace, we want
		 * an interruptible wait.
		 */
		DEFINE_WAIT(w);
		for (;;) {
			flush_signals(current);
			prepare_to_wait(&conf->wait_barrier,
					&w, TASK_INTERRUPTIBLE);
			if (bio_end_sector(bio) <= mddev->suspend_lo ||
			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
			    (mddev_is_clustered(mddev) &&
			     !md_cluster_ops->area_resyncing(mddev, WRITE,
				     bio->bi_iter.bi_sector,
				     bio_end_sector(bio))))
				break;
			schedule();
		}
		finish_wait(&conf->wait_barrier, &w);
	}
	start_next_window = wait_barrier(conf, bio);

	if (conf->pending_count >= max_queued_requests) {
		md_wakeup_thread(mddev->thread);
		raid1_log(mddev, "wait queued");
@@ -1280,8 +1261,7 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
			int bad_sectors;
			int is_bad;

			is_bad = is_badblock(rdev, r1_bio->sector,
					     max_sectors,
			is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
					     &first_bad, &bad_sectors);
			if (is_bad < 0) {
				/* mustn't write here until the bad block is
@@ -1370,7 +1350,8 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
			continue;

		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector,
			 max_sectors);

		if (first_clone) {
			/* do behind I/O ?
@@ -1464,6 +1445,40 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
	wake_up(&conf->wait_barrier);
}

static void raid1_make_request(struct mddev *mddev, struct bio *bio)
{
	struct r1conf *conf = mddev->private;
	struct r1bio *r1_bio;

	/*
	 * make_request() can abort the operation when read-ahead is being
	 * used and no empty request is available.
	 *
	 */
	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);

	r1_bio->master_bio = bio;
	r1_bio->sectors = bio_sectors(bio);
	r1_bio->state = 0;
	r1_bio->mddev = mddev;
	r1_bio->sector = bio->bi_iter.bi_sector;

	/*
	 * We might need to issue multiple reads to different devices if there
	 * are bad blocks around, so we keep track of the number of reads in
	 * bio->bi_phys_segments.  If this is 0, there is only one r1_bio and
	 * no locking will be needed when requests complete.  If it is
	 * non-zero, then it is the number of not-completed requests.
	 */
	bio->bi_phys_segments = 0;
	bio_clear_flag(bio, BIO_SEG_VALID);

	if (bio_data_dir(bio) == READ)
		raid1_read_request(mddev, bio, r1_bio);
	else
		raid1_write_request(mddev, bio, r1_bio);
}

static void raid1_status(struct seq_file *seq, struct mddev *mddev)
{
	struct r1conf *conf = mddev->private;
@@ -3246,8 +3261,8 @@ static void *raid1_takeover(struct mddev *mddev)
		if (!IS_ERR(conf)) {
			/* Array must appear to be quiesced */
			conf->array_frozen = 1;
			clear_bit(MD_HAS_JOURNAL, &mddev->flags);
			clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
			mddev_clear_unsupported_flags(mddev,
				UNSUPPORTED_MDDEV_FLAGS);
		}
		return conf;
	}
+140 −105
Original line number Diff line number Diff line
@@ -1087,25 +1087,18 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
	kfree(plug);
}

static void __make_request(struct mddev *mddev, struct bio *bio)
static void raid10_read_request(struct mddev *mddev, struct bio *bio,
				struct r10bio *r10_bio)
{
	struct r10conf *conf = mddev->private;
	struct r10bio *r10_bio;
	struct bio *read_bio;
	int i;
	const int op = bio_op(bio);
	const int rw = bio_data_dir(bio);
	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
	unsigned long flags;
	struct md_rdev *blocked_rdev;
	struct blk_plug_cb *cb;
	struct raid10_plug_cb *plug = NULL;
	int sectors_handled;
	int max_sectors;
	int sectors;

	md_write_start(mddev, bio);
	sector_t sectors;
	struct md_rdev *rdev;
	int slot;

	/*
	 * Register the new request and wait if the reconstruction
@@ -1118,8 +1111,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    bio->bi_iter.bi_sector < conf->reshape_progress &&
	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
		/* IO spans the reshape position.  Need to wait for
		 * reshape to pass
		/*
		 * IO spans the reshape position.  Need to wait for reshape to
		 * pass
		 */
		raid10_log(conf->mddev, "wait reshape");
		allow_barrier(conf);
@@ -1129,50 +1123,6 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
			   sectors);
		wait_barrier(conf);
	}
	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    bio_data_dir(bio) == WRITE &&
	    (mddev->reshape_backwards
	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
		bio->bi_iter.bi_sector < conf->reshape_progress))) {
		/* Need to update reshape_position in metadata */
		mddev->reshape_position = conf->reshape_progress;
		set_mask_bits(&mddev->sb_flags, 0,
			      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
		md_wakeup_thread(mddev->thread);
		raid10_log(conf->mddev, "wait reshape metadata");
		wait_event(mddev->sb_wait,
			   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));

		conf->reshape_safe = mddev->reshape_position;
	}

	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);

	r10_bio->master_bio = bio;
	r10_bio->sectors = sectors;

	r10_bio->mddev = mddev;
	r10_bio->sector = bio->bi_iter.bi_sector;
	r10_bio->state = 0;

	/* We might need to issue multiple reads to different
	 * devices if there are bad blocks around, so we keep
	 * track of the number of reads in bio->bi_phys_segments.
	 * If this is 0, there is only one r10_bio and no locking
	 * will be needed when the request completes.  If it is
	 * non-zero, then it is the number of not-completed requests.
	 */
	bio->bi_phys_segments = 0;
	bio_clear_flag(bio, BIO_SEG_VALID);

	if (rw == READ) {
		/*
		 * read balancing logic:
		 */
		struct md_rdev *rdev;
		int slot;

read_again:
	rdev = read_balance(conf, r10_bio, &max_sectors);
@@ -1204,8 +1154,9 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
	                              read_bio, disk_devt(mddev->gendisk),
	                              r10_bio->sector);
	if (max_sectors < r10_bio->sectors) {
			/* Could not read all from this device, so we will
			 * need another r10_bio.
		/*
		 * Could not read all from this device, so we will need another
		 * r10_bio.
		 */
		sectors_handled = (r10_bio->sector + max_sectors
				   - bio->bi_iter.bi_sector);
@@ -1216,10 +1167,11 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
		else
			bio->bi_phys_segments++;
		spin_unlock_irq(&conf->device_lock);
			/* Cannot call generic_make_request directly
			 * as that will be queued in __generic_make_request
			 * and subsequent mempool_alloc might block
			 * waiting for it.  so hand bio over to raid10d.
		/*
		 * Cannot call generic_make_request directly as that will be
		 * queued in __generic_make_request and subsequent
		 * mempool_alloc might block waiting for it.  so hand bio over
		 * to raid10d.
		 */
		reschedule_retry(r10_bio);

@@ -1229,17 +1181,73 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
		r10_bio->state = 0;
		r10_bio->mddev = mddev;
			r10_bio->sector = bio->bi_iter.bi_sector +
				sectors_handled;
		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
		goto read_again;
	} else
		generic_make_request(read_bio);
	return;
}

static void raid10_write_request(struct mddev *mddev, struct bio *bio,
				 struct r10bio *r10_bio)
{
	struct r10conf *conf = mddev->private;
	int i;
	const int op = bio_op(bio);
	const unsigned long do_sync = (bio->bi_opf & REQ_SYNC);
	const unsigned long do_fua = (bio->bi_opf & REQ_FUA);
	unsigned long flags;
	struct md_rdev *blocked_rdev;
	struct blk_plug_cb *cb;
	struct raid10_plug_cb *plug = NULL;
	sector_t sectors;
	int sectors_handled;
	int max_sectors;

	md_write_start(mddev, bio);

	/*
	 * WRITE:
	 * Register the new request and wait if the reconstruction
	 * thread has put up a bar for new requests.
	 * Continue immediately if no resync is active currently.
	 */
	wait_barrier(conf);

	sectors = bio_sectors(bio);
	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    bio->bi_iter.bi_sector < conf->reshape_progress &&
	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
		/*
		 * IO spans the reshape position.  Need to wait for reshape to
		 * pass
		 */
		raid10_log(conf->mddev, "wait reshape");
		allow_barrier(conf);
		wait_event(conf->wait_barrier,
			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
			   conf->reshape_progress >= bio->bi_iter.bi_sector +
			   sectors);
		wait_barrier(conf);
	}

	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
	    (mddev->reshape_backwards
	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
		bio->bi_iter.bi_sector < conf->reshape_progress))) {
		/* Need to update reshape_position in metadata */
		mddev->reshape_position = conf->reshape_progress;
		set_mask_bits(&mddev->sb_flags, 0,
			      BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
		md_wakeup_thread(mddev->thread);
		raid10_log(conf->mddev, "wait reshape metadata");
		wait_event(mddev->sb_wait,
			   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));

		conf->reshape_safe = mddev->reshape_position;
	}

	if (conf->pending_count >= max_queued_requests) {
		md_wakeup_thread(mddev->thread);
		raid10_log(mddev, "wait queued");
@@ -1300,8 +1308,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
			int bad_sectors;
			int is_bad;

			is_bad = is_badblock(rdev, dev_sector,
					     max_sectors,
			is_bad = is_badblock(rdev, dev_sector, max_sectors,
					     &first_bad, &bad_sectors);
			if (is_bad < 0) {
				/* Mustn't write here until the bad block
@@ -1405,8 +1412,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
			r10_bio->devs[i].bio = mbio;

			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
					   choose_data_offset(r10_bio,
							      rdev));
					   choose_data_offset(r10_bio, rdev));
			mbio->bi_bdev = rdev->bdev;
			mbio->bi_end_io	= raid10_end_write_request;
			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1457,8 +1463,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
			r10_bio->devs[i].repl_bio = mbio;

			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
					   choose_data_offset(
						   r10_bio, rdev));
					   choose_data_offset(r10_bio, rdev));
			mbio->bi_bdev = rdev->bdev;
			mbio->bi_end_io	= raid10_end_write_request;
			bio_set_op_attrs(mbio, op, do_sync | do_fua);
@@ -1503,6 +1508,36 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
	one_write_done(r10_bio);
}

static void __make_request(struct mddev *mddev, struct bio *bio)
{
	struct r10conf *conf = mddev->private;
	struct r10bio *r10_bio;

	r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);

	r10_bio->master_bio = bio;
	r10_bio->sectors = bio_sectors(bio);

	r10_bio->mddev = mddev;
	r10_bio->sector = bio->bi_iter.bi_sector;
	r10_bio->state = 0;

	/*
	 * We might need to issue multiple reads to different devices if there
	 * are bad blocks around, so we keep track of the number of reads in
	 * bio->bi_phys_segments.  If this is 0, there is only one r10_bio and
	 * no locking will be needed when the request completes.  If it is
	 * non-zero, then it is the number of not-completed requests.
	 */
	bio->bi_phys_segments = 0;
	bio_clear_flag(bio, BIO_SEG_VALID);

	if (bio_data_dir(bio) == READ)
		raid10_read_request(mddev, bio, r10_bio);
	else
		raid10_write_request(mddev, bio, r10_bio);
}

static void raid10_make_request(struct mddev *mddev, struct bio *bio)
{
	struct r10conf *conf = mddev->private;
+16 −20
Original line number Diff line number Diff line
@@ -1682,8 +1682,7 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,

static struct stripe_head *
r5c_recovery_alloc_stripe(struct r5conf *conf,
			  sector_t stripe_sect,
			  sector_t log_start)
			  sector_t stripe_sect)
{
	struct stripe_head *sh;

@@ -1692,7 +1691,6 @@ r5c_recovery_alloc_stripe(struct r5conf *conf,
		return NULL;  /* no more stripe available */

	r5l_recovery_reset_stripe(sh);
	sh->log_start = log_start;

	return sh;
}
@@ -1862,7 +1860,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
						stripe_sect);

		if (!sh) {
			sh = r5c_recovery_alloc_stripe(conf, stripe_sect, ctx->pos);
			sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
			/*
			 * cannot get stripe from raid5_get_active_stripe
			 * try replay some stripes
@@ -1871,7 +1869,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
				r5c_recovery_replay_stripes(
					cached_stripe_list, ctx);
				sh = r5c_recovery_alloc_stripe(
					conf, stripe_sect, ctx->pos);
					conf, stripe_sect);
			}
			if (!sh) {
				pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
@@ -1879,8 +1877,8 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
					conf->min_nr_stripes * 2);
				raid5_set_cache_size(mddev,
						     conf->min_nr_stripes * 2);
				sh = r5c_recovery_alloc_stripe(
					conf, stripe_sect, ctx->pos);
				sh = r5c_recovery_alloc_stripe(conf,
							       stripe_sect);
			}
			if (!sh) {
				pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
@@ -1894,7 +1892,6 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
			if (!test_bit(STRIPE_R5C_CACHING, &sh->state) &&
			    test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags)) {
				r5l_recovery_replay_one_stripe(conf, sh, ctx);
				sh->log_start = ctx->pos;
				list_move_tail(&sh->lru, cached_stripe_list);
			}
			r5l_recovery_load_data(log, sh, ctx, payload,
@@ -1933,8 +1930,6 @@ static void r5c_recovery_load_one_stripe(struct r5l_log *log,
			set_bit(R5_UPTODATE, &dev->flags);
		}
	}
	list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
	atomic_inc(&log->stripe_in_journal_count);
}

/*
@@ -2070,6 +2065,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
	struct stripe_head *sh, *next;
	struct mddev *mddev = log->rdev->mddev;
	struct page *page;
	sector_t next_checkpoint = MaxSector;

	page = alloc_page(GFP_KERNEL);
	if (!page) {
@@ -2078,6 +2074,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
		return -ENOMEM;
	}

	WARN_ON(list_empty(&ctx->cached_list));

	list_for_each_entry_safe(sh, next, &ctx->cached_list, lru) {
		struct r5l_meta_block *mb;
		int i;
@@ -2123,12 +2121,15 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
		sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
			     REQ_OP_WRITE, REQ_FUA, false);
		sh->log_start = ctx->pos;
		list_add_tail(&sh->r5c, &log->stripe_in_journal_list);
		atomic_inc(&log->stripe_in_journal_count);
		ctx->pos = write_pos;
		ctx->seq += 1;

		next_checkpoint = sh->log_start;
		list_del_init(&sh->lru);
		raid5_release_stripe(sh);
	}
	log->next_checkpoint = next_checkpoint;
	__free_page(page);
	return 0;
}
@@ -2139,7 +2140,6 @@ static int r5l_recovery_log(struct r5l_log *log)
	struct r5l_recovery_ctx ctx;
	int ret;
	sector_t pos;
	struct stripe_head *sh;

	ctx.pos = log->last_checkpoint;
	ctx.seq = log->last_cp_seq;
@@ -2164,16 +2164,13 @@ static int r5l_recovery_log(struct r5l_log *log)
		log->next_checkpoint = ctx.pos;
		r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq++);
		ctx.pos = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
	} else {
		sh = list_last_entry(&ctx.cached_list, struct stripe_head, lru);
		log->next_checkpoint = sh->log_start;
	}

	if ((ctx.data_only_stripes == 0) && (ctx.data_parity_stripes == 0))
		pr_debug("md/raid:%s: starting from clean shutdown\n",
			 mdname(mddev));
	else {
		pr_debug("md/raid:%s: recoverying %d data-only stripes and %d data-parity stripes\n",
		pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
			 mdname(mddev), ctx.data_only_stripes,
			 ctx.data_parity_stripes);

@@ -2418,9 +2415,6 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
	if (do_wakeup)
		wake_up(&conf->wait_for_overlap);

	if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
		return;

	spin_lock_irq(&conf->log->stripe_in_journal_lock);
	list_del_init(&sh->r5c);
	spin_unlock_irq(&conf->log->stripe_in_journal_lock);
@@ -2639,14 +2633,16 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
	spin_lock_init(&log->stripe_in_journal_lock);
	atomic_set(&log->stripe_in_journal_count, 0);

	rcu_assign_pointer(conf->log, log);

	if (r5l_load_log(log))
		goto error;

	rcu_assign_pointer(conf->log, log);
	set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
	return 0;

error:
	rcu_assign_pointer(conf->log, NULL);
	md_unregister_thread(&log->reclaim_thread);
reclaim_thread:
	mempool_destroy(log->meta_pool);
Loading