Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 671488cc authored by NeilBrown
Browse files

md/raid5: allow each slot to have an extra replacement device



Just enhance data structures to record a second device per slot to be
used as a 'replacement' device, replacing the original.
We also have a second bio in each slot in each stripe_head.  This will
only be used when writing to the array - we need to write to both the
original and the replacement at the same time, so will need two bios.

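For now, only try using the replacement drive for aligned reads.
In this case, we prefer the replacement if it is not faulty and its
recovery has reached at least the end of the requested range (its
recovery_offset is not below the end sector of the read); otherwise
we use the original.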

This includes a small enhancement.  Previously we would only do
aligned reads if the target device was fully recovered (In_sync).
Now we also do them if the device is not faulty and its recovery has
reached at least the end of the requested range.

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: NeilBrown <neilb@suse.de>
parent 2d78f8c4
Loading
Loading
Loading
Loading
+13 −2
Original line number Original line Diff line number Diff line
@@ -3594,6 +3594,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
	int dd_idx;
	int dd_idx;
	struct bio* align_bi;
	struct bio* align_bi;
	struct md_rdev *rdev;
	struct md_rdev *rdev;
	sector_t end_sector;


	if (!in_chunk_boundary(mddev, raid_bio)) {
	if (!in_chunk_boundary(mddev, raid_bio)) {
		pr_debug("chunk_aligned_read : non aligned\n");
		pr_debug("chunk_aligned_read : non aligned\n");
@@ -3618,9 +3619,19 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
						    0,
						    0,
						    &dd_idx, NULL);
						    &dd_idx, NULL);


	end_sector = align_bi->bi_sector + (align_bi->bi_size >> 9);
	rcu_read_lock();
	rcu_read_lock();
	rdev = rcu_dereference(conf->disks[dd_idx].replacement);
	if (!rdev || test_bit(Faulty, &rdev->flags) ||
	    rdev->recovery_offset < end_sector) {
		rdev = rcu_dereference(conf->disks[dd_idx].rdev);
		rdev = rcu_dereference(conf->disks[dd_idx].rdev);
	if (rdev && test_bit(In_sync, &rdev->flags)) {
		if (rdev &&
		    (test_bit(Faulty, &rdev->flags) ||
		    !(test_bit(In_sync, &rdev->flags) ||
		      rdev->recovery_offset >= end_sector)))
			rdev = NULL;
	}
	if (rdev) {
		sector_t first_bad;
		sector_t first_bad;
		int bad_sectors;
		int bad_sectors;


+33 −24
Original line number Original line Diff line number Diff line
@@ -226,8 +226,11 @@ struct stripe_head {
		#endif
		#endif
	} ops;
	} ops;
	struct r5dev {
	struct r5dev {
		struct bio	req;
		/* rreq and rvec are used for the replacement device when
		struct bio_vec	vec;
		 * writing data to both devices.
		 */
		struct bio	req, rreq;
		struct bio_vec	vec, rvec;
		struct page	*page;
		struct page	*page;
		struct bio	*toread, *read, *towrite, *written;
		struct bio	*toread, *read, *towrite, *written;
		sector_t	sector;			/* sector of this page */
		sector_t	sector;			/* sector of this page */
@@ -252,29 +255,35 @@ struct stripe_head_state {
	int handle_bad_blocks;
	int handle_bad_blocks;
};
};


/* Flags */
/* Flags for struct r5dev.flags */
#define	R5_UPTODATE	0	/* page contains current data */
enum r5dev_flags {
#define	R5_LOCKED	1	/* IO has been submitted on "req" */
	R5_UPTODATE,	/* page contains current data */
#define	R5_OVERWRITE	2	/* towrite covers whole page */
	R5_LOCKED,	/* IO has been submitted on "req" */
	R5_OVERWRITE,	/* towrite covers whole page */
/* and some that are internal to handle_stripe */
/* and some that are internal to handle_stripe */
#define	R5_Insync	3	/* rdev && rdev->in_sync at start */
	R5_Insync,	/* rdev && rdev->in_sync at start */
#define	R5_Wantread	4	/* want to schedule a read */
	R5_Wantread,	/* want to schedule a read */
#define	R5_Wantwrite	5
	R5_Wantwrite,
#define	R5_Overlap	7	/* There is a pending overlapping request on this block */
	R5_Overlap,	/* There is a pending overlapping request
#define	R5_ReadError	8	/* seen a read error here recently */
			 * on this block */
#define	R5_ReWrite	9	/* have tried to over-write the readerror */
	R5_ReadError,	/* seen a read error here recently */
	R5_ReWrite,	/* have tried to over-write the readerror */


#define	R5_Expanded	10	/* This block now has post-expand data */
	R5_Expanded,	/* This block now has post-expand data */
#define	R5_Wantcompute	11	/* compute_block in progress treat as
	R5_Wantcompute,	/* compute_block in progress treat as
			 * uptodate
			 * uptodate
			 */
			 */
#define	R5_Wantfill	12	/* dev->toread contains a bio that needs
	R5_Wantfill,	/* dev->toread contains a bio that needs
			 * filling
			 * filling
			 */
			 */
#define	R5_Wantdrain	13	/* dev->towrite needs to be drained */
	R5_Wantdrain,	/* dev->towrite needs to be drained */
#define	R5_WantFUA	14	/* Write should be FUA */
	R5_WantFUA,	/* Write should be FUA */
#define	R5_WriteError	15	/* got a write error - need to record it */
	R5_WriteError,	/* got a write error - need to record it */
#define	R5_MadeGood	16	/* A bad block has been fixed by writing to it*/
	R5_MadeGood,	/* A bad block has been fixed by writing to it */
	R5_ReadRepl,	/* Will/did read from replacement rather than orig */
	R5_MadeGoodRepl,/* A bad block on the replacement device has been
			 * fixed by writing to it */
};
/*
/*
 * Write method
 * Write method
 */
 */
@@ -344,7 +353,7 @@ enum {




struct disk_info {
struct disk_info {
	struct md_rdev	*rdev;
	struct md_rdev	*rdev, *replacement;
};
};


struct r5conf {
struct r5conf {