Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1d838d70 authored by Linus Torvalds
Browse files

Merge tag 'md-3.7-fixes' of git://neil.brown.name/md

Pull md fixes from NeilBrown:
 "Several bug fixes for md in 3.7:

   - raid5 discard has problems
   - raid10 replacement devices have problems
   - bad block lock seqlock usage has problems
   - dm-raid doesn't free everything"

* tag 'md-3.7-fixes' of git://neil.brown.name/md:
  md/raid10: decrement correct pending counter when writing to replacement.
  md/raid10: close race that loses writes when replacement completes.
  md/raid5: Make sure we clear R5_Discard when discard is finished.
  md/raid5: move resolving of reconstruct_state earlier in stripe_handle.
  md/raid5: round discard alignment up to power of 2.
  md: make sure everything is freed when dm-raid stops an array.
  md: Avoid writing to an invalid address if read_seqretry returned true.
  md: Reassigned the parameters if read_seqretry returned true in func md_is_badblock.
parents a8946afe 884162df
Loading
Loading
Loading
Loading
+20 −7
Original line number Diff line number Diff line
@@ -1817,10 +1817,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
			memset(bbp, 0xff, PAGE_SIZE);

			for (i = 0 ; i < bb->count ; i++) {
				u64 internal_bb = *p++;
				u64 internal_bb = p[i];
				u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
						| BB_LEN(internal_bb));
				*bbp++ = cpu_to_le64(store_bb);
				bbp[i] = cpu_to_le64(store_bb);
			}
			bb->changed = 0;
			if (read_seqretry(&bb->lock, seq))
@@ -5294,7 +5294,7 @@ void md_stop_writes(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(md_stop_writes);

void md_stop(struct mddev *mddev)
static void __md_stop(struct mddev *mddev)
{
	mddev->ready = 0;
	mddev->pers->stop(mddev);
@@ -5304,6 +5304,18 @@ void md_stop(struct mddev *mddev)
	mddev->pers = NULL;
	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}

/*
 * md_stop - stop an array and release the structures attached to it.
 * @mddev: the array to stop.
 *
 * Performs the core stop via __md_stop(), then destroys the bitmap and
 * frees the bio_set if one is present.
 */
void md_stop(struct mddev *mddev)
{
	/* Stop the array and free any attached data structures.
	 * This is called from dm-raid.
	 */
	__md_stop(mddev);
	bitmap_destroy(mddev);
	/* Only free the bio_set when one is actually set. */
	if (mddev->bio_set)
		bioset_free(mddev->bio_set);
}

EXPORT_SYMBOL_GPL(md_stop);

static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
@@ -5364,7 +5376,7 @@ static int do_md_stop(struct mddev * mddev, int mode,
			set_disk_ro(disk, 0);

		__md_stop_writes(mddev);
		md_stop(mddev);
		__md_stop(mddev);
		mddev->queue->merge_bvec_fn = NULL;
		mddev->queue->backing_dev_info.congested_fn = NULL;

@@ -7936,9 +7948,9 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
		   sector_t *first_bad, int *bad_sectors)
{
	int hi;
	int lo = 0;
	int lo;
	u64 *p = bb->page;
	int rv = 0;
	int rv;
	sector_t target = s + sectors;
	unsigned seq;

@@ -7953,7 +7965,8 @@ int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,

retry:
	seq = read_seqbegin(&bb->lock);

	lo = 0;
	rv = 0;
	hi = bb->count;

	/* Binary search between lo and hi for 'target'
+69 −62
Original line number Diff line number Diff line
@@ -499,7 +499,7 @@ static void raid10_end_write_request(struct bio *bio, int error)
	 */
	one_write_done(r10_bio);
	if (dec_rdev)
		rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
		rdev_dec_pending(rdev, conf->mddev);
}

/*
@@ -1334,18 +1334,21 @@ static void make_request(struct mddev *mddev, struct bio * bio)
			blocked_rdev = rrdev;
			break;
		}
		if (rdev && (test_bit(Faulty, &rdev->flags)
			     || test_bit(Unmerged, &rdev->flags)))
			rdev = NULL;
		if (rrdev && (test_bit(Faulty, &rrdev->flags)
			      || test_bit(Unmerged, &rrdev->flags)))
			rrdev = NULL;

		r10_bio->devs[i].bio = NULL;
		r10_bio->devs[i].repl_bio = NULL;
		if (!rdev || test_bit(Faulty, &rdev->flags) ||
		    test_bit(Unmerged, &rdev->flags)) {

		if (!rdev && !rrdev) {
			set_bit(R10BIO_Degraded, &r10_bio->state);
			continue;
		}
		if (test_bit(WriteErrorSeen, &rdev->flags)) {
		if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
			sector_t first_bad;
			sector_t dev_sector = r10_bio->devs[i].addr;
			int bad_sectors;
@@ -1387,8 +1390,10 @@ static void make_request(struct mddev *mddev, struct bio * bio)
					max_sectors = good_sectors;
			}
		}
		if (rdev) {
			r10_bio->devs[i].bio = bio;
			atomic_inc(&rdev->nr_pending);
		}
		if (rrdev) {
			r10_bio->devs[i].repl_bio = bio;
			atomic_inc(&rrdev->nr_pending);
@@ -1444,9 +1449,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
	for (i = 0; i < conf->copies; i++) {
		struct bio *mbio;
		int d = r10_bio->devs[i].devnum;
		if (!r10_bio->devs[i].bio)
			continue;

		if (r10_bio->devs[i].bio) {
			struct md_rdev *rdev = conf->mirrors[d].rdev;
			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
				    max_sectors);
@@ -1454,17 +1458,19 @@ static void make_request(struct mddev *mddev, struct bio * bio)

			mbio->bi_sector	= (r10_bio->devs[i].addr+
					   choose_data_offset(r10_bio,
						      conf->mirrors[d].rdev));
		mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
							      rdev));
			mbio->bi_bdev = rdev->bdev;
			mbio->bi_end_io	= raid10_end_write_request;
			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
			mbio->bi_private = r10_bio;

			atomic_inc(&r10_bio->remaining);

		cb = blk_check_plugged(raid10_unplug, mddev, sizeof(*plug));
			cb = blk_check_plugged(raid10_unplug, mddev,
					       sizeof(*plug));
			if (cb)
			plug = container_of(cb, struct raid10_plug_cb, cb);
				plug = container_of(cb, struct raid10_plug_cb,
						    cb);
			else
				plug = NULL;
			spin_lock_irqsave(&conf->device_lock, flags);
@@ -1478,24 +1484,24 @@ static void make_request(struct mddev *mddev, struct bio * bio)
			spin_unlock_irqrestore(&conf->device_lock, flags);
			if (!plug)
				md_wakeup_thread(mddev->thread);
		}

		if (!r10_bio->devs[i].repl_bio)
			continue;

		if (r10_bio->devs[i].repl_bio) {
			struct md_rdev *rdev = conf->mirrors[d].replacement;
			if (rdev == NULL) {
				/* Replacement just got moved to main 'rdev' */
				smp_mb();
				rdev = conf->mirrors[d].rdev;
			}
			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
			md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
				    max_sectors);
			r10_bio->devs[i].repl_bio = mbio;

		/* We are actively writing to the original device
		 * so it cannot disappear, so the replacement cannot
		 * become NULL here
		 */
			mbio->bi_sector	= (r10_bio->devs[i].addr +
					   choose_data_offset(
					   r10_bio,
					   conf->mirrors[d].replacement));
		mbio->bi_bdev = conf->mirrors[d].replacement->bdev;
						   r10_bio, rdev));
			mbio->bi_bdev = rdev->bdev;
			mbio->bi_end_io	= raid10_end_write_request;
			mbio->bi_rw = WRITE | do_sync | do_fua | do_discard;
			mbio->bi_private = r10_bio;
@@ -1508,6 +1514,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
			if (!mddev_check_plugged(mddev))
				md_wakeup_thread(mddev->thread);
		}
	}

	/* Don't remove the bias on 'remaining' (one_write_done) until
	 * after checking if we need to go around again.
+43 −36
Original line number Diff line number Diff line
@@ -2774,10 +2774,12 @@ static void handle_stripe_clean_event(struct r5conf *conf,
			dev = &sh->dev[i];
			if (!test_bit(R5_LOCKED, &dev->flags) &&
			    (test_bit(R5_UPTODATE, &dev->flags) ||
			     test_and_clear_bit(R5_Discard, &dev->flags))) {
			     test_bit(R5_Discard, &dev->flags))) {
				/* We can return any write requests */
				struct bio *wbi, *wbi2;
				pr_debug("Return write for disc %d\n", i);
				if (test_and_clear_bit(R5_Discard, &dev->flags))
					clear_bit(R5_UPTODATE, &dev->flags);
				wbi = dev->written;
				dev->written = NULL;
				while (wbi && wbi->bi_sector <
@@ -2795,7 +2797,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
					 !test_bit(STRIPE_DEGRADED, &sh->state),
						0);
			}
		}
		} else if (test_bit(R5_Discard, &sh->dev[i].flags))
			clear_bit(R5_Discard, &sh->dev[i].flags);

	if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
		if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -3490,40 +3493,6 @@ static void handle_stripe(struct stripe_head *sh)
			handle_failed_sync(conf, sh, &s);
	}

	/*
	 * might be able to return some write requests if the parity blocks
	 * are safe, or on a failed drive
	 */
	pdev = &sh->dev[sh->pd_idx];
	s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
		|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
	qdev = &sh->dev[sh->qd_idx];
	s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
		|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
		|| conf->level < 6;

	if (s.written &&
	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
			     && !test_bit(R5_LOCKED, &pdev->flags)
			     && (test_bit(R5_UPTODATE, &pdev->flags) ||
				 test_bit(R5_Discard, &pdev->flags))))) &&
	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
			     && !test_bit(R5_LOCKED, &qdev->flags)
			     && (test_bit(R5_UPTODATE, &qdev->flags) ||
				 test_bit(R5_Discard, &qdev->flags))))))
		handle_stripe_clean_event(conf, sh, disks, &s.return_bi);

	/* Now we might consider reading some blocks, either to check/generate
	 * parity, or to satisfy requests
	 * or to load a block that is being partially written.
	 */
	if (s.to_read || s.non_overwrite
	    || (conf->level == 6 && s.to_write && s.failed)
	    || (s.syncing && (s.uptodate + s.compute < disks))
	    || s.replacing
	    || s.expanding)
		handle_stripe_fill(sh, &s, disks);

	/* Now we check to see if any write operations have recently
	 * completed
	 */
@@ -3561,6 +3530,40 @@ static void handle_stripe(struct stripe_head *sh)
			s.dec_preread_active = 1;
	}

	/*
	 * might be able to return some write requests if the parity blocks
	 * are safe, or on a failed drive
	 */
	pdev = &sh->dev[sh->pd_idx];
	s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
		|| (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
	qdev = &sh->dev[sh->qd_idx];
	s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
		|| (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
		|| conf->level < 6;

	if (s.written &&
	    (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
			     && !test_bit(R5_LOCKED, &pdev->flags)
			     && (test_bit(R5_UPTODATE, &pdev->flags) ||
				 test_bit(R5_Discard, &pdev->flags))))) &&
	    (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
			     && !test_bit(R5_LOCKED, &qdev->flags)
			     && (test_bit(R5_UPTODATE, &qdev->flags) ||
				 test_bit(R5_Discard, &qdev->flags))))))
		handle_stripe_clean_event(conf, sh, disks, &s.return_bi);

	/* Now we might consider reading some blocks, either to check/generate
	 * parity, or to satisfy requests
	 * or to load a block that is being partially written.
	 */
	if (s.to_read || s.non_overwrite
	    || (conf->level == 6 && s.to_write && s.failed)
	    || (s.syncing && (s.uptodate + s.compute < disks))
	    || s.replacing
	    || s.expanding)
		handle_stripe_fill(sh, &s, disks);

	/* Now to consider new write requests and what else, if anything
	 * should be read.  We do not handle new writes when:
	 * 1/ A 'write' operation (copy+xor) is already in flight.
@@ -5529,6 +5532,10 @@ static int run(struct mddev *mddev)
		 * discard data disk but write parity disk
		 */
		stripe = stripe * PAGE_SIZE;
		/* Round up to power of 2, as discard handling
		 * currently assumes that */
		while ((stripe-1) & stripe)
			stripe = (stripe | (stripe-1)) + 1;
		mddev->queue->limits.discard_alignment = stripe;
		mddev->queue->limits.discard_granularity = stripe;
		/*