
Commit 2fb748d2 authored by Linus Torvalds

Merge tag 'md-3.5-fixes' of git://neil.brown.name/md

Pull md fixes from NeilBrown:
 "md: collection of bug fixes for 3.5

  You go away for 2 weeks vacation and what do you get when you come
  back? Piles of bugs :-)

  Some found by inspection, some by testing, some during use in the
  field, and some while developing for the next window..."

* tag 'md-3.5-fixes' of git://neil.brown.name/md:
  md: fix up plugging (again).
  md: support re-add of recovering devices.
  md/raid1: fix bug in read_balance introduced by hot-replace
  raid5: delayed stripe fix
  md/raid456: When read error cannot be recovered, record bad block
  md: make 'name' arg to md_register_thread non-optional.
  md/raid10: fix failure when trying to repair a read error.
  md/raid5: fix refcount problem when blocked_rdev is set.
  md:Add blk_plug in sync_thread.
  md/raid5: In ops_run_io, inc nr_pending before calling md_wait_for_blocked_rdev
  md/raid5: Do not add data_offset before call to is_badblock
  md/raid5: prefer replacing failed devices over want-replacement devices.
  md/raid10: Don't try to recovery unmatched (and unused) chunks.
parents 3bfd2454 b357f04a
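
The two plugging entries in this shortlog revolve around the same pitfall: the value returned by mddev_check_plugged() is only trustworthy at the instant it is called, so the old pattern of sampling it once at the top of make_request() and consulting the cached flag after the submission loop could act on stale state. The hunks below therefore re-test at each wakeup site. A minimal user-space sketch of the idea, with plain booleans standing in for the kernel's plug accounting (names here are illustrative, not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

static bool plugged = true;	/* stand-in for mddev_check_plugged() */

static void queue_write(int n)
{
	printf("queued write %d\n", n);
	if (n == 0)
		plugged = false;	/* an unplug can happen mid-request */
}

int main(void)
{
	bool was_plugged = plugged;	/* old code: sample once, up front */

	for (int n = 0; n < 2; n++) {
		queue_write(n);
		if (!plugged)		/* fixed code: re-check at the wakeup site */
			printf("wake md thread after write %d\n", n);
	}
	if (!was_plugged)		/* the stale check misses the unplug above */
		printf("wake md thread (stale path)\n");
	return 0;
}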
drivers/md/md.c +5 −3
@@ -5784,8 +5784,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
			super_types[mddev->major_version].
				validate_super(mddev, rdev);
		if ((info->state & (1<<MD_DISK_SYNC)) &&
-		    (!test_bit(In_sync, &rdev->flags) ||
-		     rdev->raid_disk != info->raid_disk)) {
+		     rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but events doesn't
			 * match, so reject it.
			 */
@@ -6751,7 +6750,7 @@ struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev
	thread->tsk = kthread_run(md_thread, thread,
				  "%s_%s",
				  mdname(thread->mddev),
-				  name ?: mddev->pers->name);
+				  name);
	if (IS_ERR(thread->tsk)) {
		kfree(thread);
		return NULL;
@@ -7298,6 +7297,7 @@ void md_do_sync(struct mddev *mddev)
	int skipped = 0;
	struct md_rdev *rdev;
	char *desc;
+	struct blk_plug plug;

	/* just incase thread restarts... */
	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7447,6 +7447,7 @@ void md_do_sync(struct mddev *mddev)
	}
	mddev->curr_resync_completed = j;

+	blk_start_plug(&plug);
	while (j < max_sectors) {
		sector_t sectors;

@@ -7552,6 +7553,7 @@ void md_do_sync(struct mddev *mddev)
	 * this also signals 'finished resyncing' to md_stop
	 */
 out:
+	blk_finish_plug(&plug);
	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

	/* tell personality that we are finished */
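
A side note on the md_register_thread() hunk above: `name ?: mddev->pers->name` uses gcc's conditional with omitted middle operand (a GNU C extension, sometimes called the Elvis operator), which evaluates to its first operand unless that is NULL. Making the name argument non-optional removes the fallback, and the per-personality hunks below now pass explicit strings. A stand-alone illustration of the old fallback semantics; thread_name() is a made-up helper, and this builds with gcc or clang:

#include <stdio.h>

/* `a ?: b` yields a when a is non-NULL, else b, evaluating a only once. */
static const char *thread_name(const char *name, const char *pers_name)
{
	return name ?: pers_name;	/* the old md_register_thread fallback */
}

int main(void)
{
	printf("%s\n", thread_name(NULL, "raid5"));	/* prints: raid5 */
	printf("%s\n", thread_name("resync", "raid5"));	/* prints: resync */
	return 0;
}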
drivers/md/multipath.c +2 −1
@@ -474,7 +474,8 @@ static int multipath_run (struct mddev *mddev)
	}

	{
-		mddev->thread = md_register_thread(multipathd, mddev, NULL);
+		mddev->thread = md_register_thread(multipathd, mddev,
+						   "multipath");
		if (!mddev->thread) {
			printk(KERN_ERR "multipath: couldn't allocate thread"
				" for %s\n", mdname(mddev));
drivers/md/raid1.c +5 −8
@@ -517,8 +517,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
		int bad_sectors;

		int disk = start_disk + i;
-		if (disk >= conf->raid_disks)
-			disk -= conf->raid_disks;
+		if (disk >= conf->raid_disks * 2)
+			disk -= conf->raid_disks * 2;

		rdev = rcu_dereference(conf->mirrors[disk].rdev);
		if (r1_bio->bios[disk] == IO_BLOCKED
@@ -883,7 +883,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
	struct md_rdev *blocked_rdev;
-	int plugged;
	int first_clone;
	int sectors_handled;
	int max_sectors;
@@ -1034,7 +1033,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
	 * the bad blocks.  Each set of writes gets it's own r1bio
	 * with a set of bios attached.
	 */
-	plugged = mddev_check_plugged(mddev);

	disks = conf->raid_disks * 2;
 retry_write:
@@ -1191,6 +1189,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
		bio_list_add(&conf->pending_bio_list, mbio);
		conf->pending_count++;
		spin_unlock_irqrestore(&conf->device_lock, flags);
+		if (!mddev_check_plugged(mddev))
+			md_wakeup_thread(mddev->thread);
	}
	/* Mustn't call r1_bio_write_done before this next test,
	 * as it could result in the bio being freed.
@@ -1213,9 +1213,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)

	/* In case raid1d snuck in to freeze_array */
	wake_up(&conf->wait_barrier);
-
-	if (do_sync || !bitmap || !plugged)
-		md_wakeup_thread(mddev->thread);
}

static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2621,7 +2618,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
		goto abort;
	}
	err = -ENOMEM;
-	conf->thread = md_register_thread(raid1d, mddev, NULL);
+	conf->thread = md_register_thread(raid1d, mddev, "raid1");
	if (!conf->thread) {
		printk(KERN_ERR
		       "md/raid1:%s: couldn't allocate thread\n",
drivers/md/raid10.c +16 −10
@@ -1039,7 +1039,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
	unsigned long flags;
	struct md_rdev *blocked_rdev;
-	int plugged;
	int sectors_handled;
	int max_sectors;
	int sectors;
@@ -1239,7 +1238,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
	 * of r10_bios is recored in bio->bi_phys_segments just as with
	 * the read case.
	 */
-	plugged = mddev_check_plugged(mddev);

	r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
	raid10_find_phys(conf, r10_bio);
@@ -1396,6 +1394,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
		bio_list_add(&conf->pending_bio_list, mbio);
		conf->pending_count++;
		spin_unlock_irqrestore(&conf->device_lock, flags);
+		if (!mddev_check_plugged(mddev, 0, 0))
+			md_wakeup_thread(mddev->thread);

		if (!r10_bio->devs[i].repl_bio)
			continue;
@@ -1423,6 +1423,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
		bio_list_add(&conf->pending_bio_list, mbio);
		conf->pending_count++;
		spin_unlock_irqrestore(&conf->device_lock, flags);
+		if (!mddev_check_plugged(mddev))
+			md_wakeup_thread(mddev->thread);
	}

	/* Don't remove the bias on 'remaining' (one_write_done) until
@@ -1448,9 +1450,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)

	/* In case raid10d snuck in to freeze_array */
	wake_up(&conf->wait_barrier);
-
-	if (do_sync || !mddev->bitmap || !plugged)
-		md_wakeup_thread(mddev->thread);
}

static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2310,7 +2309,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
			if (r10_sync_page_io(rdev,
					     r10_bio->devs[sl].addr +
					     sect,
-					     s<<9, conf->tmppage, WRITE)
+					     s, conf->tmppage, WRITE)
			    == 0) {
				/* Well, this device is dead */
				printk(KERN_NOTICE
@@ -2349,7 +2348,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
			switch (r10_sync_page_io(rdev,
					     r10_bio->devs[sl].addr +
					     sect,
-					     s<<9, conf->tmppage,
+					     s, conf->tmppage,
						 READ)) {
			case 0:
				/* Well, this device is dead */
@@ -2661,6 +2660,7 @@ static void raid10d(struct mddev *mddev)
	blk_start_plug(&plug);
	for (;;) {

-		flush_pending_writes(conf);
+		if (atomic_read(&mddev->plug_cnt) == 0)
+			flush_pending_writes(conf);

		spin_lock_irqsave(&conf->device_lock, flags);
@@ -2890,6 +2890,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
			/* want to reconstruct this device */
			rb2 = r10_bio;
			sect = raid10_find_virt(conf, sector_nr, i);
+			if (sect >= mddev->resync_max_sectors) {
+				/* last stripe is not complete - don't
+				 * try to recover this sector.
+				 */
+				continue;
+			}
			/* Unless we are doing a full sync, or a replacement
			 * we only need to recover the block if it is set in
			 * the bitmap
@@ -3421,7 +3427,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
	spin_lock_init(&conf->resync_lock);
	init_waitqueue_head(&conf->wait_barrier);

-	conf->thread = md_register_thread(raid10d, mddev, NULL);
+	conf->thread = md_register_thread(raid10d, mddev, "raid10");
	if (!conf->thread)
		goto out;
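
Both fix_read_error() hunks above drop a `<< 9`. The length argument of r10_sync_page_io() appears to be in 512-byte sectors already (the helper converts to bytes internally), so passing `s << 9` converted twice and requested a far larger transfer than intended; that reading matches the direction of the fix, though the helper itself is not shown here. The shift-by-nine convention, as a plain C reminder:

#include <stdio.h>

/* 1 sector = 512 bytes = 1 << 9: sectors-to-bytes is s << 9,
 * bytes-to-sectors is b >> 9. */
int main(void)
{
	unsigned long s = 8;				/* length in sectors */
	printf("%lu sectors = %lu bytes\n", s, s << 9);	/* 8 -> 4096 */
	printf("shifted twice: %lu\n", (s << 9) << 9);	/* 2097152, the bug */
	return 0;
}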

drivers/md/raid5.c +47 −20
@@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
		BUG_ON(!list_empty(&sh->lru));
		BUG_ON(atomic_read(&conf->active_stripes)==0);
		if (test_bit(STRIPE_HANDLE, &sh->state)) {
-			if (test_bit(STRIPE_DELAYED, &sh->state))
+			if (test_bit(STRIPE_DELAYED, &sh->state) &&
+			    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
				list_add_tail(&sh->lru, &conf->delayed_list);
			else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
				   sh->bm_seq - conf->seq_write > 0)
				list_add_tail(&sh->lru, &conf->bitmap_list);
			else {
+				clear_bit(STRIPE_DELAYED, &sh->state);
				clear_bit(STRIPE_BIT_DELAY, &sh->state);
				list_add_tail(&sh->lru, &conf->handle_list);
			}
@@ -606,6 +608,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
					 * a chance*/
					md_check_recovery(conf->mddev);
				}
+				/*
+				 * Because md_wait_for_blocked_rdev
+				 * will dec nr_pending, we must
+				 * increment it first.
+				 */
+				atomic_inc(&rdev->nr_pending);
				md_wait_for_blocked_rdev(rdev, conf->mddev);
			} else {
				/* Acknowledged bad block - skip the write */
@@ -1737,6 +1745,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
	} else {
		const char *bdn = bdevname(rdev->bdev, b);
		int retry = 0;
+		int set_bad = 0;

		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
		atomic_inc(&rdev->read_errors);
@@ -1748,7 +1757,8 @@ static void raid5_end_read_request(struct bio * bi, int error)
				mdname(conf->mddev),
				(unsigned long long)s,
				bdn);
-		else if (conf->mddev->degraded >= conf->max_degraded)
+		else if (conf->mddev->degraded >= conf->max_degraded) {
+			set_bad = 1;
			printk_ratelimited(
				KERN_WARNING
				"md/raid:%s: read error not correctable "
@@ -1756,8 +1766,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
				mdname(conf->mddev),
				(unsigned long long)s,
				bdn);
-		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
+		} else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
			/* Oh, no!!! */
+			set_bad = 1;
			printk_ratelimited(
				KERN_WARNING
				"md/raid:%s: read error NOT corrected!! "
@@ -1765,7 +1776,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
				mdname(conf->mddev),
				(unsigned long long)s,
				bdn);
-		else if (atomic_read(&rdev->read_errors)
+		} else if (atomic_read(&rdev->read_errors)
			 > conf->max_nr_stripes)
			printk(KERN_WARNING
			       "md/raid:%s: Too many read errors, failing device %s.\n",
@@ -1777,6 +1788,10 @@ static void raid5_end_read_request(struct bio * bi, int error)
		else {
			clear_bit(R5_ReadError, &sh->dev[i].flags);
			clear_bit(R5_ReWrite, &sh->dev[i].flags);
-			md_error(conf->mddev, rdev);
+			if (!(set_bad
+			      && test_bit(In_sync, &rdev->flags)
+			      && rdev_set_badblocks(
+				      rdev, sh->sector, STRIPE_SECTORS, 0)))
+				md_error(conf->mddev, rdev);
		}
	}
@@ -3582,8 +3597,18 @@ static void handle_stripe(struct stripe_head *sh)

finish:
	/* wait for this device to become unblocked */
-	if (conf->mddev->external && unlikely(s.blocked_rdev))
-		md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
+	if (unlikely(s.blocked_rdev)) {
+		if (conf->mddev->external)
+			md_wait_for_blocked_rdev(s.blocked_rdev,
+						 conf->mddev);
+		else
+			/* Internal metadata will immediately
+			 * be written by raid5d, so we don't
+			 * need to wait here.
+			 */
+			rdev_dec_pending(s.blocked_rdev,
+					 conf->mddev);
+	}

	if (s.handle_bad_blocks)
		for (i = disks; i--; ) {
@@ -3881,8 +3906,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
		raid_bio->bi_next = (void*)rdev;
		align_bi->bi_bdev =  rdev->bdev;
		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
-		/* No reshape active, so we can trust rdev->data_offset */
-		align_bi->bi_sector += rdev->data_offset;

		if (!bio_fits_rdev(align_bi) ||
		    is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
@@ -3893,6 +3916,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
			return 0;
		}

+		/* No reshape active, so we can trust rdev->data_offset */
+		align_bi->bi_sector += rdev->data_offset;
+
		spin_lock_irq(&conf->device_lock);
		wait_event_lock_irq(conf->wait_for_stripe,
				    conf->quiesce == 0,
@@ -3971,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
	struct stripe_head *sh;
	const int rw = bio_data_dir(bi);
	int remaining;
-	int plugged;

	if (unlikely(bi->bi_rw & REQ_FLUSH)) {
		md_flush_request(mddev, bi);
@@ -3990,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
	bi->bi_next = NULL;
	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */

-	plugged = mddev_check_plugged(mddev);
	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
		DEFINE_WAIT(w);
		int previous;
@@ -4092,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
			if ((bi->bi_rw & REQ_SYNC) &&
			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
				atomic_inc(&conf->preread_active_stripes);
+			mddev_check_plugged(mddev);
			release_stripe(sh);
		} else {
			/* cannot get stripe for read-ahead, just give-up */
@@ -4099,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
			finish_wait(&conf->wait_for_overlap, &w);
			break;
		}
-			
	}
-	if (!plugged)
-		md_wakeup_thread(mddev->thread);

	spin_lock_irq(&conf->device_lock);
	remaining = raid5_dec_bi_phys_segments(bi);
@@ -4823,6 +4845,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
	int raid_disk, memory, max_disks;
	struct md_rdev *rdev;
	struct disk_info *disk;
+	char pers_name[6];

	if (mddev->new_level != 5
	    && mddev->new_level != 4
@@ -4946,7 +4969,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
		printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
		       mdname(mddev), memory);

-	conf->thread = md_register_thread(raid5d, mddev, NULL);
+	sprintf(pers_name, "raid%d", mddev->new_level);
+	conf->thread = md_register_thread(raid5d, mddev, pers_name);
	if (!conf->thread) {
		printk(KERN_ERR
		       "md/raid:%s: couldn't allocate thread.\n",
@@ -5465,10 +5489,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
	if (rdev->saved_raid_disk >= 0 &&
	    rdev->saved_raid_disk >= first &&
	    conf->disks[rdev->saved_raid_disk].rdev == NULL)
-		disk = rdev->saved_raid_disk;
-	else
-		disk = first;
-	for ( ; disk <= last ; disk++) {
+		first = rdev->saved_raid_disk;
+
+	for (disk = first; disk <= last; disk++) {
		p = conf->disks + disk;
		if (p->rdev == NULL) {
			clear_bit(In_sync, &rdev->flags);
@@ -5477,8 +5500,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
			if (rdev->saved_raid_disk != disk)
				conf->fullsync = 1;
			rcu_assign_pointer(p->rdev, rdev);
-			break;
+			goto out;
		}
+	}
+	for (disk = first; disk <= last; disk++) {
+		p = conf->disks + disk;
		if (test_bit(WantReplacement, &p->rdev->flags) &&
		    p->replacement == NULL) {
			clear_bit(In_sync, &rdev->flags);
@@ -5490,6 +5516,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
			break;
		}
	}
+out:
	print_raid5_conf(conf);
	return err;
}
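
The comment added in ops_run_io() states the reference-counting rule behind two of the raid5 fixes: md_wait_for_blocked_rdev() drops one nr_pending reference when it returns, so a caller that does not already hold one must atomic_inc() it first. The handle_stripe() hunk is the complementary case: when internal metadata means there is no need to wait, the reference is released with rdev_dec_pending() instead. A stand-alone sketch of the invariant, with a plain int standing in for the kernel's atomic counter:

#include <assert.h>
#include <stdio.h>

static int nr_pending;		/* stand-in for rdev->nr_pending */

/* Mimics md_wait_for_blocked_rdev(): would block until the device is
 * unblocked, then consumes the reference it was handed. */
static void wait_for_blocked_rdev(void)
{
	nr_pending--;
}

int main(void)
{
	nr_pending++;			/* the ops_run_io() fix: take the ref first */
	wait_for_blocked_rdev();
	assert(nr_pending == 0);	/* without the inc this underflows to -1 */
	printf("nr_pending balanced at %d\n", nr_pending);
	return 0;
}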