Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 06d91a5f authored by NeilBrown, committed by Linus Torvalds
Browse files

[PATCH] md: improve locking on 'safemode' and move superblock writes



When md marks the superblock dirty before a write, it calls
generic_make_request (to write the superblock) from within
generic_make_request (to write the first dirty block), which could cause
problems later.

With this patch, the superblock write is always done by the helper thread, and
write requests are delayed until that write completes.

Also, the locking around marking the array dirty and writing the superblock is
improved to avoid possible races.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent fca4d848
Loading
Loading
Loading
Loading
+59 −14
Original line number Diff line number Diff line
@@ -218,6 +218,8 @@ static mddev_t * mddev_find(dev_t unit)
	INIT_LIST_HEAD(&new->all_mddevs);
	init_timer(&new->safemode_timer);
	atomic_set(&new->active, 1);
	bio_list_init(&new->write_list);
	spin_lock_init(&new->write_lock);

	new->queue = blk_alloc_queue(GFP_KERNEL);
	if (!new->queue) {
@@ -1251,9 +1253,11 @@ static void md_update_sb(mddev_t * mddev)
	int err, count = 100;
	struct list_head *tmp;
	mdk_rdev_t *rdev;
	int sync_req;

	mddev->sb_dirty = 0;
repeat:
	spin_lock(&mddev->write_lock);
	sync_req = mddev->in_sync;
	mddev->utime = get_seconds();
	mddev->events ++;

@@ -1272,8 +1276,12 @@ repeat:
	 * do not write anything to disk if using
	 * nonpersistent superblocks
	 */
	if (!mddev->persistent)
	if (!mddev->persistent) {
		mddev->sb_dirty = 0;
		spin_unlock(&mddev->write_lock);
		return;
	}
	spin_unlock(&mddev->write_lock);

	dprintk(KERN_INFO 
		"md: updating %s RAID superblock on device (in sync %d)\n",
@@ -1304,6 +1312,15 @@ repeat:
		printk(KERN_ERR \
			"md: excessive errors occurred during superblock update, exiting\n");
	}
	spin_lock(&mddev->write_lock);
	if (mddev->in_sync != sync_req) {
		/* have to write it out again */
		spin_unlock(&mddev->write_lock);
		goto repeat;
	}
	mddev->sb_dirty = 0;
	spin_unlock(&mddev->write_lock);

}

/*
@@ -3178,19 +3195,31 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
}


void md_write_start(mddev_t *mddev)
/* md_write_start(mddev, bi)
 * If we need to update some array metadata (e.g. 'active' flag
 * in superblock) before writing, queue bi for later writing
 * and return 0, else return 1 and it will be written now
 */
int md_write_start(mddev_t *mddev, struct bio *bi)
{
	if (!atomic_read(&mddev->writes_pending)) {
		mddev_lock_uninterruptible(mddev);
	if (bio_data_dir(bi) != WRITE)
		return 1;

	atomic_inc(&mddev->writes_pending);
	spin_lock(&mddev->write_lock);
	if (mddev->in_sync == 0 && mddev->sb_dirty == 0) {
		spin_unlock(&mddev->write_lock);
		return 1;
	}
	bio_list_add(&mddev->write_list, bi);

	if (mddev->in_sync) {
		mddev->in_sync = 0;
 			del_timer(&mddev->safemode_timer);
			md_update_sb(mddev);
		mddev->sb_dirty = 1;
	}
		atomic_inc(&mddev->writes_pending);
		mddev_unlock(mddev);
	} else
		atomic_inc(&mddev->writes_pending);
	spin_unlock(&mddev->write_lock);
	md_wakeup_thread(mddev->thread);
	return 0;
}

void md_write_end(mddev_t *mddev)
@@ -3472,6 +3501,7 @@ void md_check_recovery(mddev_t *mddev)
		mddev->sb_dirty ||
		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
		mddev->write_list.head ||
		(mddev->safemode == 1) ||
		(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
		 && !mddev->in_sync && mddev->recovery_cp == MaxSector)
@@ -3480,7 +3510,9 @@ void md_check_recovery(mddev_t *mddev)

	if (mddev_trylock(mddev)==0) {
		int spares =0;
		struct bio *blist;

		spin_lock(&mddev->write_lock);
		if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
		    !mddev->in_sync && mddev->recovery_cp == MaxSector) {
			mddev->in_sync = 1;
@@ -3488,9 +3520,22 @@ void md_check_recovery(mddev_t *mddev)
		}
		if (mddev->safemode == 1)
			mddev->safemode = 0;
		blist = bio_list_get(&mddev->write_list);
		spin_unlock(&mddev->write_lock);

		if (mddev->sb_dirty)
			md_update_sb(mddev);

		while (blist) {
			struct bio *b = blist;
			blist = blist->bi_next;
			b->bi_next = NULL;
			generic_make_request(b);
			/* we already counted this, so need to un-count */
			md_write_end(mddev);
		}


		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
		    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
			/* resync/recovery still happening */
+3 −1
Original line number Diff line number Diff line
@@ -530,6 +530,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
	 * thread has put up a bar for new requests.
	 * Continue immediately if no resync is active currently.
	 */
	if (md_write_start(mddev, bio)==0)
		return 0;
	spin_lock_irq(&conf->resync_lock);
	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
	conf->nr_pending++;
@@ -611,7 +613,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
	rcu_read_unlock();

	atomic_set(&r1_bio->remaining, 1);
	md_write_start(mddev);

	for (i = 0; i < disks; i++) {
		struct bio *mbio;
		if (!r1_bio->bios[i])
+4 −1
Original line number Diff line number Diff line
@@ -700,6 +700,9 @@ static int make_request(request_queue_t *q, struct bio * bio)
		return 0;
	}

	if (md_write_start(mddev, bio) == 0)
		return 0;

	/*
	 * Register the new request and wait if the reconstruction
	 * thread has put up a bar for new requests.
@@ -774,7 +777,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
	rcu_read_unlock();

	atomic_set(&r10_bio->remaining, 1);
	md_write_start(mddev);

	for (i = 0; i < conf->copies; i++) {
		struct bio *mbio;
		int d = r10_bio->devs[i].devnum;
+4 −2
Original line number Diff line number Diff line
@@ -1411,6 +1411,9 @@ static int make_request (request_queue_t *q, struct bio * bi)
	sector_t logical_sector, last_sector;
	struct stripe_head *sh;

	if (md_write_start(mddev, bi)==0)
		return 0;

	if (bio_data_dir(bi)==WRITE) {
		disk_stat_inc(mddev->gendisk, writes);
		disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi));
@@ -1423,8 +1426,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
	last_sector = bi->bi_sector + (bi->bi_size>>9);
	bi->bi_next = NULL;
	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
	if ( bio_data_dir(bi) == WRITE )
		md_write_start(mddev);

	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
		DEFINE_WAIT(w);
		
+4 −2
Original line number Diff line number Diff line
@@ -1570,6 +1570,9 @@ static int make_request (request_queue_t *q, struct bio * bi)
	sector_t logical_sector, last_sector;
	struct stripe_head *sh;

	if (md_write_start(mddev, bi)==0)
		return 0;

	if (bio_data_dir(bi)==WRITE) {
		disk_stat_inc(mddev->gendisk, writes);
		disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi));
@@ -1583,8 +1586,7 @@ static int make_request (request_queue_t *q, struct bio * bi)

	bi->bi_next = NULL;
	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
	if ( bio_data_dir(bi) == WRITE )
		md_write_start(mddev);

	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
		DEFINE_WAIT(w);

Loading