[PATCH] md: improve raid1 "IO Barrier" concept (17999be4) · Commits · e / devices / android_kernel_teracube_emerald

drivers/md/raid1.c

+89 −78

Original line number	Diff line number	Diff line
		@@ -51,6 +51,8 @@ static mdk_personality_t raid1_personality;

		static void unplug_slaves(mddev_t *mddev);

		static void allow_barrier(conf_t *conf);
		static void lower_barrier(conf_t *conf);

		static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
		{
		@@ -160,20 +162,13 @@ static void put_all_bios(conf_t conf, r1bio_t r1_bio)

		static inline void free_r1bio(r1bio_t *r1_bio)
		{
		unsigned long flags;

		conf_t *conf = mddev_to_conf(r1_bio->mddev);

		/*
		* Wake up any possible resync thread that waits for the device
		* to go idle.
		*/
		spin_lock_irqsave(&conf->resync_lock, flags);
		if (!--conf->nr_pending) {
		wake_up(&conf->wait_idle);
		wake_up(&conf->wait_resume);
		}
		spin_unlock_irqrestore(&conf->resync_lock, flags);
		allow_barrier(conf);

		put_all_bios(conf, r1_bio);
		mempool_free(r1_bio, conf->r1bio_pool);
		@@ -182,22 +177,10 @@ static inline void free_r1bio(r1bio_t *r1_bio)
		static inline void put_buf(r1bio_t *r1_bio)
		{
		conf_t *conf = mddev_to_conf(r1_bio->mddev);
		unsigned long flags;

		mempool_free(r1_bio, conf->r1buf_pool);

		spin_lock_irqsave(&conf->resync_lock, flags);
		if (!conf->barrier)
		BUG();
		--conf->barrier;
		wake_up(&conf->wait_resume);
		wake_up(&conf->wait_idle);

		if (!--conf->nr_pending) {
		wake_up(&conf->wait_idle);
		wake_up(&conf->wait_resume);
		}
		spin_unlock_irqrestore(&conf->resync_lock, flags);
		lower_barrier(conf);
		}

		static void reschedule_retry(r1bio_t *r1_bio)
		@@ -210,6 +193,7 @@ static void reschedule_retry(r1bio_t *r1_bio)
		list_add(&r1_bio->retry_list, &conf->retry_list);
		spin_unlock_irqrestore(&conf->device_lock, flags);

		wake_up(&conf->wait_barrier);
		md_wakeup_thread(mddev->thread);
		}

		@@ -593,30 +577,83 @@ static int raid1_issue_flush(request_queue_t q, struct gendisk disk,
		return ret;
		}

		/*
		* Throttle resync depth, so that we can both get proper overlapping of
		* requests, but are still able to handle normal requests quickly.
		/* Barriers....
		* Sometimes we need to suspend IO while we do something else,
		* either some resync/recovery, or reconfigure the array.
		* To do this we raise a 'barrier'.
		* The 'barrier' is a counter that can be raised multiple times
		* to count how many activities are happening which preclude
		* normal IO.
		* We can only raise the barrier if there is no pending IO.
		* i.e. if nr_pending == 0.
		* We choose only to raise the barrier if no-one is waiting for the
		* barrier to go down. This means that as soon as an IO request
		* is ready, no other operations which require a barrier will start
		* until the IO request has had a chance.
		*
		* So: regular IO calls 'wait_barrier'. When that returns there
		* is no background IO happening. It must arrange to call
		* allow_barrier when it has finished its IO.
		* Background IO must call raise_barrier. Once that returns
		* there is no normal IO happening. It must arrange to call
		* lower_barrier when the particular background IO completes.
		*/
		#define RESYNC_DEPTH 32

		static void device_barrier(conf_t *conf, sector_t sect)
		static void raise_barrier(conf_t *conf)
		{
		spin_lock_irq(&conf->resync_lock);
		wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
		conf->resync_lock, raid1_unplug(conf->mddev->queue));

		if (!conf->barrier++) {
		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
		conf->resync_lock, raid1_unplug(conf->mddev->queue));
		if (conf->nr_pending)
		BUG();
		/* Wait until no block IO is waiting */
		wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
		conf->resync_lock,
		raid1_unplug(conf->mddev->queue));

		/* block any new IO from starting */
		conf->barrier++;

		/* Now wait for all pending IO to complete */
		wait_event_lock_irq(conf->wait_barrier,
		!conf->nr_pending && conf->barrier < RESYNC_DEPTH,
		conf->resync_lock,
		raid1_unplug(conf->mddev->queue));

		spin_unlock_irq(&conf->resync_lock);
		}

		/*
		 * lower_barrier - undo one raise of the IO barrier.
		 * @conf: raid1 private data whose 'barrier' counter is decremented.
		 *
		 * The counter is modified under conf->resync_lock.  The irqsave
		 * form of the lock is used, so the caller's interrupt state is
		 * restored on unlock.
		 */
		static void lower_barrier(conf_t *conf)
		{
			unsigned long irq_state;

			spin_lock_irqsave(&conf->resync_lock, irq_state);
			--conf->barrier;
			spin_unlock_irqrestore(&conf->resync_lock, irq_state);

			/*
			 * Sleepers on wait_barrier re-check conf->barrier, so
			 * poke them now that the lock has been released.
			 */
			wake_up(&conf->wait_barrier);
		}

		static void wait_barrier(conf_t *conf)
		{
		spin_lock_irq(&conf->resync_lock);
		if (conf->barrier) {
		conf->nr_waiting++;
		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
		conf->resync_lock,
		raid1_unplug(conf->mddev->queue));
		conf->nr_waiting--;
		}
		wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
		conf->resync_lock, raid1_unplug(conf->mddev->queue));
		conf->next_resync = sect;
		conf->nr_pending++;
		spin_unlock_irq(&conf->resync_lock);
		}

		/*
		 * allow_barrier - note that one pending IO has finished.
		 * @conf: raid1 private data whose 'nr_pending' counter is decremented.
		 *
		 * Counterpart of wait_barrier(), which increments nr_pending.
		 * The counter is modified under conf->resync_lock using the
		 * irqsave form, so the caller's interrupt state is restored
		 * on unlock.
		 */
		static void allow_barrier(conf_t *conf)
		{
			unsigned long irq_state;

			spin_lock_irqsave(&conf->resync_lock, irq_state);
			--conf->nr_pending;
			spin_unlock_irqrestore(&conf->resync_lock, irq_state);

			/*
			 * raise_barrier() sleeps on wait_barrier until nr_pending
			 * reaches zero; wake it so it can re-evaluate.
			 */
			wake_up(&conf->wait_barrier);
		}


		/* duplicate the data pages for behind I/O */
		static struct page *alloc_behind_pages(struct bio bio)
		{
		@@ -678,10 +715,7 @@ static int make_request(request_queue_t q, struct bio bio)
		*/
		md_write_start(mddev, bio); /* wait on superblock update early */

		spin_lock_irq(&conf->resync_lock);
		wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
		conf->nr_pending++;
		spin_unlock_irq(&conf->resync_lock);
		wait_barrier(conf);

		disk_stat_inc(mddev->gendisk, ios[rw]);
		disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
		@@ -909,13 +943,8 @@ static void print_conf(conf_t *conf)

		static void close_sync(conf_t *conf)
		{
		spin_lock_irq(&conf->resync_lock);
		wait_event_lock_irq(conf->wait_resume, !conf->barrier,
		conf->resync_lock, raid1_unplug(conf->mddev->queue));
		spin_unlock_irq(&conf->resync_lock);

		if (conf->barrier) BUG();
		if (waitqueue_active(&conf->wait_idle)) BUG();
		wait_barrier(conf);
		allow_barrier(conf);

		mempool_destroy(conf->r1buf_pool);
		conf->r1buf_pool = NULL;
		@@ -1317,12 +1346,16 @@ static sector_t sync_request(mddev_t mddev, sector_t sector_nr, int skipped, i
		return sync_blocks;
		}
		/*
		* If there is non-resync activity waiting for us then
		* put in a delay to throttle resync.
		* If there is non-resync activity waiting for a turn,
		* and resync is going fast enough,
		* then let it through before starting on this new sync request.
		*/
		if (!go_faster && waitqueue_active(&conf->wait_resume))
		if (!go_faster && conf->nr_waiting)
		msleep_interruptible(1000);
		device_barrier(conf, sector_nr + RESYNC_SECTORS);

		raise_barrier(conf);

		conf->next_resync = sector_nr;

		/*
		* If reconstructing, and >1 working disc,
		@@ -1355,10 +1388,6 @@ static sector_t sync_request(mddev_t mddev, sector_t sector_nr, int skipped, i

		r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);

		spin_lock_irq(&conf->resync_lock);
		conf->nr_pending++;
		spin_unlock_irq(&conf->resync_lock);

		r1_bio->mddev = mddev;
		r1_bio->sector = sector_nr;
		r1_bio->state = 0;
		@@ -1542,8 +1571,7 @@ static int run(mddev_t *mddev)
		mddev->recovery_cp = MaxSector;

		spin_lock_init(&conf->resync_lock);
		init_waitqueue_head(&conf->wait_idle);
		init_waitqueue_head(&conf->wait_resume);
		init_waitqueue_head(&conf->wait_barrier);

		bio_list_init(&conf->pending_bio_list);
		bio_list_init(&conf->flushing_bio_list);
		@@ -1714,11 +1742,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
		}
		memset(newmirrors, 0, sizeof(struct mirror_info)*raid_disks);

		spin_lock_irq(&conf->resync_lock);
		conf->barrier++;
		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
		conf->resync_lock, raid1_unplug(mddev->queue));
		spin_unlock_irq(&conf->resync_lock);
		raise_barrier(conf);

		/* ok, everything is stopped */
		oldpool = conf->r1bio_pool;
		@@ -1738,12 +1762,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
		conf->raid_disks = mddev->raid_disks = raid_disks;

		conf->last_used = 0; /* just make sure it is in-range */
		spin_lock_irq(&conf->resync_lock);
		conf->barrier--;
		spin_unlock_irq(&conf->resync_lock);
		wake_up(&conf->wait_resume);
		wake_up(&conf->wait_idle);

		lower_barrier(conf);

		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
		@@ -1758,18 +1777,10 @@ static void raid1_quiesce(mddev_t *mddev, int state)

		switch(state) {
		case 1:
		spin_lock_irq(&conf->resync_lock);
		conf->barrier++;
		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
		conf->resync_lock, raid1_unplug(mddev->queue));
		spin_unlock_irq(&conf->resync_lock);
		raise_barrier(conf);
		break;
		case 0:
		spin_lock_irq(&conf->resync_lock);
		conf->barrier--;
		spin_unlock_irq(&conf->resync_lock);
		wake_up(&conf->wait_resume);
		wake_up(&conf->wait_idle);
		lower_barrier(conf);
		break;
		}
		if (mddev->thread) {

include/linux/raid/raid1.h

+2 −2

Original line number	Diff line number	Diff line
		@@ -45,6 +45,7 @@ struct r1_private_data_s {

		spinlock_t resync_lock;
		int nr_pending;
		int nr_waiting;
		int barrier;
		sector_t next_resync;
		int fullsync; /* set to 1 if a full sync is needed,
		@@ -52,8 +53,7 @@ struct r1_private_data_s {
		* Cleared when a sync completes.
		*/

		wait_queue_head_t wait_idle;
		wait_queue_head_t wait_resume;
		wait_queue_head_t wait_barrier;

		struct pool_info *poolinfo;