
Commit 8d0304e6 authored by Linus Torvalds

Merge tag 'md/3.16' of git://neil.brown.name/md

Pull md updates from Neil Brown:
 "Assorted md fixes for 3.16

  Mostly performance improvements with a few corner-case bug fixes"

* tag 'md/3.16' of git://neil.brown.name/md:
  raid5: speedup sync_request processing
  md/raid5: deadlock between retry_aligned_read with barrier io
  raid5: add an option to avoid copy data from bio to stripe cache
  md/bitmap: remove confusing code from filemap_get_page.
  raid5: avoid release list until last reference of the stripe
  md: md_clear_badblocks should return an error code on failure.
  md/raid56: Don't perform reads to support writes until stripe is ready.
  md: refuse to change shape of array if it is active but read-only
parents dfb94547 053f5b65
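Note: the raid5 series below adds a skip_copy tunable (see the new skip_copy sysfs attribute and the R5_SkipCopy flag in the raid5.c/raid5.h hunks) that lets writes reference the bio pages directly instead of copying them into the stripe cache; the store routine suspends the array and toggles BDI_CAP_STABLE_WRITES because the source pages must then stay stable until the write completes. As a rough, hypothetical user-space sketch (assuming an array named md0 and the usual /sys/block/<dev>/md/ attribute directory), turning it on could look like:

	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical path: assumes the array is /dev/md0. */
		FILE *f = fopen("/sys/block/md0/md/skip_copy", "w");

		if (!f) {
			perror("skip_copy");
			return 1;
		}
		fputs("1\n", f);	/* any non-zero value enables skip_copy */
		fclose(f);
		return 0;
	}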
drivers/md/bitmap.c  +1 −5
@@ -669,17 +669,13 @@ static inline unsigned long file_page_offset(struct bitmap_storage *store,
/*
 * return a pointer to the page in the filemap that contains the given bit
 *
- * this lookup is complicated by the fact that the bitmap sb might be exactly
- * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
- * 0 or page 1
 */
static inline struct page *filemap_get_page(struct bitmap_storage *store,
					    unsigned long chunk)
{
	if (file_page_index(store, chunk) >= store->file_pages)
		return NULL;
-	return store->filemap[file_page_index(store, chunk)
-			      - file_page_index(store, 0)];
+	return store->filemap[file_page_index(store, chunk)];
}

static int bitmap_storage_alloc(struct bitmap_storage *store,
drivers/md/md.c  +11 −1
@@ -3448,6 +3448,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
		mddev->level = LEVEL_NONE;
		return rv;
	}
+	if (mddev->ro)
+		return  -EROFS;

	/* request to change the personality.  Need to ensure:
	 *  - array is not engaged in resync/recovery/reshape
@@ -3634,6 +3636,8 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
		int err;
		if (mddev->pers->check_reshape == NULL)
			return -EBUSY;
+		if (mddev->ro)
+			return -EROFS;
		mddev->new_layout = n;
		err = mddev->pers->check_reshape(mddev);
		if (err) {
@@ -3723,6 +3727,8 @@ chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
		int err;
		if (mddev->pers->check_reshape == NULL)
			return -EBUSY;
+		if (mddev->ro)
+			return -EROFS;
		mddev->new_chunk_sectors = n >> 9;
		err = mddev->pers->check_reshape(mddev);
		if (err) {
@@ -6135,6 +6141,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
	 */
	if (mddev->sync_thread)
		return -EBUSY;
+	if (mddev->ro)
+		return -EROFS;

	rdev_for_each(rdev, mddev) {
		sector_t avail = rdev->sectors;
@@ -6157,6 +6165,8 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
	/* change the number of raid disks */
	if (mddev->pers->check_reshape == NULL)
		return -EINVAL;
+	if (mddev->ro)
+		return -EROFS;
	if (raid_disks <= 0 ||
	    (mddev->max_disks && raid_disks >= mddev->max_disks))
		return -EINVAL;
@@ -8333,7 +8343,7 @@ static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
			if (a < s) {
				/* we need to split this range */
				if (bb->count >= MD_MAX_BADBLOCKS) {
-					rv = 0;
+					rv = -ENOSPC;
					goto out;
				}
				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
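The md.c hunks above make every shape-changing store (level, layout, chunk_size, component size, raid_disks) bail out with -EROFS while the array is read-only, and md_clear_badblocks now reports -ENOSPC instead of returning success when the bad-block table is too full to split a range. A minimal, hypothetical user-space sketch of the new read-only behaviour (it assumes an array named md0 that has been switched to read-only):

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		/* Hypothetical path: assumes a read-only array at /dev/md0. */
		FILE *f = fopen("/sys/block/md0/md/raid_disks", "w");

		if (!f) {
			perror("raid_disks");
			return 1;
		}
		/* The write now fails with EROFS instead of starting a reshape. */
		if (fputs("5\n", f) == EOF || fflush(f) == EOF)
			fprintf(stderr, "raid_disks: %s\n", strerror(errno));
		fclose(f);
		return 0;
	}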
drivers/md/raid5.c  +123 −35
@@ -292,9 +292,12 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
	BUG_ON(atomic_read(&conf->active_stripes)==0);
	if (test_bit(STRIPE_HANDLE, &sh->state)) {
		if (test_bit(STRIPE_DELAYED, &sh->state) &&
-		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
			list_add_tail(&sh->lru, &conf->delayed_list);
-		else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
+			if (atomic_read(&conf->preread_active_stripes)
+			    < IO_THRESHOLD)
+				md_wakeup_thread(conf->mddev->thread);
+		} else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
			   sh->bm_seq - conf->seq_write > 0)
			list_add_tail(&sh->lru, &conf->bitmap_list);
		else {
@@ -413,6 +416,11 @@ static void release_stripe(struct stripe_head *sh)
	int hash;
	bool wakeup;

+	/* Avoid release_list until the last reference.
+	 */
+	if (atomic_add_unless(&sh->count, -1, 1))
+		return;
+
	if (unlikely(!conf->mddev->thread) ||
		test_and_set_bit(STRIPE_ON_RELEASE_LIST, &sh->state))
		goto slow_path;
@@ -479,6 +487,7 @@ static void shrink_buffers(struct stripe_head *sh)
	int num = sh->raid_conf->pool_size;

	for (i = 0; i < num ; i++) {
+		WARN_ON(sh->dev[i].page != sh->dev[i].orig_page);
		p = sh->dev[i].page;
		if (!p)
			continue;
@@ -499,6 +508,7 @@ static int grow_buffers(struct stripe_head *sh)
			return 1;
		}
		sh->dev[i].page = page;
+		sh->dev[i].orig_page = page;
	}
	return 0;
}
@@ -855,6 +865,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
				bi->bi_rw |= REQ_NOMERGE;

+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].vec.bv_page = sh->dev[i].page;
			bi->bi_vcnt = 1;
			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
			bi->bi_io_vec[0].bv_offset = 0;
@@ -899,6 +912,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
			else
				rbi->bi_iter.bi_sector = (sh->sector
						  + rrdev->data_offset);
+			if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
+				WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].rvec.bv_page = sh->dev[i].page;
			rbi->bi_vcnt = 1;
			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
			rbi->bi_io_vec[0].bv_offset = 0;
@@ -927,8 +943,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
}

static struct dma_async_tx_descriptor *
-async_copy_data(int frombio, struct bio *bio, struct page *page,
-	sector_t sector, struct dma_async_tx_descriptor *tx)
+async_copy_data(int frombio, struct bio *bio, struct page **page,
+	sector_t sector, struct dma_async_tx_descriptor *tx,
+	struct stripe_head *sh)
{
	struct bio_vec bvl;
	struct bvec_iter iter;
@@ -965,11 +982,16 @@ async_copy_data(int frombio, struct bio *bio, struct page *page,
		if (clen > 0) {
			b_offset += bvl.bv_offset;
			bio_page = bvl.bv_page;
-			if (frombio)
-				tx = async_memcpy(page, bio_page, page_offset,
-						  b_offset, clen, &submit);
+			if (frombio) {
+				if (sh->raid_conf->skip_copy &&
+				    b_offset == 0 && page_offset == 0 &&
+				    clen == STRIPE_SIZE)
+					*page = bio_page;
				else
-				tx = async_memcpy(bio_page, page, b_offset,
+					tx = async_memcpy(*page, bio_page, page_offset,
+						  b_offset, clen, &submit);
+			} else
+				tx = async_memcpy(bio_page, *page, b_offset,
						  page_offset, clen, &submit);
		}
		/* chain the operations */
@@ -1045,8 +1067,8 @@ static void ops_run_biofill(struct stripe_head *sh)
			spin_unlock_irq(&sh->stripe_lock);
			while (rbi && rbi->bi_iter.bi_sector <
				dev->sector + STRIPE_SECTORS) {
-				tx = async_copy_data(0, rbi, dev->page,
-					dev->sector, tx);
+				tx = async_copy_data(0, rbi, &dev->page,
+					dev->sector, tx, sh);
				rbi = r5_next_bio(rbi, dev->sector);
			}
		}
@@ -1384,6 +1406,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
			BUG_ON(dev->written);
			wbi = dev->written = chosen;
			spin_unlock_irq(&sh->stripe_lock);
+			WARN_ON(dev->page != dev->orig_page);

			while (wbi && wbi->bi_iter.bi_sector <
				dev->sector + STRIPE_SECTORS) {
@@ -1393,9 +1416,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
					set_bit(R5_SyncIO, &dev->flags);
				if (wbi->bi_rw & REQ_DISCARD)
					set_bit(R5_Discard, &dev->flags);
-				else
-					tx = async_copy_data(1, wbi, dev->page,
-						dev->sector, tx);
+				else {
+					tx = async_copy_data(1, wbi, &dev->page,
+						dev->sector, tx, sh);
+					if (dev->page != dev->orig_page) {
+						set_bit(R5_SkipCopy, &dev->flags);
+						clear_bit(R5_UPTODATE, &dev->flags);
+						clear_bit(R5_OVERWRITE, &dev->flags);
+					}
+				}
				wbi = r5_next_bio(wbi, dev->sector);
			}
		}
@@ -1426,7 +1455,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref)
		struct r5dev *dev = &sh->dev[i];

		if (dev->written || i == pd_idx || i == qd_idx) {
-			if (!discard)
+			if (!discard && !test_bit(R5_SkipCopy, &dev->flags))
				set_bit(R5_UPTODATE, &dev->flags);
			if (fua)
				set_bit(R5_WantFUA, &dev->flags);
@@ -1839,8 +1868,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
		osh = get_free_stripe(conf, hash);
		unlock_device_hash_lock(conf, hash);
		atomic_set(&nsh->count, 1);
-		for(i=0; i<conf->pool_size; i++)
+		for(i=0; i<conf->pool_size; i++) {
			nsh->dev[i].page = osh->dev[i].page;
+			nsh->dev[i].orig_page = osh->dev[i].page;
+		}
		for( ; i<newsize; i++)
			nsh->dev[i].page = NULL;
		nsh->hash_lock_index = hash;
@@ -1896,6 +1927,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
			if (nsh->dev[i].page == NULL) {
				struct page *p = alloc_page(GFP_NOIO);
				nsh->dev[i].page = p;
+				nsh->dev[i].orig_page = p;
				if (!p)
					err = -ENOMEM;
			}
@@ -2140,17 +2172,13 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous)

	bio_init(&dev->req);
	dev->req.bi_io_vec = &dev->vec;
-	dev->req.bi_vcnt++;
-	dev->req.bi_max_vecs++;
+	dev->req.bi_max_vecs = 1;
	dev->req.bi_private = sh;
-	dev->vec.bv_page = dev->page;

	bio_init(&dev->rreq);
	dev->rreq.bi_io_vec = &dev->rvec;
-	dev->rreq.bi_vcnt++;
-	dev->rreq.bi_max_vecs++;
+	dev->rreq.bi_max_vecs = 1;
	dev->rreq.bi_private = sh;
-	dev->rvec.bv_page = dev->page;

	dev->flags = 0;
	dev->sector = compute_blocknr(sh, i, previous);
@@ -2750,6 +2778,11 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
		/* and fail all 'written' */
		bi = sh->dev[i].written;
		sh->dev[i].written = NULL;
+		if (test_and_clear_bit(R5_SkipCopy, &sh->dev[i].flags)) {
+			WARN_ON(test_bit(R5_UPTODATE, &sh->dev[i].flags));
+			sh->dev[i].page = sh->dev[i].orig_page;
+		}
+
		if (bi) bitmap_end = 1;
		while (bi && bi->bi_iter.bi_sector <
		       sh->dev[i].sector + STRIPE_SECTORS) {
@@ -2886,8 +2919,11 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
	     (s->failed >= 1 && fdev[0]->toread) ||
	     (s->failed >= 2 && fdev[1]->toread) ||
	     (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite &&
+	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
	      !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
-	     (sh->raid_conf->level == 6 && s->failed && s->to_write))) {
+	     (sh->raid_conf->level == 6 && s->failed && s->to_write &&
+	      s->to_write < sh->raid_conf->raid_disks - 2 &&
+	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
		/* we would like to get this block, possibly by computing it,
		 * otherwise read it if the backing disk is insync
		 */
@@ -2991,12 +3027,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
			dev = &sh->dev[i];
			if (!test_bit(R5_LOCKED, &dev->flags) &&
			    (test_bit(R5_UPTODATE, &dev->flags) ||
-			     test_bit(R5_Discard, &dev->flags))) {
+			     test_bit(R5_Discard, &dev->flags) ||
+			     test_bit(R5_SkipCopy, &dev->flags))) {
				/* We can return any write requests */
				struct bio *wbi, *wbi2;
				pr_debug("Return write for disc %d\n", i);
				if (test_and_clear_bit(R5_Discard, &dev->flags))
					clear_bit(R5_UPTODATE, &dev->flags);
+				if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
+					WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
+					dev->page = dev->orig_page;
+				}
				wbi = dev->written;
				dev->written = NULL;
				while (wbi && wbi->bi_iter.bi_sector <
@@ -3015,6 +3056,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
						0);
			} else if (test_bit(R5_Discard, &dev->flags))
				discard_pending = 1;
+			WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
+			WARN_ON(dev->page != dev->orig_page);
		}
	if (!discard_pending &&
	    test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
@@ -3086,7 +3129,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
		    !test_bit(R5_LOCKED, &dev->flags) &&
		    !(test_bit(R5_UPTODATE, &dev->flags) ||
		    test_bit(R5_Wantcompute, &dev->flags))) {
-			if (test_bit(R5_Insync, &dev->flags)) rcw++;
+			if (test_bit(R5_Insync, &dev->flags))
+				rcw++;
			else
				rcw += 2*disks;
		}
@@ -3107,10 +3151,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
			    !(test_bit(R5_UPTODATE, &dev->flags) ||
			    test_bit(R5_Wantcompute, &dev->flags)) &&
			    test_bit(R5_Insync, &dev->flags)) {
-				if (
-				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-					pr_debug("Read_old block "
-						 "%d for r-m-w\n", i);
+				if (test_bit(STRIPE_PREREAD_ACTIVE,
+					     &sh->state)) {
+					pr_debug("Read_old block %d for r-m-w\n",
+						 i);
					set_bit(R5_LOCKED, &dev->flags);
					set_bit(R5_Wantread, &dev->flags);
					s->locked++;
@@ -3133,10 +3177,9 @@ static void handle_stripe_dirtying(struct r5conf *conf,
			    !(test_bit(R5_UPTODATE, &dev->flags) ||
			      test_bit(R5_Wantcompute, &dev->flags))) {
				rcw++;
-				if (!test_bit(R5_Insync, &dev->flags))
-					continue; /* it's a failed drive */
-				if (
-				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+				if (test_bit(R5_Insync, &dev->flags) &&
+				    test_bit(STRIPE_PREREAD_ACTIVE,
+					     &sh->state)) {
					pr_debug("Read_old block "
						"%d for Reconstruct\n", i);
					set_bit(R5_LOCKED, &dev->flags);
@@ -5031,8 +5074,8 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
	bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);

	set_bit(STRIPE_SYNC_REQUESTED, &sh->state);
-	set_bit(STRIPE_HANDLE, &sh->state);

+	handle_stripe(sh);
	release_stripe(sh);

	return STRIPE_SECTORS;
@@ -5072,7 +5115,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
			/* already done this stripe */
			continue;

-		sh = get_active_stripe(conf, sector, 0, 1, 0);
+		sh = get_active_stripe(conf, sector, 0, 1, 1);

		if (!sh) {
			/* failed to get a stripe - must wait */
@@ -5354,6 +5397,50 @@ raid5_preread_bypass_threshold = __ATTR(preread_bypass_threshold,
					raid5_show_preread_threshold,
					raid5_store_preread_threshold);

+static ssize_t
+raid5_show_skip_copy(struct mddev *mddev, char *page)
+{
+	struct r5conf *conf = mddev->private;
+	if (conf)
+		return sprintf(page, "%d\n", conf->skip_copy);
+	else
+		return 0;
+}
+
+static ssize_t
+raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
+{
+	struct r5conf *conf = mddev->private;
+	unsigned long new;
+	if (len >= PAGE_SIZE)
+		return -EINVAL;
+	if (!conf)
+		return -ENODEV;
+
+	if (kstrtoul(page, 10, &new))
+		return -EINVAL;
+	new = !!new;
+	if (new == conf->skip_copy)
+		return len;
+
+	mddev_suspend(mddev);
+	conf->skip_copy = new;
+	if (new)
+		mddev->queue->backing_dev_info.capabilities |=
+						BDI_CAP_STABLE_WRITES;
+	else
+		mddev->queue->backing_dev_info.capabilities &=
+						~BDI_CAP_STABLE_WRITES;
+	mddev_resume(mddev);
+	return len;
+}
+
+static struct md_sysfs_entry
+raid5_skip_copy = __ATTR(skip_copy, S_IRUGO | S_IWUSR,
+					raid5_show_skip_copy,
+					raid5_store_skip_copy);
+
+
static ssize_t
stripe_cache_active_show(struct mddev *mddev, char *page)
{
@@ -5439,6 +5526,7 @@ static struct attribute *raid5_attrs[] = {
	&raid5_stripecache_active.attr,
	&raid5_preread_bypass_threshold.attr,
	&raid5_group_thread_cnt.attr,
+	&raid5_skip_copy.attr,
	NULL,
};
static struct attribute_group raid5_attrs_group = {
drivers/md/raid5.h  +3 −1
@@ -232,7 +232,7 @@ struct stripe_head {
		 */
		struct bio	req, rreq;
		struct bio_vec	vec, rvec;
-		struct page	*page;
+		struct page	*page, *orig_page;
		struct bio	*toread, *read, *towrite, *written;
		sector_t	sector;			/* sector of this page */
		unsigned long	flags;
@@ -299,6 +299,7 @@ enum r5dev_flags {
			 * data in, and now is a good time to write it out.
			 */
	R5_Discard,	/* Discard the stripe */
+	R5_SkipCopy,	/* Don't copy data from bio to stripe cache */
};

/*
@@ -436,6 +437,7 @@ struct r5conf {
	atomic_t		pending_full_writes; /* full write backlog */
	int			bypass_count; /* bypassed prereads */
	int			bypass_threshold; /* preread nice */
+	int			skip_copy; /* Don't copy data from bio to stripe cache */
	struct list_head	*last_hold; /* detect hold_list promotions */

	atomic_t		reshape_stripes; /* stripes with pending writes for reshape */