Commit d9734e0d authored by Linus Torvalds

Merge branch 'for-4.4/core' of git://git.kernel.dk/linux-block

Pull core block updates from Jens Axboe:
 "This is the core block pull request for 4.4.  I've got a few more
  topic branches this time around; some of them will layer on top of the
  core+drivers changes and will come in a separate round.  So not a huge
  chunk of changes in this round.

  This pull request contains:

   - Enable blk-mq page allocation tracking with kmemleak, from Catalin.

   - Unused prototype removal in blk-mq from Christoph.

   - Cleanup of the q->blk_trace exchange, using a single cmpxchg()
     instead of two xchg()'s, from Davidlohr (the pattern is sketched
     just after this message).

   - A plug flush fix from Jeff.

   - Also from Jeff, a fix that means we don't have to update shared tag
     sets at init time unless we do a state change.  This substantially
     cuts down boot times with scsi/blk-mq on systems with thousands of
     devices.

   - blk-mq waitqueue barrier fix from Kosuke.

   - Various fixes from Ming:

        - Fixes for segment merging, splitting, and the related
          checks, for both the old core and blk-mq.

        - Potential blk-mq speedup by marking the ctx pending once at
          the end of a plug insertion batch, instead of per request.

        - direct-io: don't dirty pages on in-kernel direct reads
          (ITER_BVEC/ITER_KVEC).

   - A WRITE_SYNC fix for mpage from Roman"
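
The blktrace hunk itself is not among the diffs loaded on this page, so the
following is only an illustrative sketch of the pattern that bullet
describes, with names (q, bt, old_bt) chosen to match the surrounding style.
The old code published the new pointer with xchg() and then had to undo the
exchange on conflict, briefly exposing a pointer it was about to retract; a
single cmpxchg() installs the pointer only if the field is still NULL.

	/* Old pattern: publish first, roll back on conflict (two xchg()'s). */
	old_bt = xchg(&q->blk_trace, bt);
	if (old_bt) {
		(void)xchg(&q->blk_trace, old_bt);	/* undo the publish */
		ret = -EBUSY;
		goto err;
	}

	/*
	 * New pattern: install bt only if q->blk_trace is still NULL, in
	 * one atomic step, so readers never see a transient pointer.
	 */
	if (cmpxchg(&q->blk_trace, NULL, bt)) {
		ret = -EBUSY;
		goto err;
	}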

* 'for-4.4/core' of git://git.kernel.dk/linux-block:
  blk-mq: avoid excessive boot delays with large lun counts
  blktrace: re-write setting q->blk_trace
  blk-mq: mark ctx as pending at batch in flush plug path
  blk-mq: fix for trace_block_plug()
  block: check bio_mergeable() early before merging
  blk-mq: check bio_mergeable() early before merging
  block: avoid to merge splitted bio
  block: setup bi_phys_segments after splitting
  block: fix plug list flushing for nomerge queues
  blk-mq: remove unused blk_mq_clone_flush_request prototype
  blk-mq: fix waitqueue_active without memory barrier in block/blk-mq-tag.c
  fs: direct-io: don't dirtying pages for ITER_BVEC/ITER_KVEC direct read
  fs/mpage.c: forgotten WRITE_SYNC in case of data integrity write
  block: kmemleak: Track the page allocations for struct request
parents 0d51ce9c 2404e607
block/blk-core.c  +29 −3
@@ -1594,6 +1594,30 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
	return ret;
}

+unsigned int blk_plug_queued_count(struct request_queue *q)
+{
+	struct blk_plug *plug;
+	struct request *rq;
+	struct list_head *plug_list;
+	unsigned int ret = 0;
+
+	plug = current->plug;
+	if (!plug)
+		goto out;
+
+	if (q->mq_ops)
+		plug_list = &plug->mq_list;
+	else
+		plug_list = &plug->list;
+
+	list_for_each_entry(rq, plug_list, queuelist) {
+		if (rq->q == q)
+			ret++;
+	}
+out:
+	return ret;
+}
+
void init_request_from_bio(struct request *req, struct bio *bio)
{
	req->cmd_type = REQ_TYPE_FS;
@@ -1641,9 +1665,11 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
	 * Check if we can merge with the plugged list before grabbing
	 * any locks.
	 */
-	if (!blk_queue_nomerges(q) &&
-	    blk_attempt_plug_merge(q, bio, &request_count, NULL))
-		return;
+	if (!blk_queue_nomerges(q)) {
+		if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
+			return;
+	} else
+		request_count = blk_plug_queued_count(q);

	spin_lock_irq(q->queue_lock);

block/blk-merge.c  +25 −7
@@ -11,13 +11,16 @@

static struct bio *blk_bio_discard_split(struct request_queue *q,
					 struct bio *bio,
-					 struct bio_set *bs)
+					 struct bio_set *bs,
+					 unsigned *nsegs)
{
	unsigned int max_discard_sectors, granularity;
	int alignment;
	sector_t tmp;
	unsigned split_sectors;

+	*nsegs = 1;
+
	/* Zero-sector (unknown) and one-sector granularities are the same.  */
	granularity = max(q->limits.discard_granularity >> 9, 1U);

@@ -51,8 +54,11 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,

static struct bio *blk_bio_write_same_split(struct request_queue *q,
					    struct bio *bio,
-					    struct bio_set *bs)
+					    struct bio_set *bs,
+					    unsigned *nsegs)
{
+	*nsegs = 1;
+
	if (!q->limits.max_write_same_sectors)
		return NULL;

@@ -64,7 +70,8 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,

static struct bio *blk_bio_segment_split(struct request_queue *q,
					 struct bio *bio,
-					 struct bio_set *bs)
+					 struct bio_set *bs,
+					 unsigned *segs)
{
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	struct bvec_iter iter;
@@ -106,24 +113,35 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
		sectors += bv.bv_len >> 9;
	}

+	*segs = nsegs;
	return NULL;
split:
+	*segs = nsegs;
	return bio_split(bio, sectors, GFP_NOIO, bs);
}

void blk_queue_split(struct request_queue *q, struct bio **bio,
		     struct bio_set *bs)
{
-	struct bio *split;
+	struct bio *split, *res;
+	unsigned nsegs;

	if ((*bio)->bi_rw & REQ_DISCARD)
-		split = blk_bio_discard_split(q, *bio, bs);
+		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
	else if ((*bio)->bi_rw & REQ_WRITE_SAME)
-		split = blk_bio_write_same_split(q, *bio, bs);
+		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
	else
-		split = blk_bio_segment_split(q, *bio, q->bio_split);
+		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);

+	/* physical segments can be figured out during splitting */
+	res = split ? split : *bio;
+	res->bi_phys_segments = nsegs;
+	bio_set_flag(res, BIO_SEG_VALID);
+
	if (split) {
+		/* there isn't chance to merge the splitted bio */
+		split->bi_rw |= REQ_NOMERGE;
+
		bio_chain(split, *bio);
		generic_make_request(*bio);
		*bio = split;
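
Two details of this hunk are worth unpacking.  Marking the split bio with
REQ_NOMERGE works because bio_mergeable(), used by the early checks in the
shortlog, reduces to a flag test; and caching bi_phys_segments with
BIO_SEG_VALID lets later paths skip a recount.  A sketch of both consumer
patterns follows (check_segments_example is a hypothetical wrapper name;
bio_mergeable() is shown as the simple test it amounts to):

/*
 * A bio is mergeable unless one of the no-merge flags, REQ_NOMERGE among
 * them, is set -- so the split bio above is excluded from all merge paths.
 */
static inline bool bio_mergeable(struct bio *bio)
{
	if (bio->bi_rw & REQ_NOMERGE_FLAGS)
		return false;

	return true;
}

/*
 * Segment counts are only recomputed when the cached value is not valid;
 * the assignment in blk_queue_split() above makes this a no-op for
 * freshly split bios.
 */
static void check_segments_example(struct request_queue *q, struct bio *bio)
{
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
}
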
block/blk-mq-tag.c  +4 −0
@@ -75,6 +75,10 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
	struct blk_mq_bitmap_tags *bt;
	int i, wake_index;

+	/*
+	 * Make sure all changes prior to this are visible from other CPUs.
+	 */
+	smp_mb();
	bt = &tags->bitmap_tags;
	wake_index = atomic_read(&bt->wake_index);
	for (i = 0; i < BT_WAIT_QUEUES; i++) {
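
The barrier added above closes a classic lost-wakeup race:
waitqueue_active() is a lockless check, so without ordering, the waker can
read the wait queue as empty before a just-arrived waiter's enqueue becomes
visible, skip the wakeup, and leave the waiter asleep.  A minimal generic
sketch of the required pairing (wq and done are illustrative names, not
identifiers from this patch):

static DECLARE_WAIT_QUEUE_HEAD(wq);
static bool done;

/*
 * Waker: order the store to 'done' before the lockless queue check; this
 * smp_mb() pairs with the full barrier implied by prepare_to_wait()
 * setting the task state on the waiter side.
 */
static void waker(void)
{
	done = true;
	smp_mb();
	if (waitqueue_active(&wq))
		wake_up(&wq);
}

/*
 * Waiter: enqueue and set the task state first, then re-check the
 * condition, so a wakeup arriving after the check cannot be lost.
 */
static void waiter(void)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&wq, &wait, TASK_UNINTERRUPTIBLE);
	if (!done)
		schedule();
	finish_wait(&wq, &wait);
}
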
block/blk-mq.c  +62 −27
@@ -9,6 +9,7 @@
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/kmemleak.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
@@ -989,18 +990,25 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
}
EXPORT_SYMBOL(blk_mq_delay_queue);

-static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
-				    struct request *rq, bool at_head)
+static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
+					    struct blk_mq_ctx *ctx,
+					    struct request *rq,
+					    bool at_head)
{
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-
	trace_block_rq_insert(hctx->queue, rq);

	if (at_head)
		list_add(&rq->queuelist, &ctx->rq_list);
	else
		list_add_tail(&rq->queuelist, &ctx->rq_list);
+}
+
+static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
+				    struct request *rq, bool at_head)
+{
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+
+	__blk_mq_insert_req_list(hctx, ctx, rq, at_head);
	blk_mq_hctx_mark_pending(hctx, ctx);
}

@@ -1056,8 +1064,9 @@ static void blk_mq_insert_requests(struct request_queue *q,
		rq = list_first_entry(list, struct request, queuelist);
		list_del_init(&rq->queuelist);
		rq->mq_ctx = ctx;
-		__blk_mq_insert_request(hctx, rq, false);
+		__blk_mq_insert_req_list(hctx, ctx, rq, false);
	}
+	blk_mq_hctx_mark_pending(hctx, ctx);
	spin_unlock(&ctx->lock);

	blk_mq_run_hw_queue(hctx, from_schedule);
@@ -1139,7 +1148,7 @@ static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
					 struct blk_mq_ctx *ctx,
					 struct request *rq, struct bio *bio)
{
-	if (!hctx_allow_merges(hctx)) {
+	if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
		blk_mq_bio_to_request(rq, bio);
		spin_lock(&ctx->lock);
insert_rq:
@@ -1267,9 +1276,12 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)

	blk_queue_split(q, &bio, q->bio_split);

-	if (!is_flush_fua && !blk_queue_nomerges(q) &&
-	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
-		return;
+	if (!is_flush_fua && !blk_queue_nomerges(q)) {
+		if (blk_attempt_plug_merge(q, bio, &request_count,
+					   &same_queue_rq))
+			return;
+	} else
+		request_count = blk_plug_queued_count(q);

	rq = blk_mq_map_request(q, bio, &data);
	if (unlikely(!rq))
@@ -1376,7 +1388,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
	plug = current->plug;
	if (plug) {
		blk_mq_bio_to_request(rq, bio);
-		if (list_empty(&plug->mq_list))
+		if (!request_count)
			trace_block_plug(q);
		else if (request_count >= BLK_MAX_REQUEST_COUNT) {
			blk_flush_plug_list(plug, false);
@@ -1430,6 +1442,11 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
	while (!list_empty(&tags->page_list)) {
		page = list_first_entry(&tags->page_list, struct page, lru);
		list_del_init(&page->lru);
+		/*
+		 * Remove kmemleak object previously allocated in
+		 * blk_mq_init_rq_map().
+		 */
+		kmemleak_free(page_address(page));
		__free_pages(page, page->private);
	}

@@ -1502,6 +1519,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
		list_add_tail(&page->lru, &tags->page_list);

		p = page_address(page);
+		/*
+		 * Allow kmemleak to scan these pages as they contain pointers
+		 * to additional allocations like via ops->init_request().
+		 */
+		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
		entries_per_page = order_to_size(this_order) / rq_size;
		to_do = min(entries_per_page, set->queue_depth - i);
		left -= to_do * rq_size;
@@ -1673,7 +1695,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
	INIT_LIST_HEAD(&hctx->dispatch);
	hctx->queue = q;
	hctx->queue_num = hctx_idx;
-	hctx->flags = set->flags;
+	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;

	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
					blk_mq_hctx_notify, hctx);
@@ -1860,27 +1882,26 @@ static void blk_mq_map_swqueue(struct request_queue *q,
	}
}

-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
+static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{
	struct blk_mq_hw_ctx *hctx;
-	struct request_queue *q;
-	bool shared;
	int i;

-	if (set->tag_list.next == set->tag_list.prev)
-		shared = false;
-	else
-		shared = true;
-
-	list_for_each_entry(q, &set->tag_list, tag_set_list) {
-		blk_mq_freeze_queue(q);
-
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (shared)
-				hctx->flags |= BLK_MQ_F_TAG_SHARED;
-			else
-				hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
-		}
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (shared)
+			hctx->flags |= BLK_MQ_F_TAG_SHARED;
+		else
+			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+	}
+}
+
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+{
+	struct request_queue *q;
+
+	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+		blk_mq_freeze_queue(q);
+		queue_set_hctx_shared(q, shared);
		blk_mq_unfreeze_queue(q);
	}
}
@@ -1891,7 +1912,12 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)

	mutex_lock(&set->tag_list_lock);
	list_del_init(&q->tag_set_list);
-	blk_mq_update_tag_set_depth(set);
+	if (list_is_singular(&set->tag_list)) {
+		/* just transitioned to unshared */
+		set->flags &= ~BLK_MQ_F_TAG_SHARED;
+		/* update existing queue */
+		blk_mq_update_tag_set_depth(set, false);
+	}
	mutex_unlock(&set->tag_list_lock);
}

@@ -1901,8 +1927,17 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
	q->tag_set = set;

	mutex_lock(&set->tag_list_lock);

+	/* Check to see if we're transitioning to shared (from 1 to 2 queues). */
+	if (!list_empty(&set->tag_list) && !(set->flags & BLK_MQ_F_TAG_SHARED)) {
+		set->flags |= BLK_MQ_F_TAG_SHARED;
+		/* update existing queue */
+		blk_mq_update_tag_set_depth(set, true);
+	}
+	if (set->flags & BLK_MQ_F_TAG_SHARED)
+		queue_set_hctx_shared(q, true);
	list_add_tail(&q->tag_set_list, &set->tag_list);
-	blk_mq_update_tag_set_depth(set);
+
	mutex_unlock(&set->tag_list_lock);
}
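
On the kmemleak calls earlier in this file's hunks: pages taken straight
from the page allocator are not scanned by kmemleak, so the request structs
stored in them -- and in particular the pointers they hold to allocations
made via ops->init_request() -- would make those allocations look
unreferenced and get flagged as leaks.  kmemleak_alloc() registers the
region for scanning, and kmemleak_free() unregisters it before the pages go
back.  A condensed sketch of the pairing (the helper names are
hypothetical):

#include <linux/kmemleak.h>

/* Hypothetical helpers condensing the pattern used above. */
static void *alloc_scanned_region(unsigned int order)
{
	struct page *page = alloc_pages(GFP_KERNEL, order);

	if (!page)
		return NULL;
	/* min_count = 1: the region itself is also leak-checked */
	kmemleak_alloc(page_address(page), PAGE_SIZE << order, 1, GFP_KERNEL);
	return page_address(page);
}

static void free_scanned_region(void *addr, unsigned int order)
{
	kmemleak_free(addr);		/* unregister before freeing */
	free_pages((unsigned long)addr, order);
}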

block/blk-mq.h  +0 −2
@@ -29,8 +29,6 @@ void __blk_mq_complete_request(struct request *rq);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
-void blk_mq_clone_flush_request(struct request *flush_rq,
-		struct request *orig_rq);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
