
Commit 5e57dc81 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block IO fixes from Jens Axboe:
 "Second round of updates and fixes for 3.14-rc2.  Most of this stuff
  has been queued up for a while.  The notable exception is the blk-mq
  changes, which are naturally a bit more in flux still.

  The pull request contains:

   - Two bug fixes for the new immutable vecs, causing crashes with raid
     or swap.  From Kent.

   - Various blk-mq tweaks and fixes from Christoph.  A fix for
     integrity bios from Nic.

   - A few bcache fixes from Kent and Darrick Wong.

   - xen-blk{front,back} fixes from David Vrabel, Matt Rushton, Nicolas
     Swenson, and Roger Pau Monne.

   - Fix for a vec miscount with integrity vectors from Martin.

   - Minor annotations or fixes from Masanari Iida and Rashika Kheria.

   - Tweak to null_blk to do more normal FIFO processing of requests
     from Shlomo Pongratz.

   - Elevator switching bypass fix from Tejun.

   - Softlockup in blkdev_issue_discard() fix when !CONFIG_PREEMPT from
     me"

* 'for-linus' of git://git.kernel.dk/linux-block: (31 commits)
  block: add cond_resched() to potentially long running ioctl discard loop
  xen-blkback: init persistent_purge_work work_struct
  blk-mq: pair blk_mq_start_request / blk_mq_requeue_request
  blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq
  block: Fix cloning of discard/write same bios
  block: Fix type mismatch in ssize_t_blk_mq_tag_sysfs_show
  blk-mq: rework flush sequencing logic
  null_blk: use blk_complete_request and blk_mq_complete_request
  virtio_blk: use blk_mq_complete_request
  blk-mq: rework I/O completions
  fs: Add prototype declaration to appropriate header file include/linux/bio.h
  fs: Mark function as static in fs/bio-integrity.c
  block/null_blk: Fix completion processing from LIFO to FIFO
  block: Explicitly handle discard/write same segments
  block: Fix nr_vecs for inline integrity vectors
  blk-mq: Add bio_integrity setup to blk_mq_make_request
  blk-mq: initialize sg_reserved_size
  blk-mq: handle dma_drain_size
  blk-mq: divert __blk_put_request for MQ ops
  blk-mq: support at_head inserations for blk_execute_rq
  ...
parents 0d25e369 c8123f8c
block/blk-core.c +17 −3
@@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	if (!uninit_q)
 		return NULL;
 
+	uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
+	if (!uninit_q->flush_rq)
+		goto out_cleanup_queue;
+
 	q = blk_init_allocated_queue(uninit_q, rfn, lock);
 	if (!q)
-		blk_cleanup_queue(uninit_q);
-
+		goto out_free_flush_rq;
 	return q;
+
+out_free_flush_rq:
+	kfree(uninit_q->flush_rq);
+out_cleanup_queue:
+	blk_cleanup_queue(uninit_q);
+	return NULL;
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
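The reworked error path above is the usual kernel unwind idiom: each acquisition gets a label, and a failure jumps to the label that releases everything acquired so far, in reverse order. A minimal sketch of the pattern, with hypothetical alloc_a()/alloc_b()/foo_register() standing in for the real calls:

	/* Sketch of staged goto cleanup; all names here are illustrative. */
	static struct foo *foo_create(void)
	{
		struct foo *f;

		f = alloc_a();
		if (!f)
			return NULL;

		f->buf = alloc_b();
		if (!f->buf)
			goto out_free_a;	/* undo alloc_a() only */

		if (foo_register(f))
			goto out_free_b;	/* undo both, newest first */

		return f;

	out_free_b:
		free_b(f->buf);
	out_free_a:
		free_a(f);
		return NULL;
	}
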
@@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
 	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw, gfp_mask, false);
+		return blk_mq_alloc_request(q, rw, gfp_mask);
 	else
 		return blk_old_get_request(q, rw, gfp_mask);
 }
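
blk_get_request() is the entry point for drivers that build requests by hand, and with this change it transparently draws from the blk-mq tag pool on multiqueue devices. A hedged sketch of the classic call sequence against the 3.14-era API (in 3.14, allocation failure is reported as NULL):

	/* Sketch: allocate a request, execute it synchronously, release it. */
	struct request *rq;

	rq = blk_get_request(q, READ, GFP_KERNEL);
	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;	/* e.g. a passthrough command */
	blk_execute_rq(q, NULL, rq, 0);		/* at_head == 0: queue at the tail */
	blk_put_request(rq);			/* on mq queues this lands in
						 * blk_mq_free_request(), per the
						 * __blk_put_request() hunk below */
	return 0;
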
@@ -1278,6 +1287,11 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	if (unlikely(!q))
 		return;
 
+	if (q->mq_ops) {
+		blk_mq_free_request(req);
+		return;
+	}
+
 	blk_pm_put_request(req);
 
 	elv_completed_request(q, req);
block/blk-exec.c +1 −1
@@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	 * be reused after dying flag is set
 	 */
 	if (q->mq_ops) {
-		blk_mq_insert_request(q, rq, true);
+		blk_mq_insert_request(q, rq, at_head, true);
 		return;
 	}
 
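The new argument matters because blk_execute_rq_nowait() takes an at_head flag: callers such as SCSI error handling insert at the head so their request bypasses everything already queued, and before this fix the flag was silently dropped on mq queues. A sketch of a caller against the 3.14 signature, with a hypothetical completion callback:

	/* Sketch: fire-and-forget execution at the head of the queue. */
	static void my_end_io(struct request *rq, int error)
	{
		__blk_put_request(rq->q, rq);	/* drop our reference on completion */
	}

	static void issue_urgent(struct request_queue *q, struct request *rq)
	{
		blk_execute_rq_nowait(q, NULL, rq, 1 /* at_head */, my_end_io);
	}
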
block/blk-flush.c +37 −64
@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq)
 	blk_clear_rq_complete(rq);
 }
 
-static void mq_flush_data_run(struct work_struct *work)
+static void mq_flush_run(struct work_struct *work)
 {
 	struct request *rq;
 
-	rq = container_of(work, struct request, mq_flush_data);
+	rq = container_of(work, struct request, mq_flush_work);
 
 	memset(&rq->csd, 0, sizeof(rq->csd));
 	blk_mq_run_request(rq, true, false);
 }
 
-static void blk_mq_flush_data_insert(struct request *rq)
+static bool blk_flush_queue_rq(struct request *rq)
 {
-	INIT_WORK(&rq->mq_flush_data, mq_flush_data_run);
-	kblockd_schedule_work(rq->q, &rq->mq_flush_data);
+	if (rq->q->mq_ops) {
+		INIT_WORK(&rq->mq_flush_work, mq_flush_run);
+		kblockd_schedule_work(rq->q, &rq->mq_flush_work);
+		return false;
+	} else {
+		list_add_tail(&rq->queuelist, &rq->q->queue_head);
+		return true;
+	}
 }
 
 /**
@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 
 	case REQ_FSEQ_DATA:
 		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
-		if (q->mq_ops)
-			blk_mq_flush_data_insert(rq);
-		else {
-			list_add(&rq->queuelist, &q->queue_head);
-			queued = true;
-		}
+		queued = blk_flush_queue_rq(rq);
 		break;
 
 	case REQ_FSEQ_DONE:
@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 	}
 
 	kicked = blk_kick_flush(q);
-	/* blk_mq_run_flush will run queue */
-	if (q->mq_ops)
-		return queued;
 	return kicked | queued;
 }
 
@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error)
 	struct request *rq, *n;
 	unsigned long flags = 0;
 
-	if (q->mq_ops) {
-		blk_mq_free_request(flush_rq);
+	if (q->mq_ops)
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
-	}
 
 	running = &q->flush_queue[q->flush_running_idx];
 	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
 
@@ -263,49 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * kblockd.
 	 */
 	if (queued || q->flush_queue_delayed) {
-		if (!q->mq_ops)
-			blk_run_queue_async(q);
-		else
-		/*
-		 * This can be optimized to only run queues with requests
-		 * queued if necessary.
-		 */
-			blk_mq_run_queues(q, true);
+		WARN_ON(q->mq_ops);
+		blk_run_queue_async(q);
 	}
 	q->flush_queue_delayed = 0;
 	if (q->mq_ops)
 		spin_unlock_irqrestore(&q->mq_flush_lock, flags);
 }
 
-static void mq_flush_work(struct work_struct *work)
-{
-	struct request_queue *q;
-	struct request *rq;
-
-	q = container_of(work, struct request_queue, mq_flush_work);
-
-	/* We don't need set REQ_FLUSH_SEQ, it's for consistency */
-	rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
-		__GFP_WAIT|GFP_ATOMIC, true);
-	rq->cmd_type = REQ_TYPE_FS;
-	rq->end_io = flush_end_io;
-
-	blk_mq_run_request(rq, true, false);
-}
-
-/*
- * We can't directly use q->flush_rq, because it doesn't have tag and is not in
- * hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
- * so offload the work to workqueue.
- *
- * Note: we assume a flush request finished in any hardware queue will flush
- * the whole disk cache.
- */
-static void mq_run_flush(struct request_queue *q)
-{
-	kblockd_schedule_work(q, &q->mq_flush_work);
-}
-
 /**
  * blk_kick_flush - consider issuing flush request
  * @q: request_queue being kicked
@@ -340,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q)
 	 * different from running_idx, which means flush is in flight.
 	 */
 	q->flush_pending_idx ^= 1;
+
 	if (q->mq_ops) {
-		mq_run_flush(q);
-		return true;
+		struct blk_mq_ctx *ctx = first_rq->mq_ctx;
+		struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+		blk_mq_rq_init(hctx, q->flush_rq);
+		q->flush_rq->mq_ctx = ctx;
+
+		/*
+		 * Reuse the tag value from the first waiting request,
+		 * with blk-mq the tag is generated during request
+		 * allocation and drivers can rely on it being inside
+		 * the range they asked for.
+		 */
+		q->flush_rq->tag = first_rq->tag;
+	} else {
+		blk_rq_init(q, q->flush_rq);
 	}
 
-	blk_rq_init(q, &q->flush_rq);
-	q->flush_rq.cmd_type = REQ_TYPE_FS;
-	q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
-	q->flush_rq.rq_disk = first_rq->rq_disk;
-	q->flush_rq.end_io = flush_end_io;
+	q->flush_rq->cmd_type = REQ_TYPE_FS;
+	q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+	q->flush_rq->rq_disk = first_rq->rq_disk;
+	q->flush_rq->end_io = flush_end_io;
 
-	list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
-	return true;
+	return blk_flush_queue_rq(q->flush_rq);
 }
 
 static void flush_data_end_io(struct request *rq, int error)
@@ -558,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush);
 void blk_mq_init_flush(struct request_queue *q)
 {
 	spin_lock_init(&q->mq_flush_lock);
-	INIT_WORK(&q->mq_flush_work, mq_flush_work);
 }
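
For orientation in the flush hunks above: a flush request is decomposed into up to three steps (PREFLUSH, DATA, POSTFLUSH) depending on what the submitter asked for and what the device supports, and blk_flush_complete_seq() advances a request through that sequence. The step selection is roughly as follows, paraphrasing the 3.14-era blk_flush_policy() rather than quoting it:

	/* Which steps does this request need?  fflags holds the queue's flush
	 * capabilities (REQ_FLUSH/REQ_FUA); rq->cmd_flags is the submitter's ask. */
	static unsigned int flush_policy(unsigned int fflags, struct request *rq)
	{
		unsigned int policy = 0;

		if (blk_rq_sectors(rq))			/* carries data? */
			policy |= REQ_FSEQ_DATA;

		if (fflags & REQ_FLUSH) {		/* volatile write cache present */
			if (rq->cmd_flags & REQ_FLUSH)
				policy |= REQ_FSEQ_PREFLUSH;
			if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
				policy |= REQ_FSEQ_POSTFLUSH;	/* emulate FUA */
		}
		return policy;
	}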
block/blk-lib.c +8 −0
@@ -119,6 +119,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 		atomic_inc(&bb.done);
 		submit_bio(type, bio);
+
+		/*
+		 * We can loop for a long time in here, if someone does
+		 * full device discards (like mkfs). Be nice and allow
+		 * us to schedule out to avoid softlocking if preempt
+		 * is disabled.
+		 */
+		cond_resched();
 	}
 	blk_finish_plug(&plug);
 
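The fix generalizes: any kernel loop that can run for seconds without sleeping should contain a voluntary preemption point, because with CONFIG_PREEMPT disabled nothing else will preempt it and the softlockup watchdog eventually fires. The shape of the pattern, as a sketch with a hypothetical submit_one_chunk() helper:

	/* Sketch: long-running submission loop with a reschedule point. */
	while (nr_sects) {
		submit_one_chunk(bdev, &sector, &nr_sects);

		/* Nearly free when nothing else is runnable; essential under
		 * !CONFIG_PREEMPT to let other tasks make progress. */
		cond_resched();
	}
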
block/blk-merge.c +62 −29
@@ -21,6 +21,16 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 	if (!bio)
 		return 0;
 
+	/*
+	 * This should probably be returning 0, but blk_add_request_payload()
+	 * (Christoph!!!!)
+	 */
+	if (bio->bi_rw & REQ_DISCARD)
+		return 1;
+
+	if (bio->bi_rw & REQ_WRITE_SAME)
+		return 1;
+
 	fbio = bio;
 	cluster = blk_queue_cluster(q);
 	seg_size = 0;
@@ -161,30 +171,60 @@ new_segment:
 	*bvprv = *bvec;
 }
 
-/*
- * map a request to scatterlist, return number of sg entries setup. Caller
- * must make sure sg can hold rq->nr_phys_segments entries
- */
-int blk_rq_map_sg(struct request_queue *q, struct request *rq,
-		  struct scatterlist *sglist)
+static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
+			     struct scatterlist *sglist,
+			     struct scatterlist **sg)
 {
 	struct bio_vec bvec, bvprv = { NULL };
-	struct req_iterator iter;
-	struct scatterlist *sg;
+	struct bvec_iter iter;
 	int nsegs, cluster;
 
 	nsegs = 0;
 	cluster = blk_queue_cluster(q);
 
-	/*
-	 * for each bio in rq
-	 */
-	sg = NULL;
-	rq_for_each_segment(bvec, rq, iter) {
-		__blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
-				     &nsegs, &cluster);
-	} /* segments in rq */
+	if (bio->bi_rw & REQ_DISCARD) {
+		/*
+		 * This is a hack - drivers should be neither modifying the
+		 * biovec, nor relying on bi_vcnt - but because of
+		 * blk_add_request_payload(), a discard bio may or may not have
+		 * a payload we need to set up here (thank you Christoph) and
+		 * bi_vcnt is really the only way of telling if we need to.
+		 */
+
+		if (bio->bi_vcnt)
+			goto single_segment;
+
+		return 0;
+	}
+
+	if (bio->bi_rw & REQ_WRITE_SAME) {
+single_segment:
+		*sg = sglist;
+		bvec = bio_iovec(bio);
+		sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
+		return 1;
+	}
+
+	for_each_bio(bio)
+		bio_for_each_segment(bvec, bio, iter)
+			__blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
+					     &nsegs, &cluster);
+
+	return nsegs;
+}
+
+/*
+ * map a request to scatterlist, return number of sg entries setup. Caller
+ * must make sure sg can hold rq->nr_phys_segments entries
+ */
+int blk_rq_map_sg(struct request_queue *q, struct request *rq,
+		  struct scatterlist *sglist)
+{
+	struct scatterlist *sg = NULL;
+	int nsegs = 0;
+
+	if (rq->bio)
+		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
 
 	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
 	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
@@ -230,20 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg);
 int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
 		   struct scatterlist *sglist)
 {
-	struct bio_vec bvec, bvprv = { NULL };
-	struct scatterlist *sg;
-	int nsegs, cluster;
-	struct bvec_iter iter;
-
-	nsegs = 0;
-	cluster = blk_queue_cluster(q);
-
-	sg = NULL;
-	bio_for_each_segment(bvec, bio, iter) {
-		__blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
-				     &nsegs, &cluster);
-	} /* segments in bio */
+	struct scatterlist *sg = NULL;
+	int nsegs;
+	struct bio *next = bio->bi_next;
+	bio->bi_next = NULL;
 
+	nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
+	bio->bi_next = next;
 	if (sg)
 		sg_mark_end(sg);
 
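The bi_next save/restore around the call is the subtle part: __blk_bios_map_sg() walks the whole bio chain via for_each_bio(), but blk_bio_map_sg() must map exactly one bio, so it temporarily severs the chain. for_each_bio() itself is essentially just a linked-list walk over bi_next, along the lines of:

	/* Roughly how include/linux/bio.h defines the chain walk. */
	#define for_each_bio(_bio) \
		for (; _bio; _bio = _bio->bi_next)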