
Commit 12f5b931 authored by Keith Busch, committed by Jens Axboe

blk-mq: Remove generation sequence



This patch simplifies the timeout handling by relying on the request
reference counting to ensure the iterator is operating on an inflight
and truly timed out request. Since the reference counting prevents the
tag from being reallocated, the block layer no longer needs to prevent
drivers from completing their requests while the timeout handler is
operating on them: a driver completing a request is allowed to proceed
to the next state without additional synchronization with the block
layer.
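In outline, the completion side now claims a request with a single
atomic state change, mirroring __blk_mq_complete_request() in the diff
below; whichever of the completion path and the timeout handler makes
the transition first wins, and the other simply backs off:

	/* Only one context wins the IN_FLIGHT -> COMPLETE transition. */
	if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
			MQ_RQ_IN_FLIGHT)
		return;
	/* ...deliver the completion as usual... */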

This also removes the need for generation sequence numbers, since a
request can no longer be reallocated as a new instance while the
timeout handler is operating on it.

To enable this, a refcount is added to struct request so that request
users can be sure they're operating on the same request without it
changing while they're processing it. The request's tag won't be
released for reuse until both the timeout handler and the completion
path are done with it.
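In outline, the timeout path pins a candidate request before acting on
it, and only the last reference holder returns the tag, as
blk_mq_check_expired() in the diff below does:

	if (!blk_mq_req_expired(rq, next))	/* cheap check first */
		return;
	if (!refcount_inc_not_zero(&rq->ref))	/* completion already freed it */
		return;
	if (blk_mq_req_expired(rq, next))	/* re-check while pinned */
		blk_mq_rq_timed_out(rq, reserved);
	if (refcount_dec_and_test(&rq->ref))	/* last reference frees the tag */
		__blk_mq_free_request(rq);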

Signed-off-by: Keith Busch <keith.busch@intel.com>
[hch: slight cleanups, added back submission side hctx lock, use cmpxchg
 for completions]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ad103e79
block/blk-core.c: +0 −6
@@ -198,12 +198,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	rq->internal_tag = -1;
 	rq->start_time_ns = ktime_get_ns();
 	rq->part = NULL;
-	seqcount_init(&rq->gstate_seq);
-	u64_stats_init(&rq->aborted_gstate_sync);
-	/*
-	 * See comment of blk_mq_init_request
-	 */
-	WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
 }
 EXPORT_SYMBOL(blk_rq_init);

block/blk-mq-debugfs.c: +0 −1
@@ -344,7 +344,6 @@ static const char *const rqf_name[] = {
 	RQF_NAME(STATS),
 	RQF_NAME(SPECIAL_PAYLOAD),
 	RQF_NAME(ZONE_WRITE_LOCKED),
-	RQF_NAME(MQ_TIMEOUT_EXPIRED),
 	RQF_NAME(MQ_POLL_SLEPT),
 };
 #undef RQF_NAME

block/blk-mq.c: +75 −183
@@ -332,6 +332,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 #endif
 
 	data->ctx->rq_dispatched[op_is_sync(op)]++;
+	refcount_set(&rq->ref, 1);
 	return rq;
 }
 
@@ -465,13 +466,27 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
+static void __blk_mq_free_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
+	const int sched_tag = rq->internal_tag;
+
+	if (rq->tag != -1)
+		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
+	if (sched_tag != -1)
+		blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
+	blk_mq_sched_restart(hctx);
+	blk_queue_exit(q);
+}
+
 void blk_mq_free_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	struct elevator_queue *e = q->elevator;
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
-	const int sched_tag = rq->internal_tag;
 
 	if (rq->rq_flags & RQF_ELVPRIV) {
 		if (e && e->type->ops.mq.finish_request)
@@ -494,13 +509,9 @@ void blk_mq_free_request(struct request *rq)
 	if (blk_rq_rl(rq))
 		blk_put_rl(blk_rq_rl(rq));
 
-	blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
-	if (rq->tag != -1)
-		blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
-	if (sched_tag != -1)
-		blk_mq_put_tag(hctx, hctx->sched_tags, ctx, sched_tag);
-	blk_mq_sched_restart(hctx);
-	blk_queue_exit(q);
+	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
+	if (refcount_dec_and_test(&rq->ref))
+		__blk_mq_free_request(rq);
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
@@ -547,8 +558,9 @@ static void __blk_mq_complete_request(struct request *rq)
 	bool shared = false;
 	int cpu;
 
-	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT);
-	blk_mq_rq_update_state(rq, MQ_RQ_COMPLETE);
+	if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) !=
+			MQ_RQ_IN_FLIGHT)
+		return;
 
 	if (rq->internal_tag != -1)
 		blk_mq_sched_completed_request(rq);
@@ -593,36 +605,6 @@ static void hctx_lock(struct blk_mq_hw_ctx *hctx, int *srcu_idx)
 		*srcu_idx = srcu_read_lock(hctx->srcu);
 }
 
-static void blk_mq_rq_update_aborted_gstate(struct request *rq, u64 gstate)
-{
-	unsigned long flags;
-
-	/*
-	 * blk_mq_rq_aborted_gstate() is used from the completion path and
-	 * can thus be called from irq context.  u64_stats_fetch in the
-	 * middle of update on the same CPU leads to lockup.  Disable irq
-	 * while updating.
-	 */
-	local_irq_save(flags);
-	u64_stats_update_begin(&rq->aborted_gstate_sync);
-	rq->aborted_gstate = gstate;
-	u64_stats_update_end(&rq->aborted_gstate_sync);
-	local_irq_restore(flags);
-}
-
-static u64 blk_mq_rq_aborted_gstate(struct request *rq)
-{
-	unsigned int start;
-	u64 aborted_gstate;
-
-	do {
-		start = u64_stats_fetch_begin(&rq->aborted_gstate_sync);
-		aborted_gstate = rq->aborted_gstate;
-	} while (u64_stats_fetch_retry(&rq->aborted_gstate_sync, start));
-
-	return aborted_gstate;
-}
-
 /**
  * blk_mq_complete_request - end I/O on a request
  * @rq:		the request being processed
@@ -633,28 +615,9 @@ static u64 blk_mq_rq_aborted_gstate(struct request *rq)
  **/
 void blk_mq_complete_request(struct request *rq)
 {
-	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
-	int srcu_idx;
-
-	if (unlikely(blk_should_fake_timeout(q)))
+	if (unlikely(blk_should_fake_timeout(rq->q)))
 		return;
-
-	/*
-	 * If @rq->aborted_gstate equals the current instance, timeout is
-	 * claiming @rq and we lost.  This is synchronized through
-	 * hctx_lock().  See blk_mq_timeout_work() for details.
-	 *
-	 * Completion path never blocks and we can directly use RCU here
-	 * instead of hctx_lock() which can be either RCU or SRCU.
-	 * However, that would complicate paths which want to synchronize
-	 * against us.  Let stay in sync with the issue path so that
-	 * hctx_lock() covers both issue and completion paths.
-	 */
-	hctx_lock(hctx, &srcu_idx);
-	if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
-		__blk_mq_complete_request(rq);
-	hctx_unlock(hctx, srcu_idx);
+	__blk_mq_complete_request(rq);
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
@@ -683,25 +646,8 @@ void blk_mq_start_request(struct request *rq)
 
 	WARN_ON_ONCE(blk_mq_rq_state(rq) != MQ_RQ_IDLE);
 
-	/*
-	 * Mark @rq in-flight which also advances the generation number,
-	 * and register for timeout.  Protect with a seqcount to allow the
-	 * timeout path to read both @rq->gstate and @rq->deadline
-	 * coherently.
-	 *
-	 * This is the only place where a request is marked in-flight.  If
-	 * the timeout path reads an in-flight @rq->gstate, the
-	 * @rq->deadline it reads together under @rq->gstate_seq is
-	 * guaranteed to be the matching one.
-	 */
-	preempt_disable();
-	write_seqcount_begin(&rq->gstate_seq);
-
 	blk_add_timer(rq);
-	blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
-
-	write_seqcount_end(&rq->gstate_seq);
-	preempt_enable();
+	WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
 
 	if (q->dma_drain_size && blk_rq_bytes(rq)) {
 		/*
@@ -714,11 +660,6 @@ void blk_mq_start_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
-/*
- * When we reach here because queue is busy, it's safe to change the state
- * to IDLE without checking @rq->aborted_gstate because we should still be
- * holding the RCU read lock and thus protected against timeout.
- */
 static void __blk_mq_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -728,8 +669,8 @@ static void __blk_mq_requeue_request(struct request *rq)
 	trace_block_rq_requeue(q, rq);
 	wbt_requeue(q->rq_wb, rq);
 
-	if (blk_mq_rq_state(rq) != MQ_RQ_IDLE) {
-		blk_mq_rq_update_state(rq, MQ_RQ_IDLE);
+	if (blk_mq_request_started(rq)) {
+		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
 		if (q->dma_drain_size && blk_rq_bytes(rq))
 			rq->nr_phys_segments--;
 	}
@@ -827,33 +768,20 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
-struct blk_mq_timeout_data {
-	unsigned long next;
-	unsigned int next_set;
-	unsigned int nr_expired;
-};
-
 static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 {
 	const struct blk_mq_ops *ops = req->q->mq_ops;
 	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
 
-	req->rq_flags |= RQF_MQ_TIMEOUT_EXPIRED;
-
 	if (ops->timeout)
 		ret = ops->timeout(req, reserved);
 
 	switch (ret) {
 	case BLK_EH_HANDLED:
-		__blk_mq_complete_request(req);
+		if (blk_mq_rq_state(req) == MQ_RQ_IN_FLIGHT)
+			__blk_mq_complete_request(req);
 		break;
 	case BLK_EH_RESET_TIMER:
-		/*
-		 * As nothing prevents from completion happening while
-		 * ->aborted_gstate is set, this may lead to ignored
-		 * completions and further spurious timeouts.
-		 */
-		blk_mq_rq_update_aborted_gstate(req, 0);
 		blk_add_timer(req);
 		break;
 	case BLK_EH_NOT_HANDLED:
@@ -864,64 +792,65 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 	}
 }
 
-static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
-		struct request *rq, void *priv, bool reserved)
+static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
 {
-	struct blk_mq_timeout_data *data = priv;
-	unsigned long gstate, deadline;
-	int start;
-
-	might_sleep();
+	unsigned long deadline;
 
-	if (rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED)
-		return;
+	if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
+		return false;
 
-	/* read coherent snapshots of @rq->state_gen and @rq->deadline */
-	while (true) {
-		start = read_seqcount_begin(&rq->gstate_seq);
-		gstate = READ_ONCE(rq->gstate);
-		deadline = blk_rq_deadline(rq);
-		if (!read_seqcount_retry(&rq->gstate_seq, start))
-			break;
-		cond_resched();
-	}
+	deadline = blk_rq_deadline(rq);
+	if (time_after_eq(jiffies, deadline))
+		return true;
 
-	/* if in-flight && overdue, mark for abortion */
-	if ((gstate & MQ_RQ_STATE_MASK) == MQ_RQ_IN_FLIGHT &&
-	    time_after_eq(jiffies, deadline)) {
-		blk_mq_rq_update_aborted_gstate(rq, gstate);
-		data->nr_expired++;
-		hctx->nr_expired++;
-	} else if (!data->next_set || time_after(data->next, deadline)) {
-		data->next = deadline;
-		data->next_set = 1;
-	}
+	if (*next == 0)
+		*next = deadline;
+	else if (time_after(*next, deadline))
+		*next = deadline;
+	return false;
 }
 
-static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
+static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
+	unsigned long *next = priv;
+
+	/*
+	 * Just do a quick check if it is expired before locking the request in
+	 * so we're not unnecessarilly synchronizing across CPUs.
+	 */
+	if (!blk_mq_req_expired(rq, next))
+		return;
+
 	/*
-	 * We marked @rq->aborted_gstate and waited for RCU.  If there were
-	 * completions that we lost to, they would have finished and
-	 * updated @rq->gstate by now; otherwise, the completion path is
-	 * now guaranteed to see @rq->aborted_gstate and yield.  If
-	 * @rq->aborted_gstate still matches @rq->gstate, @rq is ours.
+	 * We have reason to believe the request may be expired. Take a
+	 * reference on the request to lock this request lifetime into its
+	 * currently allocated context to prevent it from being reallocated in
+	 * the event the completion by-passes this timeout handler.
+	 *
+	 * If the reference was already released, then the driver beat the
+	 * timeout handler to posting a natural completion.
+	 */
+	if (!refcount_inc_not_zero(&rq->ref))
+		return;
+
+	/*
+	 * The request is now locked and cannot be reallocated underneath the
+	 * timeout handler's processing. Re-verify this exact request is truly
+	 * expired; if it is not expired, then the request was completed and
+	 * reallocated as a new request.
 	 */
-	if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
-	    READ_ONCE(rq->gstate) == rq->aborted_gstate)
+	if (blk_mq_req_expired(rq, next))
 		blk_mq_rq_timed_out(rq, reserved);
+	if (refcount_dec_and_test(&rq->ref))
+		__blk_mq_free_request(rq);
 }
 
 static void blk_mq_timeout_work(struct work_struct *work)
 {
 	struct request_queue *q =
 		container_of(work, struct request_queue, timeout_work);
-	struct blk_mq_timeout_data data = {
-		.next		= 0,
-		.next_set	= 0,
-		.nr_expired	= 0,
-	};
+	unsigned long next = 0;
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
@@ -941,39 +870,10 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	if (!percpu_ref_tryget(&q->q_usage_counter))
 		return;
 
-	/* scan for the expired ones and set their ->aborted_gstate */
-	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
-
-	if (data.nr_expired) {
-		bool has_rcu = false;
-
-		/*
-		 * Wait till everyone sees ->aborted_gstate.  The
-		 * sequential waits for SRCUs aren't ideal.  If this ever
-		 * becomes a problem, we can add per-hw_ctx rcu_head and
-		 * wait in parallel.
-		 */
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (!hctx->nr_expired)
-				continue;
-
-			if (!(hctx->flags & BLK_MQ_F_BLOCKING))
-				has_rcu = true;
-			else
-				synchronize_srcu(hctx->srcu);
-
-			hctx->nr_expired = 0;
-		}
-		if (has_rcu)
-			synchronize_rcu();
-
-		/* terminate the ones we won */
-		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
-	}
+	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
 
-	if (data.next_set) {
-		data.next = blk_rq_timeout(round_jiffies_up(data.next));
-		mod_timer(&q->timeout, data.next);
+	if (next != 0) {
+		mod_timer(&q->timeout, next);
 	} else {
 		/*
 		 * Request timeouts are handled as a forward rolling timer. If
@@ -2049,15 +1949,7 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			return ret;
 	}
 
-	seqcount_init(&rq->gstate_seq);
-	u64_stats_init(&rq->aborted_gstate_sync);
-	/*
-	 * start gstate with gen 1 instead of 0, otherwise it will be equal
-	 * to aborted_gstate, and be identified timed out by
-	 * blk_mq_terminate_expired.
-	 */
-	WRITE_ONCE(rq->gstate, MQ_RQ_GEN_INC);
+	WRITE_ONCE(rq->state, MQ_RQ_IDLE);
 
 	return 0;
 }

block/blk-mq.h: +2 −40
@@ -30,20 +30,6 @@ struct blk_mq_ctx {
 	struct kobject		kobj;
 } ____cacheline_aligned_in_smp;
 
-/*
- * Bits for request->gstate.  The lower two bits carry MQ_RQ_* state value
- * and the upper bits the generation number.
- */
-enum mq_rq_state {
-	MQ_RQ_IDLE		= 0,
-	MQ_RQ_IN_FLIGHT		= 1,
-	MQ_RQ_COMPLETE		= 2,
-
-	MQ_RQ_STATE_BITS	= 2,
-	MQ_RQ_STATE_MASK	= (1 << MQ_RQ_STATE_BITS) - 1,
-	MQ_RQ_GEN_INC		= 1 << MQ_RQ_STATE_BITS,
-};
-
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
@@ -107,33 +93,9 @@ void blk_mq_release(struct request_queue *q);
  * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
  * @rq: target request.
  */
-static inline int blk_mq_rq_state(struct request *rq)
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
 {
-	return READ_ONCE(rq->gstate) & MQ_RQ_STATE_MASK;
-}
-
-/**
- * blk_mq_rq_update_state() - set the current MQ_RQ_* state of a request
- * @rq: target request.
- * @state: new state to set.
- *
- * Set @rq's state to @state.  The caller is responsible for ensuring that
- * there are no other updaters.  A request can transition into IN_FLIGHT
- * only from IDLE and doing so increments the generation number.
- */
-static inline void blk_mq_rq_update_state(struct request *rq,
-					  enum mq_rq_state state)
-{
-	u64 old_val = READ_ONCE(rq->gstate);
-	u64 new_val = (old_val & ~MQ_RQ_STATE_MASK) | state;
-
-	if (state == MQ_RQ_IN_FLIGHT) {
-		WARN_ON_ONCE((old_val & MQ_RQ_STATE_MASK) != MQ_RQ_IDLE);
-		new_val += MQ_RQ_GEN_INC;
-	}
-
-	/* avoid exposing interim values */
-	WRITE_ONCE(rq->gstate, new_val);
+	return READ_ONCE(rq->state);
 }
 
 static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,

block/blk-timeout.c: +0 −1
@@ -214,7 +214,6 @@ void blk_add_timer(struct request *req)
 		req->timeout = q->rq_timeout;
 
 	blk_rq_set_deadline(req, jiffies + req->timeout);
-	req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
 
 	/*
 	 * Only the non-mq case needs to add the request to a protected list.