Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 80201fe1 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-5.1/block-20190302' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:
 "Not a huge amount of changes in this round, the biggest one is that we
  finally have Mings multi-page bvec support merged. Apart from that,
  this pull request contains:

   - Small series that avoids quiescing the queue for sysfs changes that
     match what we currently have (Aleksei)

   - Series of bcache fixes (via Coly)

   - Series of lightnvm fixes (via Mathias)

   - NVMe pull request from Christoph. Nothing major, just SPDX/license
     cleanups, RR mp policy (Hannes), and little fixes (Bart,
     Chaitanya).

   - BFQ series (Paolo)

   - Save blk-mq cpu -> hw queue mapping, removing a pointer indirection
     for the fast path (Jianchao)

   - fops->iopoll() added for async IO polling, this is a feature that
     the upcoming io_uring interface will use (Christoph, me)

   - Partition scan loop fixes (Dongli)

   - mtip32xx conversion from managed resource API (Christoph)

   - cdrom registration race fix (Guenter)

   - MD pull from Song, two minor fixes.

   - Various documentation fixes (Marcos)

   - Multi-page bvec feature. This brings a lot of nice improvements
     with it, like more efficient splitting, larger IOs can be supported
     without growing the bvec table size, and so on. (Ming)

   - Various little fixes to core and drivers"

* tag 'for-5.1/block-20190302' of git://git.kernel.dk/linux-block: (117 commits)
  block: fix updating bio's front segment size
  block: Replace function name in string with __func__
  nbd: propagate genlmsg_reply return code
  floppy: remove set but not used variable 'q'
  null_blk: fix checking for REQ_FUA
  block: fix NULL pointer dereference in register_disk
  fs: fix guard_bio_eod to check for real EOD errors
  blk-mq: use HCTX_TYPE_DEFAULT but not 0 to index blk_mq_tag_set->map
  block: optimize bvec iteration in bvec_iter_advance
  block: introduce mp_bvec_for_each_page() for iterating over page
  block: optimize blk_bio_segment_split for single-page bvec
  block: optimize __blk_segment_map_sg() for single-page bvec
  block: introduce bvec_nth_page()
  iomap: wire up the iopoll method
  block: add bio_set_polled() helper
  block: wire up block device iopoll method
  fs: add an iopoll method to struct file_operations
  loop: set GENHD_FL_NO_PART_SCAN after blkdev_reread_part()
  loop: do not print warn message if partition scan is successful
  block: bounce: make sure that bvec table is updated
  ...
parents 4221b807 aaeee62c
Loading
Loading
Loading
Loading
+25 −0
Original line number Diff line number Diff line
@@ -117,3 +117,28 @@ Other implications:
   size limitations and the limitations of the underlying devices. Thus
   there's no need to define ->merge_bvec_fn() callbacks for individual block
   drivers.

Usage of helpers:
=================

* The following helpers whose names have the suffix of "_all" can only be used
on non-BIO_CLONED bio. They are usually used by filesystem code. Drivers
shouldn't use them because the bio may have been split before it reached the
driver.

	bio_for_each_segment_all()
	bio_first_bvec_all()
	bio_first_page_all()
	bio_last_bvec_all()

* The following helpers iterate over single-page segment. The passed 'struct
bio_vec' will contain a single-page IO vector during the iteration

	bio_for_each_segment()
	bio_for_each_segment_all()

* The following helpers iterate over multi-page bvec. The passed 'struct
bio_vec' will contain a multi-page IO vector during the iteration

	bio_for_each_bvec()
	rq_for_each_bvec()
+3 −0
Original line number Diff line number Diff line
@@ -857,6 +857,7 @@ struct file_operations {
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
	int (*iopoll)(struct kiocb *kiocb, bool spin);
	int (*iterate) (struct file *, struct dir_context *);
	int (*iterate_shared) (struct file *, struct dir_context *);
	__poll_t (*poll) (struct file *, struct poll_table_struct *);
@@ -902,6 +903,8 @@ otherwise noted.

  write_iter: possibly asynchronous write with iov_iter as source

  iopoll: called when aio wants to poll for completions on HIPRI iocbs

  iterate: called when the VFS needs to read the directory contents

  iterate_shared: called when the VFS needs to read the directory contents
+380 −325
Original line number Diff line number Diff line
@@ -230,11 +230,16 @@ static struct kmem_cache *bfq_pool;
#define BFQ_MIN_TT		(2 * NSEC_PER_MSEC)

/* hw_tag detection: parallel requests threshold and min samples needed. */
#define BFQ_HW_QUEUE_THRESHOLD	4
#define BFQ_HW_QUEUE_THRESHOLD	3
#define BFQ_HW_QUEUE_SAMPLES	32

#define BFQQ_SEEK_THR		(sector_t)(8 * 100)
#define BFQQ_SECT_THR_NONROT	(sector_t)(2 * 32)
#define BFQ_RQ_SEEKY(bfqd, last_pos, rq) \
	(get_sdist(last_pos, rq) >			\
	 BFQQ_SEEK_THR &&				\
	 (!blk_queue_nonrot(bfqd->queue) ||		\
	  blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT))
#define BFQQ_CLOSE_THR		(sector_t)(8 * 1024)
#define BFQQ_SEEKY(bfqq)	(hweight32(bfqq->seek_history) > 19)

@@ -623,26 +628,6 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
		bfqq->pos_root = NULL;
}

/*
 * Tell whether there are active queues with different weights or
 * active groups.
 */
static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
{
	/*
	 * For queue weights to differ, queue_weights_tree must contain
	 * at least two nodes.
	 */
	return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
		(bfqd->queue_weights_tree.rb_node->rb_left ||
		 bfqd->queue_weights_tree.rb_node->rb_right)
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	       ) ||
		(bfqd->num_groups_with_pending_reqs > 0
#endif
	       );
}

/*
 * The following function returns true if every queue must receive the
 * same share of the throughput (this condition is used when deciding
@@ -651,25 +636,48 @@ static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
 *
 * Such a scenario occurs when:
 * 1) all active queues have the same weight,
 * 2) all active groups at the same level in the groups tree have the same
 *    weight,
 * 2) all active queues belong to the same I/O-priority class,
 * 3) all active groups at the same level in the groups tree have the same
 *    weight,
 * 4) all active groups at the same level in the groups tree have the same
 *    number of children.
 *
 * Unfortunately, keeping the necessary state for evaluating exactly
 * the last two symmetry sub-conditions above would be quite complex
 * and time consuming. Therefore this function evaluates, instead,
 * only the following stronger two sub-conditions, for which it is
 * only the following stronger three sub-conditions, for which it is
 * much easier to maintain the needed state:
 * 1) all active queues have the same weight,
 * 2) there are no active groups.
 * 2) all active queues belong to the same I/O-priority class,
 * 3) there are no active groups.
 * In particular, the last condition is always true if hierarchical
 * support or the cgroups interface are not enabled, thus no state
 * needs to be maintained in this case.
 */
static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
{
	return !bfq_varied_queue_weights_or_active_groups(bfqd);
	/*
	 * For queue weights to differ, queue_weights_tree must contain
	 * at least two nodes.
	 */
	bool varied_queue_weights = !RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
		(bfqd->queue_weights_tree.rb_node->rb_left ||
		 bfqd->queue_weights_tree.rb_node->rb_right);

	bool multiple_classes_busy =
		(bfqd->busy_queues[0] && bfqd->busy_queues[1]) ||
		(bfqd->busy_queues[0] && bfqd->busy_queues[2]) ||
		(bfqd->busy_queues[1] && bfqd->busy_queues[2]);

	/*
	 * For queue weights to differ, queue_weights_tree must contain
	 * at least two nodes.
	 */
	return !(varied_queue_weights || multiple_classes_busy
#ifdef BFQ_GROUP_IOSCHED_ENABLED
	       || bfqd->num_groups_with_pending_reqs > 0
#endif
		);
}

/*
@@ -728,15 +736,14 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
	/*
	 * In the unlucky event of an allocation failure, we just
	 * exit. This will cause the weight of queue to not be
	 * considered in bfq_varied_queue_weights_or_active_groups,
	 * which, in its turn, causes the scenario to be deemed
	 * wrongly symmetric in case bfqq's weight would have been
	 * the only weight making the scenario asymmetric.  On the
	 * bright side, no unbalance will however occur when bfqq
	 * becomes inactive again (the invocation of this function
	 * is triggered by an activation of queue).  In fact,
	 * bfq_weights_tree_remove does nothing if
	 * !bfqq->weight_counter.
	 * considered in bfq_symmetric_scenario, which, in its turn,
	 * causes the scenario to be deemed wrongly symmetric in case
	 * bfqq's weight would have been the only weight making the
	 * scenario asymmetric.  On the bright side, no unbalance will
	 * however occur when bfqq becomes inactive again (the
	 * invocation of this function is triggered by an activation
	 * of queue).  In fact, bfq_weights_tree_remove does nothing
	 * if !bfqq->weight_counter.
	 */
	if (unlikely(!bfqq->weight_counter))
		return;
@@ -747,6 +754,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,

inc_counter:
	bfqq->weight_counter->num_active++;
	bfqq->ref++;
}

/*
@@ -771,6 +779,7 @@ void __bfq_weights_tree_remove(struct bfq_data *bfqd,

reset_entity_pointer:
	bfqq->weight_counter = NULL;
	bfq_put_queue(bfqq);
}

/*
@@ -782,9 +791,6 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
{
	struct bfq_entity *entity = bfqq->entity.parent;

	__bfq_weights_tree_remove(bfqd, bfqq,
				  &bfqd->queue_weights_tree);

	for_each_entity(entity) {
		struct bfq_sched_data *sd = entity->my_sched_data;

@@ -818,6 +824,15 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
			bfqd->num_groups_with_pending_reqs--;
		}
	}

	/*
	 * Next function is invoked last, because it causes bfqq to be
	 * freed if the following holds: bfqq is not in service and
	 * has no dispatched request. DO NOT use bfqq after the next
	 * function invocation.
	 */
	__bfq_weights_tree_remove(bfqd, bfqq,
				  &bfqd->queue_weights_tree);
}

/*
@@ -873,7 +888,8 @@ static struct request *bfq_find_next_rq(struct bfq_data *bfqd,
static unsigned long bfq_serv_to_charge(struct request *rq,
					struct bfq_queue *bfqq)
{
	if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
	if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1 ||
	    !bfq_symmetric_scenario(bfqq->bfqd))
		return blk_rq_sectors(rq);

	return blk_rq_sectors(rq) * bfq_async_charge_factor;
@@ -907,8 +923,10 @@ static void bfq_updated_next_req(struct bfq_data *bfqd,
		 */
		return;

	new_budget = max_t(unsigned long, bfqq->max_budget,
			   bfq_serv_to_charge(next_rq, bfqq));
	new_budget = max_t(unsigned long,
			   max_t(unsigned long, bfqq->max_budget,
				 bfq_serv_to_charge(next_rq, bfqq)),
			   entity->service);
	if (entity->budget != new_budget) {
		entity->budget = new_budget;
		bfq_log_bfqq(bfqd, bfqq, "updated next rq: new budget %lu",
@@ -1011,7 +1029,8 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,

static int bfqq_process_refs(struct bfq_queue *bfqq)
{
	return bfqq->ref - bfqq->allocated - bfqq->entity.on_st;
	return bfqq->ref - bfqq->allocated - bfqq->entity.on_st -
		(bfqq->weight_counter != NULL);
}

/* Empty burst list and add just bfqq (see comments on bfq_handle_burst) */
@@ -1380,7 +1399,15 @@ static bool bfq_bfqq_update_budg_for_activation(struct bfq_data *bfqd,
{
	struct bfq_entity *entity = &bfqq->entity;

	if (bfq_bfqq_non_blocking_wait_rq(bfqq) && arrived_in_time) {
	/*
	 * In the next compound condition, we check also whether there
	 * is some budget left, because otherwise there is no point in
	 * trying to go on serving bfqq with this same budget: bfqq
	 * would be expired immediately after being selected for
	 * service. This would only cause useless overhead.
	 */
	if (bfq_bfqq_non_blocking_wait_rq(bfqq) && arrived_in_time &&
	    bfq_bfqq_budget_left(bfqq) > 0) {
		/*
		 * We do not clear the flag non_blocking_wait_rq here, as
		 * the latter is used in bfq_activate_bfqq to signal
@@ -2217,14 +2244,15 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		return NULL;

	/* If there is only one backlogged queue, don't search. */
	if (bfqd->busy_queues == 1)
	if (bfq_tot_busy_queues(bfqd) == 1)
		return NULL;

	in_service_bfqq = bfqd->in_service_queue;

	if (in_service_bfqq && in_service_bfqq != bfqq &&
	    likely(in_service_bfqq != &bfqd->oom_bfqq) &&
	    bfq_rq_close_to_sector(io_struct, request, bfqd->last_position) &&
	    bfq_rq_close_to_sector(io_struct, request,
				   bfqd->in_serv_last_pos) &&
	    bfqq->entity.parent == in_service_bfqq->entity.parent &&
	    bfq_may_be_close_cooperator(bfqq, in_service_bfqq)) {
		new_bfqq = bfq_setup_merge(bfqq, in_service_bfqq);
@@ -2742,7 +2770,7 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)

	if ((bfqd->rq_in_driver > 0 ||
		now_ns - bfqd->last_completion < BFQ_MIN_TT)
	     && get_sdist(bfqd->last_position, rq) < BFQQ_SEEK_THR)
	    && !BFQ_RQ_SEEKY(bfqd, bfqd->last_position, rq))
		bfqd->sequential_samples++;

	bfqd->tot_sectors_dispatched += blk_rq_sectors(rq);
@@ -2764,6 +2792,8 @@ static void bfq_update_peak_rate(struct bfq_data *bfqd, struct request *rq)
	bfq_update_rate_reset(bfqd, rq);
update_last_values:
	bfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
	if (RQ_BFQQ(rq) == bfqd->in_service_queue)
		bfqd->in_serv_last_pos = bfqd->last_position;
	bfqd->last_dispatch = now_ns;
}

@@ -3274,16 +3304,32 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
		 * requests, then the request pattern is isochronous
		 * (see the comments on the function
		 * bfq_bfqq_softrt_next_start()). Thus we can compute
		 * soft_rt_next_start. If, instead, the queue still
		 * has outstanding requests, then we have to wait for
		 * the completion of all the outstanding requests to
		 * discover whether the request pattern is actually
		 * isochronous.
		 */
		if (bfqq->dispatched == 0)
		 * soft_rt_next_start. And we do it, unless bfqq is in
		 * interactive weight raising. We do not do it in the
		 * latter subcase, for the following reason. bfqq may
		 * be conveying the I/O needed to load a soft
		 * real-time application. Such an application will
		 * actually exhibit a soft real-time I/O pattern after
		 * it finally starts doing its job. But, if
		 * soft_rt_next_start is computed here for an
		 * interactive bfqq, and bfqq had received a lot of
		 * service before remaining with no outstanding
		 * request (likely to happen on a fast device), then
		 * soft_rt_next_start would be assigned such a high
		 * value that, for a very long time, bfqq would be
		 * prevented from being possibly considered as soft
		 * real time.
		 *
		 * If, instead, the queue still has outstanding
		 * requests, then we have to wait for the completion
		 * of all the outstanding requests to discover whether
		 * the request pattern is actually isochronous.
		 */
		if (bfqq->dispatched == 0 &&
		    bfqq->wr_coeff != bfqd->bfq_wr_coeff)
			bfqq->soft_rt_next_start =
				bfq_bfqq_softrt_next_start(bfqd, bfqq);
		else {
		else if (bfqq->dispatched > 0) {
			/*
			 * Schedule an update of soft_rt_next_start to when
			 * the task may be discovered to be isochronous.
@@ -3376,53 +3422,13 @@ static bool bfq_may_expire_for_budg_timeout(struct bfq_queue *bfqq)
		bfq_bfqq_budget_timeout(bfqq);
}

/*
 * For a queue that becomes empty, device idling is allowed only if
 * this function returns true for the queue. As a consequence, since
 * device idling plays a critical role in both throughput boosting and
 * service guarantees, the return value of this function plays a
 * critical role in both these aspects as well.
 *
 * In a nutshell, this function returns true only if idling is
 * beneficial for throughput or, even if detrimental for throughput,
 * idling is however necessary to preserve service guarantees (low
 * latency, desired throughput distribution, ...). In particular, on
 * NCQ-capable devices, this function tries to return false, so as to
 * help keep the drives' internal queues full, whenever this helps the
 * device boost the throughput without causing any service-guarantee
 * issue.
 *
 * In more detail, the return value of this function is obtained by,
 * first, computing a number of boolean variables that take into
 * account throughput and service-guarantee issues, and, then,
 * combining these variables in a logical expression. Most of the
 * issues taken into account are not trivial. We discuss these issues
 * individually while introducing the variables.
 */
static bool bfq_better_to_idle(struct bfq_queue *bfqq)
static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd,
					     struct bfq_queue *bfqq)
{
	struct bfq_data *bfqd = bfqq->bfqd;
	bool rot_without_queueing =
		!blk_queue_nonrot(bfqd->queue) && !bfqd->hw_tag,
		bfqq_sequential_and_IO_bound,
		idling_boosts_thr, idling_boosts_thr_without_issues,
		idling_needed_for_service_guarantees,
		asymmetric_scenario;

	if (bfqd->strict_guarantees)
		return true;

	/*
	 * Idling is performed only if slice_idle > 0. In addition, we
	 * do not idle if
	 * (a) bfqq is async
	 * (b) bfqq is in the idle io prio class: in this case we do
	 * not idle because we want to minimize the bandwidth that
	 * queues in this class can steal to higher-priority queues
	 */
	if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) ||
	    bfq_class_idle(bfqq))
		return false;
		idling_boosts_thr;

	bfqq_sequential_and_IO_bound = !BFQQ_SEEKY(bfqq) &&
		bfq_bfqq_IO_bound(bfqq) && bfq_bfqq_has_short_ttime(bfqq);
@@ -3454,8 +3460,7 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
		 bfqq_sequential_and_IO_bound);

	/*
	 * The value of the next variable,
	 * idling_boosts_thr_without_issues, is equal to that of
	 * The return value of this function is equal to that of
	 * idling_boosts_thr, unless a special case holds. In this
	 * special case, described below, idling may cause problems to
	 * weight-raised queues.
@@ -3472,29 +3477,29 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
	 * which enqueue several requests in advance, and further
	 * reorder internally-queued requests.
	 *
	 * For this reason, we force to false the value of
	 * idling_boosts_thr_without_issues if there are weight-raised
	 * busy queues. In this case, and if bfqq is not weight-raised,
	 * this guarantees that the device is not idled for bfqq (if,
	 * instead, bfqq is weight-raised, then idling will be
	 * guaranteed by another variable, see below). Combined with
	 * the timestamping rules of BFQ (see [1] for details), this
	 * behavior causes bfqq, and hence any sync non-weight-raised
	 * queue, to get a lower number of requests served, and thus
	 * to ask for a lower number of requests from the request
	 * pool, before the busy weight-raised queues get served
	 * again. This often mitigates starvation problems in the
	 * presence of heavy write workloads and NCQ, thereby
	 * guaranteeing a higher application and system responsiveness
	 * in these hostile scenarios.
	 */
	idling_boosts_thr_without_issues = idling_boosts_thr &&
	 * For this reason, we force to false the return value if
	 * there are weight-raised busy queues. In this case, and if
	 * bfqq is not weight-raised, this guarantees that the device
	 * is not idled for bfqq (if, instead, bfqq is weight-raised,
	 * then idling will be guaranteed by another variable, see
	 * below). Combined with the timestamping rules of BFQ (see
	 * [1] for details), this behavior causes bfqq, and hence any
	 * sync non-weight-raised queue, to get a lower number of
	 * requests served, and thus to ask for a lower number of
	 * requests from the request pool, before the busy
	 * weight-raised queues get served again. This often mitigates
	 * starvation problems in the presence of heavy write
	 * workloads and NCQ, thereby guaranteeing a higher
	 * application and system responsiveness in these hostile
	 * scenarios.
	 */
	return idling_boosts_thr &&
		bfqd->wr_busy_queues == 0;
}

/*
	 * There is then a case where idling must be performed not
	 * for throughput concerns, but to preserve service
	 * guarantees.
 * There is a case where idling must be performed not for
 * throughput concerns, but to preserve service guarantees.
 *
 * To introduce this case, we can note that allowing the drive
 * to enqueue more than one request at a time, and hence
@@ -3654,35 +3659,70 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
 * to let requests be served in the desired order until all
 * the requests already queued in the device have been served.
 */
	asymmetric_scenario = (bfqq->wr_coeff > 1 &&
			       bfqd->wr_busy_queues < bfqd->busy_queues) ||
static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd,
						 struct bfq_queue *bfqq)
{
	return (bfqq->wr_coeff > 1 &&
		bfqd->wr_busy_queues <
		bfq_tot_busy_queues(bfqd)) ||
		!bfq_symmetric_scenario(bfqd);
}

/*
	 * Finally, there is a case where maximizing throughput is the
	 * best choice even if it may cause unfairness toward
	 * bfqq. Such a case is when bfqq became active in a burst of
	 * queue activations. Queues that became active during a large
	 * burst benefit only from throughput, as discussed in the
	 * comments on bfq_handle_burst. Thus, if bfqq became active
	 * in a burst and not idling the device maximizes throughput,
	 * then the device must no be idled, because not idling the
	 * device provides bfqq and all other queues in the burst with
	 * maximum benefit. Combining this and the above case, we can
	 * now establish when idling is actually needed to preserve
	 * service guarantees.
 * For a queue that becomes empty, device idling is allowed only if
 * this function returns true for that queue. As a consequence, since
 * device idling plays a critical role for both throughput boosting
 * and service guarantees, the return value of this function plays a
 * critical role as well.
 *
 * In a nutshell, this function returns true only if idling is
 * beneficial for throughput or, even if detrimental for throughput,
 * idling is however necessary to preserve service guarantees (low
 * latency, desired throughput distribution, ...). In particular, on
 * NCQ-capable devices, this function tries to return false, so as to
 * help keep the drives' internal queues full, whenever this helps the
 * device boost the throughput without causing any service-guarantee
 * issue.
 *
 * Most of the issues taken into account to get the return value of
 * this function are not trivial. We discuss these issues in the two
 * functions providing the main pieces of information needed by this
 * function.
 */
static bool bfq_better_to_idle(struct bfq_queue *bfqq)
{
	struct bfq_data *bfqd = bfqq->bfqd;
	bool idling_boosts_thr_with_no_issue, idling_needed_for_service_guar;

	if (unlikely(bfqd->strict_guarantees))
		return true;

	/*
	 * Idling is performed only if slice_idle > 0. In addition, we
	 * do not idle if
	 * (a) bfqq is async
	 * (b) bfqq is in the idle io prio class: in this case we do
	 * not idle because we want to minimize the bandwidth that
	 * queues in this class can steal to higher-priority queues
	 */
	idling_needed_for_service_guarantees =
		asymmetric_scenario && !bfq_bfqq_in_large_burst(bfqq);
	if (bfqd->bfq_slice_idle == 0 || !bfq_bfqq_sync(bfqq) ||
	   bfq_class_idle(bfqq))
		return false;

	idling_boosts_thr_with_no_issue =
		idling_boosts_thr_without_issues(bfqd, bfqq);

	idling_needed_for_service_guar =
		idling_needed_for_service_guarantees(bfqd, bfqq);

	/*
	 * We have now all the components we need to compute the
	 * We have now the two components we need to compute the
	 * return value of the function, which is true only if idling
	 * either boosts the throughput (without issues), or is
	 * necessary to preserve service guarantees.
	 */
	return idling_boosts_thr_without_issues ||
		idling_needed_for_service_guarantees;
	return idling_boosts_thr_with_no_issue ||
		idling_needed_for_service_guar;
}

/*
@@ -3934,7 +3974,7 @@ static struct request *bfq_dispatch_rq_from_bfqq(struct bfq_data *bfqd,
	 * belongs to CLASS_IDLE and other queues are waiting for
	 * service.
	 */
	if (!(bfqd->busy_queues > 1 && bfq_class_idle(bfqq)))
	if (!(bfq_tot_busy_queues(bfqd) > 1 && bfq_class_idle(bfqq)))
		goto return_rq;

	bfq_bfqq_expire(bfqd, bfqq, false, BFQQE_BUDGET_EXHAUSTED);
@@ -3952,7 +3992,7 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx)
	 * most a call to dispatch for nothing
	 */
	return !list_empty_careful(&bfqd->dispatch) ||
		bfqd->busy_queues > 0;
		bfq_tot_busy_queues(bfqd) > 0;
}

static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
@@ -4006,9 +4046,10 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
		goto start_rq;
	}

	bfq_log(bfqd, "dispatch requests: %d busy queues", bfqd->busy_queues);
	bfq_log(bfqd, "dispatch requests: %d busy queues",
		bfq_tot_busy_queues(bfqd));

	if (bfqd->busy_queues == 0)
	if (bfq_tot_busy_queues(bfqd) == 0)
		goto exit;

	/*
@@ -4488,10 +4529,7 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		       struct request *rq)
{
	bfqq->seek_history <<= 1;
	bfqq->seek_history |=
		get_sdist(bfqq->last_request_pos, rq) > BFQQ_SEEK_THR &&
		(!blk_queue_nonrot(bfqd->queue) ||
		 blk_rq_sectors(rq) < BFQQ_SECT_THR_NONROT);
	bfqq->seek_history |= BFQ_RQ_SEEKY(bfqd, bfqq->last_request_pos, rq);
}

static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
@@ -4560,28 +4598,31 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq,
		bool budget_timeout = bfq_bfqq_budget_timeout(bfqq);

		/*
		 * There is just this request queued: if the request
		 * is small and the queue is not to be expired, then
		 * just exit.
		 * There is just this request queued: if
		 * - the request is small, and
		 * - we are idling to boost throughput, and
		 * - the queue is not to be expired,
		 * then just exit.
		 *
		 * In this way, if the device is being idled to wait
		 * for a new request from the in-service queue, we
		 * avoid unplugging the device and committing the
		 * device to serve just a small request. On the
		 * contrary, we wait for the block layer to decide
		 * when to unplug the device: hopefully, new requests
		 * will be merged to this one quickly, then the device
		 * will be unplugged and larger requests will be
		 * dispatched.
		 */
		if (small_req && !budget_timeout)
		 * device to serve just a small request. In contrast
		 * we wait for the block layer to decide when to
		 * unplug the device: hopefully, new requests will be
		 * merged to this one quickly, then the device will be
		 * unplugged and larger requests will be dispatched.
		 */
		if (small_req && idling_boosts_thr_without_issues(bfqd, bfqq) &&
		    !budget_timeout)
			return;

		/*
		 * A large enough request arrived, or the queue is to
		 * be expired: in both cases disk idling is to be
		 * stopped, so clear wait_request flag and reset
		 * timer.
		 * A large enough request arrived, or idling is being
		 * performed to preserve service guarantees, or
		 * finally the queue is to be expired: in all these
		 * cases disk idling is to be stopped, so clear
		 * wait_request flag and reset timer.
		 */
		bfq_clear_bfqq_wait_request(bfqq);
		hrtimer_try_to_cancel(&bfqd->idle_slice_timer);
@@ -4607,8 +4648,6 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq)
	bool waiting, idle_timer_disabled = false;

	if (new_bfqq) {
		if (bic_to_bfqq(RQ_BIC(rq), 1) != bfqq)
			new_bfqq = bic_to_bfqq(RQ_BIC(rq), 1);
		/*
		 * Release the request's reference to the old bfqq
		 * and make sure one is taken to the shared queue.
@@ -4751,6 +4790,8 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx,

static void bfq_update_hw_tag(struct bfq_data *bfqd)
{
	struct bfq_queue *bfqq = bfqd->in_service_queue;

	bfqd->max_rq_in_driver = max_t(int, bfqd->max_rq_in_driver,
				       bfqd->rq_in_driver);

@@ -4763,7 +4804,18 @@ static void bfq_update_hw_tag(struct bfq_data *bfqd)
	 * sum is not exact, as it's not taking into account deactivated
	 * requests.
	 */
	if (bfqd->rq_in_driver + bfqd->queued < BFQ_HW_QUEUE_THRESHOLD)
	if (bfqd->rq_in_driver + bfqd->queued <= BFQ_HW_QUEUE_THRESHOLD)
		return;

	/*
	 * If active queue hasn't enough requests and can idle, bfq might not
	 * dispatch sufficient requests to hardware. Don't zero hw_tag in this
	 * case
	 */
	if (bfqq && bfq_bfqq_has_short_ttime(bfqq) &&
	    bfqq->dispatched + bfqq->queued[0] + bfqq->queued[1] <
	    BFQ_HW_QUEUE_THRESHOLD &&
	    bfqd->rq_in_driver < BFQ_HW_QUEUE_THRESHOLD)
		return;

	if (bfqd->hw_tag_samples++ < BFQ_HW_QUEUE_SAMPLES)
@@ -4834,11 +4886,14 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
	 * isochronous, and both requisites for this condition to hold
	 * are now satisfied, then compute soft_rt_next_start (see the
	 * comments on the function bfq_bfqq_softrt_next_start()). We
	 * schedule this delayed check when bfqq expires, if it still
	 * has in-flight requests.
	 * do not compute soft_rt_next_start if bfqq is in interactive
	 * weight raising (see the comments in bfq_bfqq_expire() for
	 * an explanation). We schedule this delayed update when bfqq
	 * expires, if it still has in-flight requests.
	 */
	if (bfq_bfqq_softrt_update(bfqq) && bfqq->dispatched == 0 &&
	    RB_EMPTY_ROOT(&bfqq->sort_list))
	    RB_EMPTY_ROOT(&bfqq->sort_list) &&
	    bfqq->wr_coeff != bfqd->bfq_wr_coeff)
		bfqq->soft_rt_next_start =
			bfq_bfqq_softrt_next_start(bfqd, bfqq);

Loading