
Commit c7c22e4d authored by Jens Axboe

block: add support for IO CPU affinity



This patch adds support for controlling the IO completion CPU of
either all requests on a queue, or on a per-request basis. We export
a sysfs variable (rq_affinity) which, if set, migrates completions
of requests to the CPU that originally submitted them. A bio helper
(bio_set_completion_cpu()) is also added, so that queuers can ask
for completion on that specific CPU.

In testing, this has been shown to cut the system time by as much
as 20-40% on synthetic workloads where CPU affinity is desired.

This requires a little help from the architecture, so it'll only
work as designed for archs that are using the new generic smp
helper infrastructure.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent 18887ad9
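
As an illustration of the interface the message above describes, a submitter that wants its completion steered back to the submitting CPU would tag the bio before queueing it. A minimal sketch, assuming bio_set_completion_cpu() records the requested CPU in bio->bi_comp_cpu (and, given the BIO_CPU_AFFINE check in the __make_request() hunk below, presumably marks the bio as CPU-affine; the bio.h side of the series is not shown in these hunks). The surrounding driver function is purely hypothetical:

	static void example_submit(struct bio *bio)
	{
		int cpu = get_cpu();	/* stable CPU id while we tag the bio */

		/*
		 * Ask for completion on the submitting CPU; the
		 * init_request_from_bio() hunk below copies
		 * bio->bi_comp_cpu into req->cpu.
		 */
		bio_set_completion_cpu(bio, cpu);
		put_cpu();

		generic_make_request(bio);
	}

Alternatively, writing 1 to the queue's rq_affinity sysfs attribute (added in the blk-sysfs.c hunk below) applies the same behaviour to every request on that queue.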
block/blk-core.c  +23 −23
@@ -110,7 +110,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
	memset(rq, 0, sizeof(*rq));

	INIT_LIST_HEAD(&rq->queuelist);
	INIT_LIST_HEAD(&rq->donelist);
	rq->cpu = -1;
	rq->q = q;
	rq->sector = rq->hard_sector = (sector_t) -1;
	INIT_HLIST_NODE(&rq->hash);
@@ -322,6 +322,21 @@ void blk_unplug(struct request_queue *q)
}
EXPORT_SYMBOL(blk_unplug);

static void blk_invoke_request_fn(struct request_queue *q)
{
	/*
	 * one level of recursion is ok and is much faster than kicking
	 * the unplug handling
	 */
	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
		q->request_fn(q);
		queue_flag_clear(QUEUE_FLAG_REENTER, q);
	} else {
		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
		kblockd_schedule_work(q, &q->unplug_work);
	}
}

/**
 * blk_start_queue - restart a previously stopped queue
 * @q:    The &struct request_queue in question
@@ -336,18 +351,7 @@ void blk_start_queue(struct request_queue *q)
	WARN_ON(!irqs_disabled());

	queue_flag_clear(QUEUE_FLAG_STOPPED, q);

	/*
	 * one level of recursion is ok and is much faster than kicking
	 * the unplug handling
	 */
	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
		q->request_fn(q);
		queue_flag_clear(QUEUE_FLAG_REENTER, q);
	} else {
		blk_plug_device(q);
		kblockd_schedule_work(q, &q->unplug_work);
	}
	blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(blk_start_queue);

@@ -405,15 +409,8 @@ void __blk_run_queue(struct request_queue *q)
	 * Only recurse once to avoid overrunning the stack, let the unplug
	 * handling reinvoke the handler shortly if we already got there.
	 */
	if (!elv_queue_empty(q)) {
		if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
			q->request_fn(q);
			queue_flag_clear(QUEUE_FLAG_REENTER, q);
		} else {
			blk_plug_device(q);
			kblockd_schedule_work(q, &q->unplug_work);
		}
	}
	if (!elv_queue_empty(q))
		blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(__blk_run_queue);

@@ -1056,6 +1053,7 @@ EXPORT_SYMBOL(blk_put_request);

void init_request_from_bio(struct request *req, struct bio *bio)
{
	req->cpu = bio->bi_comp_cpu;
	req->cmd_type = REQ_TYPE_FS;

	/*
@@ -1198,13 +1196,15 @@ get_rq:
	init_request_from_bio(req, bio);

	spin_lock_irq(q->queue_lock);
	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
	    bio_flagged(bio, BIO_CPU_AFFINE))
		req->cpu = blk_cpu_to_group(smp_processor_id());
	if (elv_queue_empty(q))
		blk_plug_device(q);
	add_request(q, req);
out:
	if (sync)
		__generic_unplug_device(q);

	spin_unlock_irq(q->queue_lock);
	return 0;

block/blk-settings.c  +1 −1
@@ -443,7 +443,7 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
}
EXPORT_SYMBOL(blk_queue_update_dma_alignment);

static int __init blk_settings_init(void)
int __init blk_settings_init(void)
{
	blk_max_low_pfn = max_low_pfn - 1;
	blk_max_pfn = max_pfn - 1;
block/blk-softirq.c  +95 −31
@@ -13,6 +13,70 @@

static DEFINE_PER_CPU(struct list_head, blk_cpu_done);

/*
 * Softirq action handler - move entries to local list and loop over them
 * while passing them to the queue registered handler.
 */
static void blk_done_softirq(struct softirq_action *h)
{
	struct list_head *cpu_list, local_list;

	local_irq_disable();
	cpu_list = &__get_cpu_var(blk_cpu_done);
	list_replace_init(cpu_list, &local_list);
	local_irq_enable();

	while (!list_empty(&local_list)) {
		struct request *rq;

		rq = list_entry(local_list.next, struct request, csd.list);
		list_del_init(&rq->csd.list);
		rq->q->softirq_done_fn(rq);
	}
}

#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
static void trigger_softirq(void *data)
{
	struct request *rq = data;
	unsigned long flags;
	struct list_head *list;

	local_irq_save(flags);
	list = &__get_cpu_var(blk_cpu_done);
	list_add_tail(&rq->csd.list, list);

	if (list->next == &rq->csd.list)
		raise_softirq_irqoff(BLOCK_SOFTIRQ);

	local_irq_restore(flags);
}

/*
 * Setup and invoke a run of 'trigger_softirq' on the given cpu.
 */
static int raise_blk_irq(int cpu, struct request *rq)
{
	if (cpu_online(cpu)) {
		struct call_single_data *data = &rq->csd;

		data->func = trigger_softirq;
		data->info = rq;
		data->flags = 0;

		__smp_call_function_single(cpu, data);
		return 0;
	}

	return 1;
}
#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
static int raise_blk_irq(int cpu, struct request *rq)
{
	return 1;
}
#endif

static int __cpuinit blk_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
@@ -33,33 +97,10 @@ static int __cpuinit blk_cpu_notify(struct notifier_block *self,
	return NOTIFY_OK;
}


static struct notifier_block blk_cpu_notifier __cpuinitdata = {
static struct notifier_block __cpuinitdata blk_cpu_notifier = {
	.notifier_call	= blk_cpu_notify,
};

/*
 * splice the completion data to a local structure and hand off to
 * process_completion_queue() to complete the requests
 */
static void blk_done_softirq(struct softirq_action *h)
{
	struct list_head *cpu_list, local_list;

	local_irq_disable();
	cpu_list = &__get_cpu_var(blk_cpu_done);
	list_replace_init(cpu_list, &local_list);
	local_irq_enable();

	while (!list_empty(&local_list)) {
		struct request *rq;

		rq = list_entry(local_list.next, struct request, donelist);
		list_del_init(&rq->donelist);
		rq->q->softirq_done_fn(rq);
	}
}

/**
 * blk_complete_request - end I/O on a request
 * @req:      the request being processed
@@ -71,25 +112,48 @@ static void blk_done_softirq(struct softirq_action *h)
 *     through a softirq handler. The user must have registered a completion
 *     callback through blk_queue_softirq_done().
 **/

void blk_complete_request(struct request *req)
{
	struct list_head *cpu_list;
	struct request_queue *q = req->q;
	unsigned long flags;
	int ccpu, cpu, group_cpu;

	BUG_ON(!req->q->softirq_done_fn);
	BUG_ON(!q->softirq_done_fn);

	local_irq_save(flags);
	cpu = smp_processor_id();
	group_cpu = blk_cpu_to_group(cpu);

	cpu_list = &__get_cpu_var(blk_cpu_done);
	list_add_tail(&req->donelist, cpu_list);
	/*
	 * Select completion CPU
	 */
	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
		ccpu = req->cpu;
	else
		ccpu = cpu;

	if (ccpu == cpu || ccpu == group_cpu) {
		struct list_head *list;
do_local:
		list = &__get_cpu_var(blk_cpu_done);
		list_add_tail(&req->csd.list, list);

		/*
		 * if the list only contains our just added request,
		 * signal a raise of the softirq. If there are already
		 * entries there, someone already raised the irq but it
		 * hasn't run yet.
		 */
		if (list->next == &req->csd.list)
			raise_softirq_irqoff(BLOCK_SOFTIRQ);
	} else if (raise_blk_irq(ccpu, req))
		goto do_local;

	local_irq_restore(flags);
}
EXPORT_SYMBOL(blk_complete_request);

int __init blk_softirq_init(void)
__init int blk_softirq_init(void)
{
	int i;

block/blk-sysfs.c  +31 −0
@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
	return ret;
}

static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
{
	unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);

	return queue_var_show(set != 0, page);
}

static ssize_t
queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
{
	ssize_t ret = -EINVAL;
#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
	unsigned long val;

	ret = queue_var_store(&val, page, count);
	spin_lock_irq(q->queue_lock);
	if (val)
		queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
	else
		queue_flag_clear(QUEUE_FLAG_SAME_COMP,  q);
	spin_unlock_irq(q->queue_lock);
#endif
	return ret;
}

static struct queue_sysfs_entry queue_requests_entry = {
	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = {
	.store = queue_nomerges_store,
};

static struct queue_sysfs_entry queue_rq_affinity_entry = {
	.attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
	.show = queue_rq_affinity_show,
	.store = queue_rq_affinity_store,
};

static struct attribute *default_attrs[] = {
	&queue_requests_entry.attr,
	&queue_ra_entry.attr,
@@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = {
	&queue_iosched_entry.attr,
	&queue_hw_sector_size_entry.attr,
	&queue_nomerges_entry.attr,
	&queue_rq_affinity_entry.attr,
	NULL,
};

block/blk.h  +12 −0
@@ -59,4 +59,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)

#endif /* BLK_DEV_INTEGRITY */

static inline int blk_cpu_to_group(int cpu)
{
#ifdef CONFIG_SCHED_MC
	cpumask_t mask = cpu_coregroup_map(cpu);
	return first_cpu(mask);
#elif defined(CONFIG_SCHED_SMT)
	return first_cpu(per_cpu(cpu_sibling_map, cpu));
#else
	return cpu;
#endif
}

#endif
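
For completeness, the new rq_affinity attribute can also be flipped from userspace; the sketch below does it from C against a hypothetical /sys/block/sda path (echo 1 > .../queue/rq_affinity from a shell is equivalent):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* hypothetical device; any request queue exposing rq_affinity will do */
		const char *path = "/sys/block/sda/queue/rq_affinity";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* writing "1" sets QUEUE_FLAG_SAME_COMP via queue_rq_affinity_store() above */
		if (write(fd, "1", 1) != 1)
			perror("write");
		close(fd);
		return 0;
	}

Note that the store handler only honours the write when CONFIG_USE_GENERIC_SMP_HELPERS is enabled, matching the commit message's caveat about the generic smp helper infrastructure.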