
Commit 24532f76 authored by Linus Torvalds

Merge branch 'for-4.9/block-smp' of git://git.kernel.dk/linux-block

Pull blk-mq CPU hotplug update from Jens Axboe:
 "This is the conversion of blk-mq to the new hotplug state machine"

* 'for-4.9/block-smp' of git://git.kernel.dk/linux-block:
  blk-mq: fixup "Convert to new hotplug state machine"
  blk-mq: Convert to new hotplug state machine
  blk-mq/cpu-notif: Convert to new hotplug state machine
parents 12e3d3cd 97a32864
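
For context: blk-mq previously hooked CPU hotplug through hotcpu_notifier() callbacks that switched on action codes (CPU_DEAD, CPU_UP_PREPARE, ...). The new hotplug state machine replaces that with named states; the per-hctx side uses the multi-instance flavor, where the state is set up once and every hardware context attaches itself through an embedded hlist_node. Below is a minimal sketch of that pattern, modeled on the diffs that follow — the state CPUHP_BLK_MQ_DEAD and the name "block/mq:dead" are taken from this commit, while struct my_queue and the my_* helpers are hypothetical stand-ins for the blk-mq types:

#include <linux/kernel.h>
#include <linux/cpuhotplug.h>
#include <linux/list.h>

struct my_queue {
	struct hlist_node cpuhp_dead;	/* links this object into the state's instance list */
};

/* Called once per registered instance after a CPU has gone offline. */
static int my_queue_notify_dead(unsigned int cpu, struct hlist_node *node)
{
	struct my_queue *q = hlist_entry_safe(node, struct my_queue, cpuhp_dead);

	/* migrate or requeue work that was pending on the dead @cpu */
	pr_info("queue %p: cpu %u went away\n", q, cpu);
	return 0;	/* plain 0/-errno now, no NOTIFY_OK */
}

static int __init my_init(void)
{
	/* Register the state once; NULL startup, teardown runs on CPU death. */
	return cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead",
				       NULL, my_queue_notify_dead);
}

/* Objects opt in and out; _nocalls skips invoking the callback right away. */
static int my_queue_start(struct my_queue *q)
{
	return cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD,
						&q->cpuhp_dead);
}

static void my_queue_stop(struct my_queue *q)
{
	cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD, &q->cpuhp_dead);
}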
block/Makefile
+1 −1
@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o \
-			blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
+			blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
 			genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
 			badblocks.o partitions/


block/blk-mq-cpu.c
deleted 100644 → 0
+0 −67
@@ -1,67 +0,0 @@
-/*
- * CPU notifier helper code for blk-mq
- *
- * Copyright (C) 2013-2014 Jens Axboe
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/blkdev.h>
-#include <linux/list.h>
-#include <linux/llist.h>
-#include <linux/smp.h>
-#include <linux/cpu.h>
-
-#include <linux/blk-mq.h>
-#include "blk-mq.h"
-
-static LIST_HEAD(blk_mq_cpu_notify_list);
-static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
-
-static int blk_mq_main_cpu_notify(struct notifier_block *self,
-				  unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long) hcpu;
-	struct blk_mq_cpu_notifier *notify;
-	int ret = NOTIFY_OK;
-
-	raw_spin_lock(&blk_mq_cpu_notify_lock);
-
-	list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
-		ret = notify->notify(notify->data, action, cpu);
-		if (ret != NOTIFY_OK)
-			break;
-	}
-
-	raw_spin_unlock(&blk_mq_cpu_notify_lock);
-	return ret;
-}
-
-void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
-{
-	BUG_ON(!notifier->notify);
-
-	raw_spin_lock(&blk_mq_cpu_notify_lock);
-	list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
-	raw_spin_unlock(&blk_mq_cpu_notify_lock);
-}
-
-void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
-{
-	raw_spin_lock(&blk_mq_cpu_notify_lock);
-	list_del(&notifier->list);
-	raw_spin_unlock(&blk_mq_cpu_notify_lock);
-}
-
-void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
-			      int (*fn)(void *, unsigned long, unsigned int),
-			      void *data)
-{
-	notifier->notify = fn;
-	notifier->data = data;
-}
-
-void __init blk_mq_cpu_init(void)
-{
-	hotcpu_notifier(blk_mq_main_cpu_notify, 0);
-}
block/blk-mq.c
+56 −67
@@ -1563,11 +1563,13 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
  * software queue to the hw queue dispatch list, and ensure that it
  * gets run.
  */
-static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
+static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 {
+	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	LIST_HEAD(tmp);
 
+	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
 	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
 
 	spin_lock(&ctx->lock);
@@ -1578,30 +1580,20 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
 	spin_unlock(&ctx->lock);
 
 	if (list_empty(&tmp))
-		return NOTIFY_OK;
+		return 0;
 
 	spin_lock(&hctx->lock);
 	list_splice_tail_init(&tmp, &hctx->dispatch);
 	spin_unlock(&hctx->lock);
 
 	blk_mq_run_hw_queue(hctx, true);
-	return NOTIFY_OK;
+	return 0;
 }
 
-static int blk_mq_hctx_notify(void *data, unsigned long action,
-			      unsigned int cpu)
+static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
 {
-	struct blk_mq_hw_ctx *hctx = data;
-
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
-		return blk_mq_hctx_cpu_offline(hctx, cpu);
-
-	/*
-	 * In case of CPU online, tags may be reallocated
-	 * in blk_mq_map_swqueue() after mapping is updated.
-	 */
-
-	return NOTIFY_OK;
+	cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+					    &hctx->cpuhp_dead);
 }
 
 /* hctx->ctxs will be freed in queue's release handler */
@@ -1621,7 +1613,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
-	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
+	blk_mq_remove_cpuhp(hctx);
 	blk_free_flush_queue(hctx->fq);
 	sbitmap_free(&hctx->ctx_map);
 }
@@ -1668,9 +1660,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	hctx->queue_num = hctx_idx;
 	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
 
-	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
-					blk_mq_hctx_notify, hctx);
-	blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
+	cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
 
 	hctx->tags = set->tags[hctx_idx];
 
@@ -1715,8 +1705,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 free_ctxs:
 	kfree(hctx->ctxs);
 unregister_cpu_notifier:
-	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
-
+	blk_mq_remove_cpuhp(hctx);
 	return -1;
 }
 
@@ -2089,50 +2078,18 @@ static void blk_mq_queue_reinit(struct request_queue *q,
 	blk_mq_sysfs_register(q);
 }
 
-static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
-				      unsigned long action, void *hcpu)
-{
-	struct request_queue *q;
-	int cpu = (unsigned long)hcpu;
-	/*
-	 * New online cpumask which is going to be set in this hotplug event.
-	 * Declare this cpumasks as global as cpu-hotplug operation is invoked
-	 * one-by-one and dynamically allocating this could result in a failure.
-	 */
-	static struct cpumask online_new;
+/*
+ * New online cpumask which is going to be set in this hotplug event.
+ * Declare this cpumasks as global as cpu-hotplug operation is invoked
+ * one-by-one and dynamically allocating this could result in a failure.
+ */
+static struct cpumask cpuhp_online_new;
 
-	/*
-	 * Before hotadded cpu starts handling requests, new mappings must
-	 * be established.  Otherwise, these requests in hw queue might
-	 * never be dispatched.
-	 *
-	 * For example, there is a single hw queue (hctx) and two CPU queues
-	 * (ctx0 for CPU0, and ctx1 for CPU1).
-	 *
-	 * Now CPU1 is just onlined and a request is inserted into
-	 * ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is
-	 * still zero.
-	 *
-	 * And then while running hw queue, flush_busy_ctxs() finds bit0 is
-	 * set in pending bitmap and tries to retrieve requests in
-	 * hctx->ctxs[0]->rq_list.  But htx->ctxs[0] is a pointer to ctx0,
-	 * so the request in ctx1->rq_list is ignored.
-	 */
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DEAD:
-	case CPU_UP_CANCELED:
-		cpumask_copy(&online_new, cpu_online_mask);
-		break;
-	case CPU_UP_PREPARE:
-		cpumask_copy(&online_new, cpu_online_mask);
-		cpumask_set_cpu(cpu, &online_new);
-		break;
-	default:
-		return NOTIFY_OK;
-	}
+static void blk_mq_queue_reinit_work(void)
+{
+	struct request_queue *q;
 
 	mutex_lock(&all_q_mutex);
-
 	/*
 	 * We need to freeze and reinit all existing queues.  Freezing
 	 * involves synchronous wait for an RCU grace period and doing it
@@ -2153,13 +2110,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	}
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_queue_reinit(q, &online_new);
+		blk_mq_queue_reinit(q, &cpuhp_online_new);
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_unfreeze_queue(q);
 
 	mutex_unlock(&all_q_mutex);
-	return NOTIFY_OK;
+}
+
+static int blk_mq_queue_reinit_dead(unsigned int cpu)
+{
+	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
+	blk_mq_queue_reinit_work();
+	return 0;
+}
+
+/*
+ * Before hotadded cpu starts handling requests, new mappings must be
+ * established.  Otherwise, these requests in hw queue might never be
+ * dispatched.
+ *
+ * For example, there is a single hw queue (hctx) and two CPU queues (ctx0
+ * for CPU0, and ctx1 for CPU1).
+ *
+ * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
+ * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
+ *
+ * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in
+ * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
+ * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list
+ * is ignored.
+ */
+static int blk_mq_queue_reinit_prepare(unsigned int cpu)
+{
+	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
+	cpumask_set_cpu(cpu, &cpuhp_online_new);
+	blk_mq_queue_reinit_work();
+	return 0;
 }
 
 static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
@@ -2378,10 +2365,12 @@ void blk_mq_enable_hotplug(void)
 
 static int __init blk_mq_init(void)
 {
-	blk_mq_cpu_init();
-
-	hotcpu_notifier(blk_mq_queue_reinit_notify, 0);
+	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
+				blk_mq_hctx_notify_dead);
 
+	cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
+				  blk_mq_queue_reinit_prepare,
+				  blk_mq_queue_reinit_dead);
 	return 0;
 }
 subsys_initcall(blk_mq_init);
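
The queue-mapping side above keeps one global cpumask and funnels both hotplug directions into blk_mq_queue_reinit_work(): the prepare callback runs before a hot-added CPU can receive requests, and the dead callback replaces both of the old CPU_DEAD and CPU_UP_CANCELED cases. A sketch of this plain (single-instance) registration, reusing the state name from the commit; my_prepare and my_dead are illustrative stand-ins for blk_mq_queue_reinit_prepare() and blk_mq_queue_reinit_dead():

#include <linux/cpuhotplug.h>

/* Runs before @cpu starts handling requests (old CPU_UP_PREPARE). */
static int my_prepare(unsigned int cpu)
{
	/* re-map software queues so requests for @cpu cannot be lost */
	return 0;	/* a non-zero return would abort the CPU bringup */
}

/* Runs once @cpu is gone, or when a failed bringup is rolled back. */
static int my_dead(unsigned int cpu)
{
	/* shrink the mappings back to cpu_online_mask */
	return 0;
}

static int __init my_init(void)
{
	/*
	 * _nocalls: install the callbacks without replaying them for
	 * the CPUs that are already online at registration time.
	 */
	return cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE,
					 "block/mq:prepare",
					 my_prepare, my_dead);
}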
block/blk-mq.h
+0 −7
@@ -32,13 +32,6 @@ void blk_mq_wake_waiters(struct request_queue *q);
 /*
  * CPU hotplug helpers
  */
-struct blk_mq_cpu_notifier;
-void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
-			      int (*fn)(void *, unsigned long, unsigned int),
-			      void *data);
-void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
-void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
-void blk_mq_cpu_init(void);
 void blk_mq_enable_hotplug(void);
 void blk_mq_disable_hotplug(void);
 
include/linux/blk-mq.h
+1 −7
@@ -7,12 +7,6 @@
 struct blk_mq_tags;
 struct blk_flush_queue;
 
-struct blk_mq_cpu_notifier {
-	struct list_head list;
-	void *data;
-	int (*notify)(void *data, unsigned long action, unsigned int cpu);
-};
-
 struct blk_mq_hw_ctx {
 	struct {
 		spinlock_t		lock;
@@ -53,7 +47,7 @@ struct blk_mq_hw_ctx {
 
 	struct delayed_work	delay_work;
 
-	struct blk_mq_cpu_notifier	cpu_notifier;
+	struct hlist_node	cpuhp_dead;
 	struct kobject		kobj;
 
 	unsigned long		poll_considered;