Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c5fdd531 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 - fix for a memory leak on certain unplug events
 - a collection of bcache fixes from Kent and Nicolas
 - a few null_blk fixes and updates from Matias
 - a marking of static of functions in the stec pci-e driver

* 'for-linus' of git://git.kernel.dk/linux-block:
  null_blk: support submit_queues on use_per_node_hctx
  null_blk: set use_per_node_hctx param to false
  null_blk: corrections to documentation
  null_blk: warning on ignored submit_queues param
  null_blk: refactor init and init errors code paths
  null_blk: documentation
  null_blk: mem garbage on NUMA systems during init
  drivers: block: Mark the functions as static in skd_main.c
  bcache: New writeback PD controller
  bcache: bugfix for race between moving_gc and bucket_invalidate
  bcache: fix for gc and writeback race
  bcache: bugfix - moving_gc now moves only correct buckets
  bcache: fix for gc crashing when no sectors are used
  bcache: Fix heap_peek() macro
  bcache: Fix for can_attach_cache()
  bcache: Fix dirty_data accounting
  bcache: Use uninterruptible sleep in writeback
  bcache: kthread don't set writeback task to INTERUPTIBLE
  block: fix memory leaks on unplugging block device
  bcache: fix sparse non static symbol warning
parents 70e672fa fc1bc354
Loading
Loading
Loading
Loading
+72 −0
Original line number Diff line number Diff line
Null block device driver
================================================================================

I. Overview

The null block device (/dev/nullb*) is used for benchmarking the various
block-layer implementations. It emulates a block device of X gigabytes in size.
The following instances are possible:

  Single-queue block-layer
    - Request-based.
    - Single submission queue per device.
    - Implements IO scheduling algorithms (CFQ, Deadline, noop).
  Multi-queue block-layer
    - Request-based.
    - Configurable submission queues per device.
  No block-layer (Known as bio-based)
    - Bio-based. IO requests are submitted directly to the device driver.
    - Directly accepts bio data structure and returns them.

All of them have a completion queue for each core in the system.

II. Module parameters applicable for all instances:

queue_mode=[0-2]: Default: 2-Multi-queue
  Selects which block-layer the module should instantiate with.

  0: Bio-based.
  1: Single-queue.
  2: Multi-queue.

home_node=[0..nr_nodes]: Default: NUMA_NO_NODE
  Selects what CPU node the data structures are allocated from.

gb=[Size in GB]: Default: 250GB
  The size of the device reported to the system.

bs=[Block size (in bytes)]: Default: 512 bytes
  The block size reported to the system.

nr_devices=[Number of devices]: Default: 2
  Number of block devices instantiated. They are instantiated as /dev/nullb0,
  etc.

irq_mode=[0-2]: Default: 1-Soft-irq
  The completion mode used for completing IOs to the block-layer.

  0: None.
  1: Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
     when IOs are issued from a CPU node other than the home node the device is
     connected to.
  2: Timer: Waits a specific period (completion_nsec) for each IO before
     completion.

completion_nsec=[ns]: Default: 10,000ns
  Combined with irq_mode=2 (timer). The time each completion event must wait.

submit_queues=[0..nr_cpus]:
  The number of submission queues attached to the device driver. If unset, it
  defaults to 1 on single-queue and bio-based instances. For multi-queue,
  it is ignored when use_per_node_hctx module parameter is 1.

hw_queue_depth=[0..qdepth]: Default: 64
  The hardware queue depth of the device.

III. Multi-queue specific parameters

use_per_node_hctx=[0/1]: Default: 0
  0: The number of submit queues is set to the value of the submit_queues
     parameter.
  1: The multi-queue block layer is instantiated with a hardware dispatch
     queue for each CPU node in the system.
+13 −0
Original line number Diff line number Diff line
@@ -335,9 +335,22 @@ static struct kobj_type blk_mq_hw_ktype = {
/*
 * Remove the multi-queue sysfs kobjects belonging to @disk's request queue.
 *
 * For every hardware context of the queue, the kobjects of the contexts
 * iterated under it are deleted and released first, followed by the hardware
 * context's own kobject.  Afterwards a KOBJ_REMOVE uevent is sent for the
 * queue's mq kobject, which is then deleted and released as well.  Finally a
 * reference on the disk's device kobject is dropped.
 */
void blk_mq_unregister_disk(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;
	struct kobject *mq_kobj = &q->mq_kobj;
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *ctx;
	int hw_idx, sw_idx;

	queue_for_each_hw_ctx(q, hctx, hw_idx) {
		struct kobject *hctx_kobj = &hctx->kobj;

		/* ctx kobjects reached through this hctx go before the hctx itself */
		hctx_for_each_ctx(hctx, ctx, sw_idx) {
			struct kobject *ctx_kobj = &ctx->kobj;

			kobject_del(ctx_kobj);
			kobject_put(ctx_kobj);
		}

		kobject_del(hctx_kobj);
		kobject_put(hctx_kobj);
	}

	/* Emit the removal event while the mq kobject is still registered. */
	kobject_uevent(mq_kobj, KOBJ_REMOVE);
	kobject_del(mq_kobj);
	kobject_put(mq_kobj);

	kobject_put(&disk_to_dev(disk)->kobj);
}
+76 −26
Original line number Diff line number Diff line
#include <linux/module.h>

#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
@@ -65,7 +66,7 @@ enum {
	NULL_Q_MQ		= 2,
};

static int submit_queues = 1;
static int submit_queues;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

@@ -101,9 +102,9 @@ static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = true;
static bool use_per_node_hctx = false;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true");
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");

static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
@@ -346,8 +347,37 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)

/*
 * Allocate a zeroed struct blk_mq_hw_ctx for hardware queue @hctx_index.
 *
 * The NUMA node used for the allocation is derived by distributing
 * reg->nr_hw_queues over the online nodes and then translating the resulting
 * relative node position into a real online node id.
 *
 * NOTE(review): this listing appears to be a rendered diff with the +/-
 * markers stripped.  The first return statement (allocating on node
 * hctx_index) looks like the removed side of the change and everything after
 * it like the added side -- confirm against the real file.
 */
static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
{
	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
				hctx_index);
	/* b_size: queues per node, rounded up; tip: remainder of that division */
	int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes);
	int tip = (reg->nr_hw_queues % nr_online_nodes);
	int node = 0, i, n;

	/*
	 * Split the submit queues evenly across the nodes. If the split is
	 * uneven, the first buckets get one extra queue each; once "tip"
	 * reaches zero the bucket size drops to the rounded-down quotient for
	 * the remaining nodes.
	 */
	for (i = 0, n = 1; i < hctx_index; i++, n++) {
		/* current bucket is full: move on to the next relative node */
		if (n % b_size == 0) {
			n = 0;
			node++;

			tip--;
			if (!tip)
				b_size = reg->nr_hw_queues / nr_online_nodes;
		}
	}

	/*
	 * A node might not be online, therefore map the relative node id to the
	 * real node id. "n" is reused here as the online node id; the loop
	 * stops on the node-th online node.
	 */
	for_each_online_node(n) {
		if (!node)
			break;
		node--;
	}

	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n);
}

static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
@@ -355,16 +385,24 @@ static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
	kfree(hctx);
}

static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
{
	BUG_ON(!nullb);
	BUG_ON(!nq);

	init_waitqueue_head(&nq->wait);
	nq->queue_depth = nullb->queue_depth;
}

/*
 * Bind hardware context @hctx to the driver queue at position @index of the
 * nullb device passed in @data, initialise that queue and count it in
 * nullb->nr_queues.  Always returns 0.
 *
 * NOTE(review): this listing appears to be a rendered diff with the +/-
 * markers stripped.  The open-coded init_waitqueue_head()/queue_depth lines
 * and the first nr_queues++ look like the removed side; the
 * null_init_queue() call and the second nr_queues++ look like their
 * replacement -- confirm against the real file.
 */
static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int index)
{
	struct nullb *nullb = data;
	struct nullb_queue *nq = &nullb->queues[index];

	init_waitqueue_head(&nq->wait);
	nq->queue_depth = nullb->queue_depth;
	nullb->nr_queues++;
	/* make the driver queue reachable from the hardware context */
	hctx->driver_data = nq;
	null_init_queue(nullb, nq);
	nullb->nr_queues++;

	return 0;
}
@@ -417,13 +455,13 @@ static int setup_commands(struct nullb_queue *nq)

	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
	if (!nq->cmds)
		return 1;
		return -ENOMEM;

	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
	if (!nq->tag_map) {
		kfree(nq->cmds);
		return 1;
		return -ENOMEM;
	}

	for (i = 0; i < nq->queue_depth; i++) {
@@ -454,33 +492,37 @@ static void cleanup_queues(struct nullb *nullb)

/*
 * Allocate the zeroed array of submit_queues nullb_queue entries for @nullb,
 * reset nullb->nr_queues to 0 and set nullb->queue_depth from the
 * hw_queue_depth module parameter.
 *
 * NOTE(review): this listing appears to be a rendered diff with the +/-
 * markers stripped.  Statements that occur twice in slightly different form
 * (the two kzalloc() calls, "return 1" next to "return -ENOMEM") are
 * presumably the removed and added sides of the same line, and the dangling
 * "if (queue_mode == NULL_Q_MQ)" presumably belongs to the removed side --
 * confirm against the real file.
 */
static int setup_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i;

	nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL);
	nullb->queues = kzalloc(submit_queues * sizeof(struct nullb_queue),
								GFP_KERNEL);
	if (!nullb->queues)
		return 1;
		return -ENOMEM;

	/* no queue has been initialised yet */
	nullb->nr_queues = 0;
	nullb->queue_depth = hw_queue_depth;

	if (queue_mode == NULL_Q_MQ)
	return 0;
}

/*
 * Initialise each of the submit_queues entries in nullb->queues: run
 * null_init_queue() on it, allocate its commands with setup_commands() and
 * count it in nullb->nr_queues.  If setup_commands() fails, the queues are
 * torn down through cleanup_queues() and its error code is returned.
 *
 * NOTE(review): this listing appears to be a rendered diff with the +/-
 * markers stripped.  The open-coded init_waitqueue_head()/queue_depth lines,
 * the "if (setup_commands(nq)) break;" pair, "if (i == submit_queues)" and
 * "return 1" look like the removed side of the change -- confirm against the
 * real file.
 */
static int init_driver_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i, ret = 0;

	for (i = 0; i < submit_queues; i++) {
		nq = &nullb->queues[i];
		init_waitqueue_head(&nq->wait);
		nq->queue_depth = hw_queue_depth;
		if (setup_commands(nq))
			break;

		null_init_queue(nullb, nq);

		ret = setup_commands(nq);
		if (ret)
			goto err_queue;
		/* only queues whose commands were set up are counted */
		nullb->nr_queues++;
	}

	if (i == submit_queues)
	return 0;

err_queue:
	cleanup_queues(nullb);
	return 1;
	return ret;
}

static int null_add_dev(void)
@@ -518,11 +560,13 @@ static int null_add_dev(void)
	} else if (queue_mode == NULL_Q_BIO) {
		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
		blk_queue_make_request(nullb->q, null_queue_bio);
		init_driver_queues(nullb);
	} else {
		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
		if (nullb->q)
			blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
		init_driver_queues(nullb);
	}

	if (!nullb->q)
@@ -579,7 +623,13 @@ static int __init null_init(void)
	}
#endif

	if (submit_queues > nr_cpu_ids)
	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
		if (submit_queues < nr_online_nodes) {
			pr_warn("null_blk: submit_queues param is set to %u.",
							nr_online_nodes);
			submit_queues = nr_online_nodes;
		}
	} else if (submit_queues > nr_cpu_ids)
		submit_queues = nr_cpu_ids;
	else if (!submit_queues)
		submit_queues = 1;
+2 −2
Original line number Diff line number Diff line
@@ -5269,7 +5269,7 @@ const char *skd_skdev_state_to_str(enum skd_drvr_state state)
	}
}

const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
static const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
{
	switch (state) {
	case SKD_MSG_STATE_IDLE:
@@ -5281,7 +5281,7 @@ const char *skd_skmsg_state_to_str(enum skd_fit_msg_state state)
	}
}

const char *skd_skreq_state_to_str(enum skd_req_state state)
static const char *skd_skreq_state_to_str(enum skd_req_state state)
{
	switch (state) {
	case SKD_REQ_STATE_IDLE:
+2 −0
Original line number Diff line number Diff line
@@ -421,9 +421,11 @@ out:

	if (watermark <= WATERMARK_METADATA) {
		SET_GC_MARK(b, GC_MARK_METADATA);
		SET_GC_MOVE(b, 0);
		b->prio = BTREE_PRIO;
	} else {
		SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
		SET_GC_MOVE(b, 0);
		b->prio = INITIAL_PRIO;
	}

Loading