Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit bfffa1cc authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-4.2/core' of git://git.kernel.dk/linux-block

Pull core block IO update from Jens Axboe:
 "Nothing really major in here, mostly a collection of smaller
  optimizations and cleanups, mixed with various fixes.  In more detail,
  this contains:

   - Addition of policy specific data to blkcg for block cgroups.  From
     Arianna Avanzini.

   - Various cleanups around command types from Christoph.

   - Cleanup of the suspend block I/O path from Christoph.

   - Plugging updates from Shaohua and Jeff Moyer, for blk-mq.

   - Eliminating atomic inc/dec of both remaining IO count and reference
     count in a bio.  From me.

   - Fixes for SG gap and chunk size support for data-less (discards)
     IO, so we can merge these better.  From me.

   - Small restructuring of blk-mq shared tag support, freeing drivers
     from iterating hardware queues.  From Keith Busch.

   - A few cfq-iosched tweaks, from Tahsin Erdogan and me.  Makes the
     IOPS mode the default for non-rotational storage"

* 'for-4.2/core' of git://git.kernel.dk/linux-block: (35 commits)
  cfq-iosched: fix other locations where blkcg_to_cfqgd() can return NULL
  cfq-iosched: fix sysfs oops when attempting to read unconfigured weights
  cfq-iosched: move group scheduling functions under ifdef
  cfq-iosched: fix the setting of IOPS mode on SSDs
  blktrace: Add blktrace.c to BLOCK LAYER in MAINTAINERS file
  block, cgroup: implement policy-specific per-blkcg data
  block: Make CFQ default to IOPS mode on SSDs
  block: add blk_set_queue_dying() to blkdev.h
  blk-mq: Shared tag enhancements
  block: don't honor chunk sizes for data-less IO
  block: only honor SG gap prevention for merges that contain data
  block: fix returnvar.cocci warnings
  block, dm: don't copy bios for request clones
  block: remove management of bi_remaining when restoring original bi_end_io
  block: replace trylock with mutex_lock in blkdev_reread_part()
  block: export blkdev_reread_part() and __blkdev_reread_part()
  suspend: simplify block I/O handling
  block: collapse bio bit space
  block: remove unused BIO_RW_BLOCK and BIO_EOF flags
  block: remove BIO_EOPNOTSUPP
  ...
parents cc8a0a94 ae994ea9
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -2075,6 +2075,7 @@ M: Jens Axboe <axboe@kernel.dk>
T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
S:	Maintained
F:	block/
F:	kernel/trace/blktrace.c

BLOCK2MTD DRIVER
M:	Joern Engel <joern@lazybastard.org>
+2 −2
Original line number Diff line number Diff line
@@ -361,7 +361,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)

	/* Restore original bio completion handler */
	bio->bi_end_io = bip->bip_end_io;
	bio_endio_nodec(bio, error);
	bio_endio(bio, error);
}

/**
@@ -388,7 +388,7 @@ void bio_integrity_endio(struct bio *bio, int error)
	 */
	if (error) {
		bio->bi_end_io = bip->bip_end_io;
		bio_endio_nodec(bio, error);
		bio_endio(bio, error);

		return;
	}
+47 −30
Original line number Diff line number Diff line
@@ -270,8 +270,8 @@ void bio_init(struct bio *bio)
{
	memset(bio, 0, sizeof(*bio));
	bio->bi_flags = 1 << BIO_UPTODATE;
	atomic_set(&bio->bi_remaining, 1);
	atomic_set(&bio->bi_cnt, 1);
	atomic_set(&bio->__bi_remaining, 1);
	atomic_set(&bio->__bi_cnt, 1);
}
EXPORT_SYMBOL(bio_init);

@@ -293,7 +293,7 @@ void bio_reset(struct bio *bio)

	memset(bio, 0, BIO_RESET_BYTES);
	bio->bi_flags = flags | (1 << BIO_UPTODATE);
	atomic_set(&bio->bi_remaining, 1);
	atomic_set(&bio->__bi_remaining, 1);
}
EXPORT_SYMBOL(bio_reset);

@@ -303,6 +303,17 @@ static void bio_chain_endio(struct bio *bio, int error)
	bio_put(bio);
}

/*
 * Increment chain count for the bio. Make sure the CHAIN flag update
 * is visible before the raised count.
 */
static inline void bio_inc_remaining(struct bio *bio)
{
	bio->bi_flags |= (1 << BIO_CHAIN);
	smp_mb__before_atomic();
	atomic_inc(&bio->__bi_remaining);
}

/**
 * bio_chain - chain bio completions
 * @bio: the target bio
@@ -320,7 +331,7 @@ void bio_chain(struct bio *bio, struct bio *parent)

	bio->bi_private = parent;
	bio->bi_end_io	= bio_chain_endio;
	atomic_inc(&parent->bi_remaining);
	bio_inc_remaining(parent);
}
EXPORT_SYMBOL(bio_chain);

@@ -524,14 +535,18 @@ EXPORT_SYMBOL(zero_fill_bio);
 **/
void bio_put(struct bio *bio)
{
	BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
	if (!bio_flagged(bio, BIO_REFFED))
		bio_free(bio);
	else {
		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));

		/*
		 * last put frees it
		 */
	if (atomic_dec_and_test(&bio->bi_cnt))
		if (atomic_dec_and_test(&bio->__bi_cnt))
			bio_free(bio);
	}
}
EXPORT_SYMBOL(bio_put);

inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
@@ -1741,6 +1756,25 @@ void bio_flush_dcache_pages(struct bio *bi)
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif

static inline bool bio_remaining_done(struct bio *bio)
{
	/*
	 * If we're not chaining, then ->__bi_remaining is always 1 and
	 * we always end io on the first invocation.
	 */
	if (!bio_flagged(bio, BIO_CHAIN))
		return true;

	BUG_ON(atomic_read(&bio->__bi_remaining) <= 0);

	if (atomic_dec_and_test(&bio->__bi_remaining)) {
		clear_bit(BIO_CHAIN, &bio->bi_flags);
		return true;
	}

	return false;
}

/**
 * bio_endio - end I/O on a bio
 * @bio:	bio
@@ -1758,15 +1792,13 @@ EXPORT_SYMBOL(bio_flush_dcache_pages);
void bio_endio(struct bio *bio, int error)
{
	while (bio) {
		BUG_ON(atomic_read(&bio->bi_remaining) <= 0);

		if (error)
			clear_bit(BIO_UPTODATE, &bio->bi_flags);
		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
			error = -EIO;

		if (!atomic_dec_and_test(&bio->bi_remaining))
			return;
		if (unlikely(!bio_remaining_done(bio)))
			break;

		/*
		 * Need to have a real endio function for chained bios,
@@ -1789,21 +1821,6 @@ void bio_endio(struct bio *bio, int error)
}
EXPORT_SYMBOL(bio_endio);

/**
 * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
 * @bio:	bio
 * @error:	error, if any
 *
 * For code that has saved and restored bi_end_io; think hard before using this
 * function, probably you should've cloned the entire bio.
 **/
void bio_endio_nodec(struct bio *bio, int error)
{
	atomic_inc(&bio->bi_remaining);
	bio_endio(bio, error);
}
EXPORT_SYMBOL(bio_endio_nodec);

/**
 * bio_split - split a bio
 * @bio:	bio to split
+81 −11
Original line number Diff line number Diff line
@@ -9,6 +9,10 @@
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 * 	              Nauman Rafique <nauman@google.com>
 *
 * For policy-specific per-blkcg data:
 * Copyright (C) 2015 Paolo Valente <paolo.valente@unimore.it>
 *                    Arianna Avanzini <avanzini.arianna@gmail.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
@@ -26,8 +30,7 @@

static DEFINE_MUTEX(blkcg_pol_mutex);

struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
			    .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
struct blkcg blkcg_root;
EXPORT_SYMBOL_GPL(blkcg_root);

static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
@@ -823,6 +826,8 @@ static struct cgroup_subsys_state *
blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct blkcg *blkcg;
	struct cgroup_subsys_state *ret;
	int i;

	if (!parent_css) {
		blkcg = &blkcg_root;
@@ -830,17 +835,49 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);
	if (!blkcg) {
		ret = ERR_PTR(-ENOMEM);
		goto free_blkcg;
	}

	for (i = 0; i < BLKCG_MAX_POLS ; i++) {
		struct blkcg_policy *pol = blkcg_policy[i];
		struct blkcg_policy_data *cpd;

		/*
		 * If the policy hasn't been attached yet, wait for it
		 * to be attached before doing anything else. Otherwise,
		 * check if the policy requires any specific per-cgroup
		 * data: if it does, allocate and initialize it.
		 */
		if (!pol || !pol->cpd_size)
			continue;

		BUG_ON(blkcg->pd[i]);
		cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
		if (!cpd) {
			ret = ERR_PTR(-ENOMEM);
			goto free_pd_blkcg;
		}
		blkcg->pd[i] = cpd;
		cpd->plid = i;
		pol->cpd_init_fn(blkcg);
	}

	blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
	blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
done:
	spin_lock_init(&blkcg->lock);
	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;

free_pd_blkcg:
	for (i--; i >= 0; i--)
		kfree(blkcg->pd[i]);

free_blkcg:
	kfree(blkcg);
	return ret;
}

/**
@@ -958,8 +995,10 @@ int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol)
{
	LIST_HEAD(pds);
	LIST_HEAD(cpds);
	struct blkcg_gq *blkg, *new_blkg;
	struct blkg_policy_data *pd, *n;
	struct blkg_policy_data *pd, *nd;
	struct blkcg_policy_data *cpd, *cnd;
	int cnt = 0, ret;
	bool preloaded;

@@ -1003,7 +1042,10 @@ int blkcg_activate_policy(struct request_queue *q,

	spin_unlock_irq(q->queue_lock);

	/* allocate policy_data for all existing blkgs */
	/*
	 * Allocate per-blkg and per-blkcg policy data
	 * for all existing blkgs.
	 */
	while (cnt--) {
		pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
		if (!pd) {
@@ -1011,26 +1053,50 @@ int blkcg_activate_policy(struct request_queue *q,
			goto out_free;
		}
		list_add_tail(&pd->alloc_node, &pds);

		if (!pol->cpd_size)
			continue;
		cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node);
		if (!cpd) {
			ret = -ENOMEM;
			goto out_free;
		}
		list_add_tail(&cpd->alloc_node, &cpds);
	}

	/*
	 * Install the allocated pds.  With @q bypassing, no new blkg
	 * Install the allocated pds and cpds. With @q bypassing, no new blkg
	 * should have been created while the queue lock was dropped.
	 */
	spin_lock_irq(q->queue_lock);

	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		if (WARN_ON(list_empty(&pds))) {
		if (WARN_ON(list_empty(&pds)) ||
		    WARN_ON(pol->cpd_size && list_empty(&cpds))) {
			/* umm... this shouldn't happen, just abort */
			ret = -ENOMEM;
			goto out_unlock;
		}
		cpd = list_first_entry(&cpds, struct blkcg_policy_data,
				       alloc_node);
		list_del_init(&cpd->alloc_node);
		pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
		list_del_init(&pd->alloc_node);

		/* grab blkcg lock too while installing @pd on @blkg */
		spin_lock(&blkg->blkcg->lock);

		if (!pol->cpd_size)
			goto no_cpd;
		if (!blkg->blkcg->pd[pol->plid]) {
			/* Per-policy per-blkcg data */
			blkg->blkcg->pd[pol->plid] = cpd;
			cpd->plid = pol->plid;
			pol->cpd_init_fn(blkg->blkcg);
		} else { /* must free it as it has already been extracted */
			kfree(cpd);
		}
no_cpd:
		blkg->pd[pol->plid] = pd;
		pd->blkg = blkg;
		pd->plid = pol->plid;
@@ -1045,8 +1111,10 @@ int blkcg_activate_policy(struct request_queue *q,
	spin_unlock_irq(q->queue_lock);
out_free:
	blk_queue_bypass_end(q);
	list_for_each_entry_safe(pd, n, &pds, alloc_node)
	list_for_each_entry_safe(pd, nd, &pds, alloc_node)
		kfree(pd);
	list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node)
		kfree(cpd);
	return ret;
}
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
@@ -1087,6 +1155,8 @@ void blkcg_deactivate_policy(struct request_queue *q,

		kfree(blkg->pd[pol->plid]);
		blkg->pd[pol->plid] = NULL;
		kfree(blkg->blkcg->pd[pol->plid]);
		blkg->blkcg->pd[pol->plid] = NULL;

		spin_unlock(&blkg->blkcg->lock);
	}
+32 −8
Original line number Diff line number Diff line
@@ -23,11 +23,6 @@
/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

/* CFQ specific, out here for blkcg->cfq_weight */
#define CFQ_WEIGHT_MIN		10
#define CFQ_WEIGHT_MAX		1000
#define CFQ_WEIGHT_DEFAULT	500

#ifdef CONFIG_BLK_CGROUP

enum blkg_rwstat_type {
@@ -50,9 +45,7 @@ struct blkcg {
	struct blkcg_gq			*blkg_hint;
	struct hlist_head		blkg_list;

	/* TODO: per-policy storage in blkcg */
	unsigned int			cfq_weight;	/* belongs to cfq */
	unsigned int			cfq_leaf_weight;
	struct blkcg_policy_data	*pd[BLKCG_MAX_POLS];
};

struct blkg_stat {
@@ -87,6 +80,24 @@ struct blkg_policy_data {
	struct list_head		alloc_node;
};

/*
 * Policies that need to keep per-blkcg data which is independent
 * from any request_queue associated to it must specify its size
 * with the cpd_size field of the blkcg_policy structure and
 * embed a blkcg_policy_data in it. blkcg core allocates
 * policy-specific per-blkcg structures lazily the first time
 * they are actually needed, so it handles them together with
 * blkgs. cpd_init() is invoked to let each policy handle
 * per-blkcg data.
 */
struct blkcg_policy_data {
	/* the policy id this per-policy data belongs to */
	int				plid;

	/* used during policy activation */
	struct list_head		alloc_node;
};

/* association between a blk cgroup and a request queue */
struct blkcg_gq {
	/* Pointer to the associated request_queue */
@@ -112,6 +123,7 @@ struct blkcg_gq {
	struct rcu_head			rcu_head;
};

typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg);
typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg);
typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg);
@@ -122,10 +134,13 @@ struct blkcg_policy {
	int				plid;
	/* policy specific private data size */
	size_t				pd_size;
	/* policy specific per-blkcg data size */
	size_t				cpd_size;
	/* cgroup files for the policy */
	struct cftype			*cftypes;

	/* operations */
	blkcg_pol_init_cpd_fn		*cpd_init_fn;
	blkcg_pol_init_pd_fn		*pd_init_fn;
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
@@ -218,6 +233,12 @@ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->pd[pol->plid] : NULL;
}

/**
 * pdata_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
@@ -564,6 +585,9 @@ struct blkcg;
struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

Loading