Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 275220f0 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.38/core' of git://git.kernel.dk/linux-2.6-block: (43 commits)
  block: ensure that completion error gets properly traced
  blktrace: add missing probe argument to block_bio_complete
  block cfq: don't use atomic_t for cfq_group
  block cfq: don't use atomic_t for cfq_queue
  block: trace event block fix unassigned field
  block: add internal hd part table references
  block: fix accounting bug on cross partition merges
  kref: add kref_test_and_get
  bio-integrity: mark kintegrityd_wq highpri and CPU intensive
  block: make kblockd_workqueue smarter
  Revert "sd: implement sd_check_events()"
  block: Clean up exit_io_context() source code.
  Fix compile warnings due to missing removal of a 'ret' variable
  fs/block: type signature of major_to_index(int) to major_to_index(unsigned)
  block: convert !IS_ERR(p) && p to !IS_ERR_NOR_NULL(p)
  cfq-iosched: don't check cfqg in choose_service_tree()
  fs/splice: Pull buf->ops->confirm() from splice_from_pipe actors
  cdrom: export cdrom_check_events()
  sd: implement sd_check_events()
  sr: implement sr_check_events()
  ...
parents fe3c560b 81c5e2ae
Loading
Loading
Loading
Loading
+27 −0
Original line number Diff line number Diff line
@@ -89,6 +89,33 @@ Throttling/Upper Limit policy

 Limits for writes can be put using blkio.write_bps_device file.

Hierarchical Cgroups
====================
- Currently none of the IO control policies supports hierarchical groups, but
  the cgroup interface does allow creation of hierarchical cgroups. Internally,
  the IO policies treat them as a flat hierarchy.

  So this patch will allow creation of a cgroup hierarchy, but at the backend
  everything will be treated as flat. For example, if somebody created a
  hierarchy as follows:

			root
			/  \
		     test1 test2
			|
		     test3

  CFQ and throttling will practically treat all groups at the same level.

				pivot
			     /  |   \  \
			root  test1 test2  test3

  Down the line we can implement hierarchical accounting/control support
  and also introduce a new cgroup file "use_hierarchy" which will control
  whether cgroup hierarchy is viewed as flat or hierarchical by the policy.
  This is how the memory controller has implemented things as well.

Various user visible config options
===================================
CONFIG_BLK_CGROUP
+0 −4
Original line number Diff line number Diff line
@@ -1452,10 +1452,6 @@ blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
		goto done;
	}

	/* Currently we do not support hierarchy deeper than two level (0,1) */
	if (parent != cgroup->top_cgroup)
		return ERR_PTR(-EPERM);

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);
+29 −11
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@

#include "blk.h"

EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

@@ -64,13 +64,27 @@ static void drive_stat_acct(struct request *rq, int new_io)
		return;

	cpu = part_stat_lock();
	part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));

	if (!new_io)
	if (!new_io) {
		part = rq->part;
		part_stat_inc(cpu, part, merges[rw]);
	else {
	} else {
		part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq));
		if (!hd_struct_try_get(part)) {
			/*
			 * The partition is already being removed,
			 * the request will be accounted on the disk only
			 *
			 * We take a reference on disk->part0 although that
			 * partition will never be deleted, so we can treat
			 * it as any other partition.
			 */
			part = &rq->rq_disk->part0;
			hd_struct_get(part);
		}
		part_round_stats(cpu, part);
		part_inc_in_flight(part, rw);
		rq->part = part;
	}

	part_stat_unlock();
@@ -128,6 +142,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
	rq->ref_count = 1;
	rq->start_time = jiffies;
	set_start_time_ns(rq);
	rq->part = NULL;
}
EXPORT_SYMBOL(blk_rq_init);

@@ -1329,7 +1344,7 @@ static inline void blk_partition_remap(struct bio *bio)
		bio->bi_sector += p->start_sect;
		bio->bi_bdev = bdev->bd_contains;

		trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
		trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
				      bdev->bd_dev,
				      bio->bi_sector - p->start_sect);
	}
@@ -1500,7 +1515,7 @@ static inline void __generic_make_request(struct bio *bio)
			goto end_io;

		if (old_sector != -1)
			trace_block_remap(q, bio, old_dev, old_sector);
			trace_block_bio_remap(q, bio, old_dev, old_sector);

		old_sector = bio->bi_sector;
		old_dev = bio->bi_bdev->bd_dev;
@@ -1776,7 +1791,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
		int cpu;

		cpu = part_stat_lock();
		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
		part = req->part;
		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
		part_stat_unlock();
	}
@@ -1796,13 +1811,14 @@ static void blk_account_io_done(struct request *req)
		int cpu;

		cpu = part_stat_lock();
		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
		part = req->part;

		part_stat_inc(cpu, part, ios[rw]);
		part_stat_add(cpu, part, ticks[rw], duration);
		part_round_stats(cpu, part);
		part_dec_in_flight(part, rw);

		hd_struct_put(part);
		part_stat_unlock();
	}
}
@@ -2606,7 +2622,9 @@ int __init blk_dev_init(void)
	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
			sizeof(((struct request *)0)->cmd_flags));

	kblockd_workqueue = create_workqueue("kblockd");
	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
	kblockd_workqueue = alloc_workqueue("kblockd",
					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (!kblockd_workqueue)
		panic("Failed to create kblockd\n");

+2 −3
Original line number Diff line number Diff line
@@ -64,7 +64,7 @@ static void cfq_exit(struct io_context *ioc)
	rcu_read_unlock();
}

/* Called by the exitting task */
/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
	struct io_context *ioc;
@@ -74,10 +74,9 @@ void exit_io_context(struct task_struct *task)
	task->io_context = NULL;
	task_unlock(task);

	if (atomic_dec_and_test(&ioc->nr_tasks)) {
	if (atomic_dec_and_test(&ioc->nr_tasks))
		cfq_exit(ioc);

	}
	put_io_context(ioc);
}

+2 −1
Original line number Diff line number Diff line
@@ -351,11 +351,12 @@ static void blk_account_io_merge(struct request *req)
		int cpu;

		cpu = part_stat_lock();
		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
		part = req->part;

		part_round_stats(cpu, part);
		part_dec_in_flight(part, rq_data_dir(req));

		hd_struct_put(part);
		part_stat_unlock();
	}
}
Loading