
Commit 6c510389 authored by Linus Torvalds

Merge branch 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block: (65 commits)
  Documentation/iostats.txt: bit-size reference etc.
  cfq-iosched: removing unnecessary think time checking
  cfq-iosched: Don't clear queue stats when preempt.
  blk-throttle: Reset group slice when limits are changed
  blk-cgroup: Only give unaccounted_time under debug
  cfq-iosched: Don't set active queue in preempt
  block: fix non-atomic access to genhd inflight structures
  block: attempt to merge with existing requests on plug flush
  block: NULL dereference on error path in __blkdev_get()
  cfq-iosched: Don't update group weights when on service tree
  fs: assign sb->s_bdi to default_backing_dev_info if the bdi is going away
  block: Require subsystems to explicitly allocate bio_set integrity mempool
  jbd2: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
  jbd: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
  fs: make fsync_buffers_list() plug
  mm: make generic_writepages() use plugging
  blk-cgroup: Add unaccounted time to timeslice_used.
  block: fixup plugging stubs for !CONFIG_BLOCK
  block: remove obsolete comments for blkdev_issue_zeroout.
  blktrace: Use rq->cmd_flags directly in blk_add_trace_rq.
  ...

Fix up conflicts in fs/{aio.c,super.c}
parents 3dab04e6 9d2e157d
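
Several of the commits above (the jbd, jbd2, fsync_buffers_list() and generic_writepages() changes) finish the move from WRITE_SYNC_PLUG to WRITE_SYNC plus explicit, on-stack plugging. A minimal sketch of that pattern follows; submit_batch() and its bio array are illustrative, only blk_start_plug()/blk_finish_plug(), WRITE_SYNC and submit_bio() are real kernel interfaces here.

#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/fs.h>

/*
 * Illustrative sketch: batch several bio submissions inside one on-stack
 * plug so the block layer can merge and sort them before they reach the
 * driver.  The caller is assumed to have set up the bios already.
 */
static void submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* requests are held on a per-task list */
	for (i = 0; i < nr; i++)
		submit_bio(WRITE_SYNC, bios[i]);
	blk_finish_plug(&plug);		/* flush the plug list to the queue */
}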
+0 −5
@@ -963,11 +963,6 @@ elevator_dispatch_fn* fills the dispatch queue with ready requests.
 
 elevator_add_req_fn*		called to add a new request into the scheduler
 
-elevator_queue_empty_fn		returns true if the merge queue is empty.
-				Drivers shouldn't use this, but rather check
-				if elv_next_request is NULL (without losing the
-				request if one exists!)
-
 elevator_former_req_fn
 elevator_latter_req_fn		These return the request before or after the
 				one specified in disk sort order. Used by the
+1 −29
@@ -140,7 +140,7 @@ Proportional weight policy files
 	- Specifies per cgroup weight. This is default weight of the group
 	  on all the devices until and unless overridden by per device rule.
 	  (See blkio.weight_device).
-	  Currently allowed range of weights is from 100 to 1000.
+	  Currently allowed range of weights is from 10 to 1000.
 
 - blkio.weight_device
 	- One can specify per cgroup per device rules using this interface.
@@ -343,34 +343,6 @@ Common files among various policies
 
 CFQ sysfs tunable
 =================
-/sys/block/<disk>/queue/iosched/group_isolation
------------------------------------------------
-
-If group_isolation=1, it provides stronger isolation between groups at the
-expense of throughput. By default group_isolation is 0. In general that
-means that if group_isolation=0, expect fairness for sequential workload
-only. Set group_isolation=1 to see fairness for random IO workload also.
-
-Generally CFQ will put random seeky workload in sync-noidle category. CFQ
-will disable idling on these queues and it does a collective idling on group
-of such queues. Generally these are slow moving queues and if there is a
-sync-noidle service tree in each group, that group gets exclusive access to
-disk for certain period. That means it will bring the throughput down if
-group does not have enough IO to drive deeper queue depths and utilize disk
-capacity to the fullest in the slice allocated to it. But the flip side is
-that even a random reader should get better latencies and overall throughput
-if there are lots of sequential readers/sync-idle workload running in the
-system.
-
-If group_isolation=0, then CFQ automatically moves all the random seeky queues
-in the root group. That means there will be no service differentiation for
-that kind of workload. This leads to better throughput as we do collective
-idling on root sync-noidle tree.
-
-By default one should run with group_isolation=0. If that is not sufficient
-and one wants stronger isolation between groups, then set group_isolation=1
-but this will come at cost of reduced throughput.
-
 /sys/block/<disk>/queue/iosched/slice_idle
 ------------------------------------------
 On a faster hardware CFQ can be slow, especially with sequential workload.
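
The first hunk above widens the accepted per-cgroup weight range from 100..1000 to 10..1000. A small user-space sketch of writing a weight through the cgroup filesystem follows; the mount point and group name are assumptions, not part of this patch.

#include <stdio.h>
#include <stdlib.h>

/*
 * Illustrative only: set the proportional weight of a blkio cgroup.
 * After this change the kernel accepts values from 10 to 1000.
 * The cgroup mount point and group name below are assumed.
 */
int main(void)
{
	const char *path = "/sys/fs/cgroup/blkio/slowgroup/blkio.weight";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	fprintf(f, "%d\n", 10);	/* minimum weight now accepted */
	return fclose(f) ? EXIT_FAILURE : EXIT_SUCCESS;
}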
+8 −9
@@ -1,8 +1,6 @@
 I/O statistics fields
 ---------------
 
-Last modified Sep 30, 2003
-
 Since 2.4.20 (and some versions before, with patches), and 2.5.45,
 more extensive disk statistics have been introduced to help measure disk
 activity. Tools such as sar and iostat typically interpret these and do
@@ -46,11 +44,12 @@ the above example, the first field of statistics would be 446216.
 By contrast, in 2.6 if you look at /sys/block/hda/stat, you'll
 find just the eleven fields, beginning with 446216.  If you look at
 /proc/diskstats, the eleven fields will be preceded by the major and
-minor device numbers, and device name.  Each of these formats provide
+minor device numbers, and device name.  Each of these formats provides
 eleven fields of statistics, each meaning exactly the same things.
 All fields except field 9 are cumulative since boot.  Field 9 should
-go to zero as I/Os complete; all others only increase.  Yes, these are
-32 bit unsigned numbers, and on a very busy or long-lived system they
+go to zero as I/Os complete; all others only increase (unless they
+overflow and wrap).  Yes, these are (32-bit or 64-bit) unsigned long
+(native word size) numbers, and on a very busy or long-lived system they
 may wrap. Applications should be prepared to deal with that; unless
 your observations are measured in large numbers of minutes or hours,
 they should not wrap twice before you notice them.
@@ -96,11 +95,11 @@ introduced when changes collide, so (for instance) adding up all the
 read I/Os issued per partition should equal those made to the disks ...
 but due to the lack of locking it may only be very close.
 
-In 2.6, there are counters for each cpu, which made the lack of locking
-almost a non-issue.  When the statistics are read, the per-cpu counters
-are summed (possibly overflowing the unsigned 32-bit variable they are
+In 2.6, there are counters for each CPU, which make the lack of locking
+almost a non-issue.  When the statistics are read, the per-CPU counters
+are summed (possibly overflowing the unsigned long variable they are
+summed to) and the result given to the user.  There is no convenient
-user interface for accessing the per-cpu counters themselves.
+user interface for accessing the per-CPU counters themselves.
 
 Disks vs Partitions
 -------------------
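
The iostats.txt hunks above describe eleven native-word-sized fields that are cumulative (except field 9, "I/Os in flight") and may wrap. A short user-space sketch of reading them from a per-disk stat file; the device name sda is an assumption.

#include <stdio.h>

/*
 * Read the eleven I/O statistics fields from a block device's stat file.
 * All fields except field 9 (in flight) are cumulative and may wrap, so
 * consumers should diff successive samples using unsigned arithmetic.
 */
int main(void)
{
	unsigned long long v[11];
	FILE *f = fopen("/sys/block/sda/stat", "r");
	int i, n = 0;

	if (!f) {
		perror("fopen");
		return 1;
	}
	for (i = 0; i < 11; i++)
		n += fscanf(f, "%llu", &v[i]);
	fclose(f);

	if (n == 11)
		printf("reads completed: %llu  in flight: %llu\n", v[0], v[8]);
	return 0;
}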
+15 −1
@@ -371,12 +371,14 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
 
-void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
+void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
+				unsigned long unaccounted_time)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&blkg->stats_lock, flags);
 	blkg->stats.time += time;
+	blkg->stats.unaccounted_time += unaccounted_time;
 	spin_unlock_irqrestore(&blkg->stats_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
@@ -604,6 +606,9 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
 		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
 					blkg->stats.sectors, cb, dev);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+	if (type == BLKIO_STAT_UNACCOUNTED_TIME)
+		return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+					blkg->stats.unaccounted_time, cb, dev);
 	if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
 		uint64_t sum = blkg->stats.avg_queue_size_sum;
 		uint64_t samples = blkg->stats.avg_queue_size_samples;
@@ -1125,6 +1130,9 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
 			return blkio_read_blkg_stats(blkcg, cft, cb,
 						BLKIO_STAT_QUEUED, 1);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
+		case BLKIO_PROP_unaccounted_time:
+			return blkio_read_blkg_stats(blkcg, cft, cb,
+						BLKIO_STAT_UNACCOUNTED_TIME, 0);
 		case BLKIO_PROP_dequeue:
 			return blkio_read_blkg_stats(blkcg, cft, cb,
 						BLKIO_STAT_DEQUEUE, 0);
@@ -1382,6 +1390,12 @@ struct cftype blkio_files[] = {
 				BLKIO_PROP_dequeue),
 		.read_map = blkiocg_file_read_map,
 	},
+	{
+		.name = "unaccounted_time",
+		.private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
+				BLKIO_PROP_unaccounted_time),
+		.read_map = blkiocg_file_read_map,
+	},
 #endif
 };
 
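
The blk-cgroup.c hunks above add an unaccounted_time argument to blkiocg_update_timeslice_used() and expose the total as a debug-only unaccounted_time cgroup file. A hedged sketch of how an I/O scheduler might split a slice between charged and unaccounted time; charge_group_slice() and its parameters are hypothetical, only the updated blkiocg_update_timeslice_used() signature comes from this merge.

#include "blk-cgroup.h"	/* kernel-internal header touched by this merge */

/*
 * Hypothetical caller: charge the portion of a timeslice during which the
 * group actually had requests dispatched, and record the remainder as
 * unaccounted time so it shows up in the debug-only cgroup file.
 */
static void charge_group_slice(struct blkio_group *blkg,
			       unsigned long slice_start,
			       unsigned long first_dispatch,
			       unsigned long now)
{
	unsigned long used = now - first_dispatch;
	unsigned long unaccounted = first_dispatch - slice_start;

	blkiocg_update_timeslice_used(blkg, used, unaccounted);
}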
+11 −3
@@ -49,6 +49,8 @@ enum stat_type {
 	/* All the single valued stats go below this */
 	BLKIO_STAT_TIME,
 	BLKIO_STAT_SECTORS,
+	/* Time not charged to this cgroup */
+	BLKIO_STAT_UNACCOUNTED_TIME,
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	BLKIO_STAT_AVG_QUEUE_SIZE,
 	BLKIO_STAT_IDLE_TIME,
@@ -81,6 +83,7 @@ enum blkcg_file_name_prop {
 	BLKIO_PROP_io_serviced,
 	BLKIO_PROP_time,
 	BLKIO_PROP_sectors,
+	BLKIO_PROP_unaccounted_time,
 	BLKIO_PROP_io_service_time,
 	BLKIO_PROP_io_wait_time,
 	BLKIO_PROP_io_merged,
@@ -114,6 +117,8 @@ struct blkio_group_stats {
 	/* total disk time and nr sectors dispatched by this group */
 	uint64_t time;
 	uint64_t sectors;
+	/* Time not charged to this cgroup */
+	uint64_t unaccounted_time;
 	uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 	/* Sum of number of IOs queued across all samples */
@@ -240,7 +245,7 @@ static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
 
 #endif
 
-#define BLKIO_WEIGHT_MIN	100
+#define BLKIO_WEIGHT_MIN	10
 #define BLKIO_WEIGHT_MAX	1000
 #define BLKIO_WEIGHT_DEFAULT	500
 
@@ -293,7 +298,8 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
 						void *key);
 void blkiocg_update_timeslice_used(struct blkio_group *blkg,
-					unsigned long time);
+					unsigned long time,
+					unsigned long unaccounted_time);
 void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
 						bool direction, bool sync);
 void blkiocg_update_completion_stats(struct blkio_group *blkg,
@@ -319,7 +325,9 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
 static inline struct blkio_group *
 blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
 static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
-						unsigned long time) {}
+						unsigned long time,
+						unsigned long unaccounted_time)
+{}
 static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
 				uint64_t bytes, bool direction, bool sync) {}
 static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
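
The header hunks keep the real prototypes and the stubbed-out static inlines in step, so call sites build unchanged whether the block cgroup controller is configured in or not. A generic sketch of that pattern with purely illustrative names:

/*
 * Generic pattern, names illustrative: real prototype when the feature is
 * built in, empty static inline stub otherwise, so callers need no #ifdef.
 */
struct example_group;

#ifdef CONFIG_EXAMPLE_FEATURE
void example_update_stats(struct example_group *grp, unsigned long time,
			  unsigned long unaccounted_time);
#else
static inline void example_update_stats(struct example_group *grp,
					unsigned long time,
					unsigned long unaccounted_time) {}
#endif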