Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c9959059 authored by Tejun Heo's avatar Tejun Heo Committed by Jens Axboe
Browse files

block: fix diskstats access



There are two variants of stat functions - ones prefixed with double
underbars which don't care about preemption and ones without which
disable preemption before manipulating per-cpu counters.  It's unclear
whether the underbarred ones assume that preemption is disabled on
entry as some callers don't do that.

This patch unifies diskstats access by implementing disk_stat_lock()
and disk_stat_unlock() which take care of both RCU (for partition
access) and preemption (for per-cpu counter access).  diskstats access
should always be enclosed between the two functions.  As such, there's
no need for the versions which disable preemption.  They're removed
and the double-underbar ones are renamed to drop the underbars.  As an
extra argument is added, there's no danger of using the old version
unconverted.

disk_stat_lock() uses get_cpu() and returns the cpu index and all
diskstat functions which access per-cpu counters now have a @cpu
argument to help RT.

This change adds RCU or preemption operations at some places but also
collapses several preemption ops into one at others.  Overall, the
performance difference should be negligible as all involved ops are
very lightweight per-cpu ones.

Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: default avatarJens Axboe <jens.axboe@oracle.com>
parent e71bf0d0
Loading
Loading
Loading
Loading
+27 −25
Original line number Diff line number Diff line
@@ -56,25 +56,26 @@ static void drive_stat_acct(struct request *rq, int new_io)
{
	struct hd_struct *part;
	int rw = rq_data_dir(rq);
	int cpu;

	if (!blk_fs_request(rq) || !rq->rq_disk)
		return;

	rcu_read_lock();

	cpu = disk_stat_lock();
	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);

	if (!new_io)
		__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
		all_stat_inc(cpu, rq->rq_disk, part, merges[rw], rq->sector);
	else {
		disk_round_stats(rq->rq_disk);
		disk_round_stats(cpu, rq->rq_disk);
		rq->rq_disk->in_flight++;
		if (part) {
			part_round_stats(part);
			part_round_stats(cpu, part);
			part->in_flight++;
		}
	}

	rcu_read_unlock();
	disk_stat_unlock();
}

void blk_queue_congestion_threshold(struct request_queue *q)
@@ -997,7 +998,7 @@ static inline void add_request(struct request_queue *q, struct request *req)
 * /proc/diskstats.  This accounts immediately for all queue usage up to
 * the current jiffies and restarts the counters again.
 */
void disk_round_stats(struct gendisk *disk)
void disk_round_stats(int cpu, struct gendisk *disk)
{
	unsigned long now = jiffies;

@@ -1005,15 +1006,15 @@ void disk_round_stats(struct gendisk *disk)
		return;

	if (disk->in_flight) {
		__disk_stat_add(disk, time_in_queue,
		disk_stat_add(cpu, disk, time_in_queue,
			      disk->in_flight * (now - disk->stamp));
		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
		disk_stat_add(cpu, disk, io_ticks, (now - disk->stamp));
	}
	disk->stamp = now;
}
EXPORT_SYMBOL_GPL(disk_round_stats);

void part_round_stats(struct hd_struct *part)
void part_round_stats(int cpu, struct hd_struct *part)
{
	unsigned long now = jiffies;

@@ -1021,9 +1022,9 @@ void part_round_stats(struct hd_struct *part)
		return;

	if (part->in_flight) {
		__part_stat_add(part, time_in_queue,
		part_stat_add(cpu, part, time_in_queue,
			      part->in_flight * (now - part->stamp));
		__part_stat_add(part, io_ticks, (now - part->stamp));
		part_stat_add(cpu, part, io_ticks, (now - part->stamp));
	}
	part->stamp = now;
}
@@ -1563,12 +1564,13 @@ static int __end_that_request_first(struct request *req, int error,
	if (blk_fs_request(req) && req->rq_disk) {
		const int rw = rq_data_dir(req);
		struct hd_struct *part;
		int cpu;

		rcu_read_lock();
		cpu = disk_stat_lock();
		part = disk_map_sector_rcu(req->rq_disk, req->sector);
		all_stat_add(req->rq_disk, part, sectors[rw],
		all_stat_add(cpu, req->rq_disk, part, sectors[rw],
			     nr_bytes >> 9, req->sector);
		rcu_read_unlock();
		disk_stat_unlock();
	}

	total_bytes = bio_nbytes = 0;
@@ -1753,21 +1755,21 @@ static void end_that_request_last(struct request *req, int error)
		unsigned long duration = jiffies - req->start_time;
		const int rw = rq_data_dir(req);
		struct hd_struct *part;
		int cpu;

		rcu_read_lock();

		cpu = disk_stat_lock();
		part = disk_map_sector_rcu(disk, req->sector);

		__all_stat_inc(disk, part, ios[rw], req->sector);
		__all_stat_add(disk, part, ticks[rw], duration, req->sector);
		disk_round_stats(disk);
		all_stat_inc(cpu, disk, part, ios[rw], req->sector);
		all_stat_add(cpu, disk, part, ticks[rw], duration, req->sector);
		disk_round_stats(cpu, disk);
		disk->in_flight--;
		if (part) {
			part_round_stats(part);
			part_round_stats(cpu, part);
			part->in_flight--;
		}

		rcu_read_unlock();
		disk_stat_unlock();
	}

	if (req->end_io)
+6 −5
Original line number Diff line number Diff line
@@ -388,18 +388,19 @@ static int attempt_merge(struct request_queue *q, struct request *req,

	if (req->rq_disk) {
		struct hd_struct *part;
		int cpu;

		rcu_read_lock();

		cpu = disk_stat_lock();
		part = disk_map_sector_rcu(req->rq_disk, req->sector);
		disk_round_stats(req->rq_disk);

		disk_round_stats(cpu, req->rq_disk);
		req->rq_disk->in_flight--;
		if (part) {
			part_round_stats(part);
			part_round_stats(cpu, part);
			part->in_flight--;
		}

		rcu_read_unlock();
		disk_stat_unlock();
	}

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
+11 −9
Original line number Diff line number Diff line
@@ -633,10 +633,11 @@ static ssize_t disk_stat_show(struct device *dev,
			      struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int cpu;

	preempt_disable();
	disk_round_stats(disk);
	preempt_enable();
	cpu = disk_stat_lock();
	disk_round_stats(cpu, disk);
	disk_stat_unlock();
	return sprintf(buf,
		"%8lu %8lu %8llu %8u "
		"%8lu %8lu %8llu %8u "
@@ -749,6 +750,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
	struct disk_part_iter piter;
	struct hd_struct *hd;
	char buf[BDEVNAME_SIZE];
	int cpu;

	/*
	if (&gp->dev.kobj.entry == block_class.devices.next)
@@ -758,9 +760,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
				"\n\n");
	*/
 
	preempt_disable();
	disk_round_stats(gp);
	preempt_enable();
	cpu = disk_stat_lock();
	disk_round_stats(cpu, gp);
	disk_stat_unlock();
	seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n",
		MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)),
		disk_name(gp, 0, buf),
@@ -777,9 +779,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
	/* now show all non-0 size partitions of it */
	disk_part_iter_init(&piter, gp, 0);
	while ((hd = disk_part_iter_next(&piter))) {
		preempt_disable();
		part_round_stats(hd);
		preempt_enable();
		cpu = disk_stat_lock();
		part_round_stats(cpu, hd);
		disk_stat_unlock();
		seq_printf(seqf, "%4d %4d %s %lu %lu %llu "
			   "%u %lu %lu %llu %u %u %u %u\n",
			   MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
+8 −7
Original line number Diff line number Diff line
@@ -756,16 +756,17 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector
	unsigned long n_sect = bio->bi_size >> 9;
	const int rw = bio_data_dir(bio);
	struct hd_struct *part;
	int cpu;

	rcu_read_lock();

	cpu = disk_stat_lock();
	part = disk_map_sector_rcu(disk, sector);
	all_stat_inc(disk, part, ios[rw], sector);
	all_stat_add(disk, part, ticks[rw], duration, sector);
	all_stat_add(disk, part, sectors[rw], n_sect, sector);
	all_stat_add(disk, part, io_ticks, duration, sector);

	rcu_read_unlock();
	all_stat_inc(cpu, disk, part, ios[rw], sector);
	all_stat_add(cpu, disk, part, ticks[rw], duration, sector);
	all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector);
	all_stat_add(cpu, disk, part, io_ticks, duration, sector);

	disk_stat_unlock();
}

void
+15 −11
Original line number Diff line number Diff line
@@ -377,12 +377,13 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
static void start_io_acct(struct dm_io *io)
{
	struct mapped_device *md = io->md;
	int cpu;

	io->start_time = jiffies;

	preempt_disable();
	disk_round_stats(dm_disk(md));
	preempt_enable();
	cpu = disk_stat_lock();
	disk_round_stats(cpu, dm_disk(md));
	disk_stat_unlock();
	dm_disk(md)->in_flight = atomic_inc_return(&md->pending);
}

@@ -391,15 +392,15 @@ static int end_io_acct(struct dm_io *io)
	struct mapped_device *md = io->md;
	struct bio *bio = io->bio;
	unsigned long duration = jiffies - io->start_time;
	int pending;
	int pending, cpu;
	int rw = bio_data_dir(bio);

	preempt_disable();
	disk_round_stats(dm_disk(md));
	preempt_enable();
	dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending);
	cpu = disk_stat_lock();
	disk_round_stats(cpu, dm_disk(md));
	disk_stat_add(cpu, dm_disk(md), ticks[rw], duration);
	disk_stat_unlock();

	disk_stat_add(dm_disk(md), ticks[rw], duration);
	dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending);

	return !pending;
}
@@ -885,6 +886,7 @@ static int dm_request(struct request_queue *q, struct bio *bio)
	int r = -EIO;
	int rw = bio_data_dir(bio);
	struct mapped_device *md = q->queuedata;
	int cpu;

	/*
	 * There is no use in forwarding any barrier request since we can't
@@ -897,8 +899,10 @@ static int dm_request(struct request_queue *q, struct bio *bio)

	down_read(&md->io_lock);

	disk_stat_inc(dm_disk(md), ios[rw]);
	disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio));
	cpu = disk_stat_lock();
	disk_stat_inc(cpu, dm_disk(md), ios[rw]);
	disk_stat_add(cpu, dm_disk(md), sectors[rw], bio_sectors(bio));
	disk_stat_unlock();

	/*
	 * If we're suspended we have to queue
Loading