Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0ff92245 authored by Shailabh Nagar's avatar Shailabh Nagar Committed by Linus Torvalds
Browse files

[PATCH] per-task-delay-accounting: sync block I/O and swapin delay collection



Unlike earlier iterations of the delay accounting patches, now delays are only
collected for the actual I/O waits rather than try and cover the delays seen
in I/O submission paths.

Account separately for block I/O delays incurred as a result of swapin page
faults whose frequency can be affected by the task/process' rss limit.  Hence
swapin delays can act as feedback for rss limit changes independent of I/O
priority changes.

Signed-off-by: default avatarShailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: default avatarBalbir Singh <balbir@in.ibm.com>
Cc: Jes Sorensen <jes@sgi.com>
Cc: Peter Chubb <peterc@gelato.unsw.edu.au>
Cc: Erich Focht <efocht@ess.nec.de>
Cc: Levent Serinol <lserinol@gmail.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent ca74e92b
Loading
Loading
Loading
Loading
+25 −0
Original line number Diff line number Diff line
@@ -19,6 +19,13 @@

#include <linux/sched.h>

/*
 * Per-task flags relevant to delay accounting
 * maintained privately to avoid exhausting similar flags in sched.h:PF_*
 * Used to set current->delays->flags
 */
#define DELAYACCT_PF_SWAPIN	0x00000001	/* I am doing a swapin */

#ifdef CONFIG_TASK_DELAY_ACCT

extern int delayacct_on;	/* Delay accounting turned on/off */
@@ -26,6 +33,8 @@ extern kmem_cache_t *delayacct_cache;
extern void delayacct_init(void);
extern void __delayacct_tsk_init(struct task_struct *);
extern void __delayacct_tsk_exit(struct task_struct *);
extern void __delayacct_blkio_start(void);
extern void __delayacct_blkio_end(void);

static inline void delayacct_set_flag(int flag)
{
@@ -53,6 +62,18 @@ static inline void delayacct_tsk_exit(struct task_struct *tsk)
		__delayacct_tsk_exit(tsk);
}

static inline void delayacct_blkio_start(void)
{
	if (current->delays)
		__delayacct_blkio_start();
}

static inline void delayacct_blkio_end(void)
{
	if (current->delays)
		__delayacct_blkio_end();
}

#else
static inline void delayacct_set_flag(int flag)
{}
@@ -64,6 +85,10 @@ static inline void delayacct_tsk_init(struct task_struct *tsk)
{}
static inline void delayacct_tsk_exit(struct task_struct *tsk)
{}
static inline void delayacct_blkio_start(void)
{}
static inline void delayacct_blkio_end(void)
{}
#endif /* CONFIG_TASK_DELAY_ACCT */

#endif
+13 −0
Original line number Diff line number Diff line
@@ -566,6 +566,19 @@ struct task_delay_info {
	 * Atomicity of updates to XXX_delay, XXX_count protected by
	 * single lock above (split into XXX_lock if contention is an issue).
	 */

	/*
	 * XXX_count is incremented on every XXX operation, the delay
	 * associated with the operation is added to XXX_delay.
	 * XXX_delay contains the accumulated delay time in nanoseconds.
	 */
	struct timespec blkio_start, blkio_end;	/* Shared by blkio, swapin */
	u64 blkio_delay;	/* wait for sync block io completion */
	u64 swapin_delay;	/* wait for swapin block io completion */
	u32 blkio_count;	/* total count of the number of sync block */
				/* io operations performed */
	u32 swapin_count;	/* total count of the number of swapin block */
				/* io operations performed */
};
#endif

+19 −0
Original line number Diff line number Diff line
@@ -85,3 +85,22 @@ static void delayacct_end(struct timespec *start, struct timespec *end,
	spin_unlock(&current->delays->lock);
}

void __delayacct_blkio_start(void)
{
	delayacct_start(&current->delays->blkio_start);
}

void __delayacct_blkio_end(void)
{
	if (current->delays->flags & DELAYACCT_PF_SWAPIN)
		/* Swapin block I/O */
		delayacct_end(&current->delays->blkio_start,
			&current->delays->blkio_end,
			&current->delays->swapin_delay,
			&current->delays->swapin_count);
	else	/* Other block I/O */
		delayacct_end(&current->delays->blkio_start,
			&current->delays->blkio_end,
			&current->delays->blkio_delay,
			&current->delays->blkio_count);
}
+5 −0
Original line number Diff line number Diff line
@@ -51,6 +51,7 @@
#include <linux/times.h>
#include <linux/acct.h>
#include <linux/kprobes.h>
#include <linux/delayacct.h>
#include <asm/tlb.h>

#include <asm/unistd.h>
@@ -4534,9 +4535,11 @@ void __sched io_schedule(void)
{
	struct rq *rq = &__raw_get_cpu_var(runqueues);

	delayacct_blkio_start();
	atomic_inc(&rq->nr_iowait);
	schedule();
	atomic_dec(&rq->nr_iowait);
	delayacct_blkio_end();
}
EXPORT_SYMBOL(io_schedule);

@@ -4545,9 +4548,11 @@ long __sched io_schedule_timeout(long timeout)
	struct rq *rq = &__raw_get_cpu_var(runqueues);
	long ret;

	delayacct_blkio_start();
	atomic_inc(&rq->nr_iowait);
	ret = schedule_timeout(timeout);
	atomic_dec(&rq->nr_iowait);
	delayacct_blkio_end();
	return ret;
}

+4 −0
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/delayacct.h>
#include <linux/init.h>

#include <asm/pgalloc.h>
@@ -1934,6 +1935,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
		migration_entry_wait(mm, pmd, address);
		goto out;
	}
	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
	page = lookup_swap_cache(entry);
	if (!page) {
 		swapin_readahead(entry, address, vma);
@@ -1946,6 +1948,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
			page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
			if (likely(pte_same(*page_table, orig_pte)))
				ret = VM_FAULT_OOM;
			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
			goto unlock;
		}

@@ -1955,6 +1958,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
		grab_swap_token();
	}

	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
	mark_page_accessed(page);
	lock_page(page);