diff options
author | Shailabh Nagar <nagar@watson.ibm.com> | 2006-07-14 00:24:37 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-14 21:53:56 -0700 |
commit | 0ff922452df86f3e9a2c6f705c4588ec62d096a7 (patch) | |
tree | ac84041bfb63f12d0e2db733c46b2cd2438b4882 | |
parent | ca74e92b4698276b6696f15a801759f50944f387 (diff) | |
download | lwn-0ff922452df86f3e9a2c6f705c4588ec62d096a7.tar.gz lwn-0ff922452df86f3e9a2c6f705c4588ec62d096a7.zip |
[PATCH] per-task-delay-accounting: sync block I/O and swapin delay collection
Unlike earlier iterations of the delay accounting patches, now delays are only
collected for the actual I/O waits rather than try and cover the delays seen
in I/O submission paths.
Account separately for block I/O delays incurred as a result of swapin page
faults whose frequency can be affected by the task/process' rss limit. Hence
swapin delays can act as feedback for rss limit changes independent of I/O
priority changes.
Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com>
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
Cc: Jes Sorensen <jes@sgi.com>
Cc: Peter Chubb <peterc@gelato.unsw.edu.au>
Cc: Erich Focht <efocht@ess.nec.de>
Cc: Levent Serinol <lserinol@gmail.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | include/linux/delayacct.h | 25 | ||||
-rw-r--r-- | include/linux/sched.h | 13 | ||||
-rw-r--r-- | kernel/delayacct.c | 19 | ||||
-rw-r--r-- | kernel/sched.c | 5 | ||||
-rw-r--r-- | mm/memory.c | 4 |
5 files changed, 66 insertions, 0 deletions
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 9572cfa1f129..0ecbf9aad8e1 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -19,6 +19,13 @@ #include <linux/sched.h> +/* + * Per-task flags relevant to delay accounting + * maintained privately to avoid exhausting similar flags in sched.h:PF_* + * Used to set current->delays->flags + */ +#define DELAYACCT_PF_SWAPIN 0x00000001 /* I am doing a swapin */ + #ifdef CONFIG_TASK_DELAY_ACCT extern int delayacct_on; /* Delay accounting turned on/off */ @@ -26,6 +33,8 @@ extern kmem_cache_t *delayacct_cache; extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); +extern void __delayacct_blkio_start(void); +extern void __delayacct_blkio_end(void); static inline void delayacct_set_flag(int flag) { @@ -53,6 +62,18 @@ static inline void delayacct_tsk_exit(struct task_struct *tsk) __delayacct_tsk_exit(tsk); } +static inline void delayacct_blkio_start(void) +{ + if (current->delays) + __delayacct_blkio_start(); +} + +static inline void delayacct_blkio_end(void) +{ + if (current->delays) + __delayacct_blkio_end(); +} + #else static inline void delayacct_set_flag(int flag) {} @@ -64,6 +85,10 @@ static inline void delayacct_tsk_init(struct task_struct *tsk) {} static inline void delayacct_tsk_exit(struct task_struct *tsk) {} +static inline void delayacct_blkio_start(void) +{} +static inline void delayacct_blkio_end(void) +{} #endif /* CONFIG_TASK_DELAY_ACCT */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 7a54e62763c5..2f43f1fb7de7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -566,6 +566,19 @@ struct task_delay_info { * Atomicity of updates to XXX_delay, XXX_count protected by * single lock above (split into XXX_lock if contention is an issue). 
 */ + + /* + * XXX_count is incremented on every XXX operation, the delay + * associated with the operation is added to XXX_delay. + * XXX_delay contains the accumulated delay time in nanoseconds. + */ + struct timespec blkio_start, blkio_end; /* Shared by blkio, swapin */ + u64 blkio_delay; /* wait for sync block io completion */ + u64 swapin_delay; /* wait for swapin block io completion */ + u32 blkio_count; /* total count of the number of sync block */ + /* io operations performed */ + u32 swapin_count; /* total count of the number of swapin block */ + /* io operations performed */ }; #endif diff --git a/kernel/delayacct.c b/kernel/delayacct.c index fbf7f2284952..3546b0800f9f 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -85,3 +85,22 @@ static void delayacct_end(struct timespec *start, struct timespec *end, spin_unlock(&current->delays->lock); } +void __delayacct_blkio_start(void) +{ + delayacct_start(&current->delays->blkio_start); +} + +void __delayacct_blkio_end(void) +{ + if (current->delays->flags & DELAYACCT_PF_SWAPIN) + /* Swapin block I/O */ + delayacct_end(&current->delays->blkio_start, + &current->delays->blkio_end, + &current->delays->swapin_delay, + &current->delays->swapin_count); + else /* Other block I/O */ + delayacct_end(&current->delays->blkio_start, + &current->delays->blkio_end, + &current->delays->blkio_delay, + &current->delays->blkio_count); +} diff --git a/kernel/sched.c b/kernel/sched.c index e9a0b61f12ab..9d42cbfc4f8b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -51,6 +51,7 @@ #include <linux/times.h> #include <linux/acct.h> #include <linux/kprobes.h> +#include <linux/delayacct.h> #include <asm/tlb.h> #include <asm/unistd.h> @@ -4534,9 +4535,11 @@ void __sched io_schedule(void) { struct rq *rq = &__raw_get_cpu_var(runqueues); + delayacct_blkio_start(); atomic_inc(&rq->nr_iowait); schedule(); atomic_dec(&rq->nr_iowait); + delayacct_blkio_end(); } EXPORT_SYMBOL(io_schedule); @@ -4545,9 +4548,11 @@ long __sched io_schedule_timeout(long timeout) struct rq *rq = 
&__raw_get_cpu_var(runqueues); long ret; + delayacct_blkio_start(); atomic_inc(&rq->nr_iowait); ret = schedule_timeout(timeout); atomic_dec(&rq->nr_iowait); + delayacct_blkio_end(); return ret; } diff --git a/mm/memory.c b/mm/memory.c index de8bc85dc8f3..109e9866237e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -47,6 +47,7 @@ #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/module.h> +#include <linux/delayacct.h> #include <linux/init.h> #include <asm/pgalloc.h> @@ -1934,6 +1935,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, migration_entry_wait(mm, pmd, address); goto out; } + delayacct_set_flag(DELAYACCT_PF_SWAPIN); page = lookup_swap_cache(entry); if (!page) { swapin_readahead(entry, address, vma); @@ -1946,6 +1948,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, page_table = pte_offset_map_lock(mm, pmd, address, &ptl); if (likely(pte_same(*page_table, orig_pte))) ret = VM_FAULT_OOM; + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); goto unlock; } @@ -1955,6 +1958,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, grab_swap_token(); } + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); mark_page_accessed(page); lock_page(page); |