diff options
author | Yang Yang <yang.yang29@zte.com.cn> | 2022-06-01 15:55:25 -0700 |
---|---|---|
committer | akpm <akpm@linux-foundation.org> | 2022-06-01 15:55:25 -0700 |
commit | 662ce1dc9caf493c309200edbe38d186f1ea20d0 (patch) | |
tree | e49eeda0a61590b5843d47d1be17dbb18cf3aaa1 | |
parent | 54eb8462f21fb170a05ad64620f0d8d0cf2b7fb5 (diff) | |
download | lwn-662ce1dc9caf493c309200edbe38d186f1ea20d0.tar.gz lwn-662ce1dc9caf493c309200edbe38d186f1ea20d0.zip |
delayacct: track delays from write-protect copy
Delay accounting does not track the delay of write-protect copy. When
tasks trigger many write-protect copys(include COW and unsharing of
anonymous pages[1]), it may spend a amount of time waiting for them. To
get the delay of tasks in write-protect copy, could help users to evaluate
the impact of using KSM or fork() or GUP.
Also update tools/accounting/getdelays.c:
/ # ./getdelays -dl -p 231
print delayacct stats ON
listen forever
PID 231
CPU count real total virtual total delay total delay average
6247 1859000000 2154070021 1674255063 0.268ms
IO count delay total delay average
0 0 0ms
SWAP count delay total delay average
0 0 0ms
RECLAIM count delay total delay average
0 0 0ms
THRASHING count delay total delay average
0 0 0ms
COMPACT count delay total delay average
3 72758 0ms
WPCOPY count delay total delay average
3635 271567604 0ms
[1] commit 31cc5bc4af70("mm: support GUP-triggered unsharing of anonymous pages")
Link: https://lkml.kernel.org/r/20220409014342.2505532-1-yang.yang29@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Reviewed-by: Ran Xiaokai <ran.xiaokai@zte.com.cn>
Reviewed-by: wangyong <wang.yong12@zte.com.cn>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r-- | Documentation/accounting/delay-accounting.rst | 5 | ||||
-rw-r--r-- | include/linux/delayacct.h | 28 | ||||
-rw-r--r-- | include/uapi/linux/taskstats.h | 6 | ||||
-rw-r--r-- | kernel/delayacct.c | 16 | ||||
-rw-r--r-- | mm/hugetlb.c | 8 | ||||
-rw-r--r-- | mm/memory.c | 8 | ||||
-rw-r--r-- | tools/accounting/getdelays.c | 8 |
7 files changed, 76 insertions, 3 deletions
diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 197fe319cbec..241d1a87f2cd 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -15,6 +15,7 @@ c) swapping in pages d) memory reclaim e) thrashing page cache f) direct compact +g) write-protect copy and makes these statistics available to userspace through the taskstats interface. @@ -48,7 +49,7 @@ this structure. See for a description of the fields pertaining to delay accounting. It will generally be in the form of counters returning the cumulative delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page -cache, direct compact etc. +cache, direct compact, write-protect copy etc. Taking the difference of two successive readings of a given counter (say cpu_delay_total) for a task will give the delay @@ -117,6 +118,8 @@ Get sum of delays, since system boot, for all pids with tgid 5:: 0 0 0ms COMPACT count delay total delay average 0 0 0ms + WPCOPY count delay total delay average + 0 0 0ms Get IO accounting for pid 1, it works only with -p:: diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 6b16a6930a19..58aea2d7385c 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -45,9 +45,13 @@ struct task_delay_info { u64 compact_start; u64 compact_delay; /* wait for memory compact */ + u64 wpcopy_start; + u64 wpcopy_delay; /* wait for write-protect copy */ + u32 freepages_count; /* total count of memory reclaim */ u32 thrashing_count; /* total count of thrash waits */ u32 compact_count; /* total count of memory compact */ + u32 wpcopy_count; /* total count of write-protect copy */ }; #endif @@ -75,6 +79,8 @@ extern void __delayacct_swapin_start(void); extern void __delayacct_swapin_end(void); extern void __delayacct_compact_start(void); extern void __delayacct_compact_end(void); +extern void __delayacct_wpcopy_start(void); +extern void __delayacct_wpcopy_end(void); static inline void delayacct_tsk_init(struct task_struct *tsk) { @@ -191,6 +197,24 @@ static inline void delayacct_compact_end(void) __delayacct_compact_end(); } +static inline void delayacct_wpcopy_start(void) +{ + if (!static_branch_unlikely(&delayacct_key)) + return; + + if (current->delays) + __delayacct_wpcopy_start(); +} + +static inline void delayacct_wpcopy_end(void) +{ + if (!static_branch_unlikely(&delayacct_key)) + return; + + if (current->delays) + __delayacct_wpcopy_end(); +} + #else static inline void delayacct_init(void) {} @@ -225,6 +249,10 @@ static inline void delayacct_compact_start(void) {} static inline void delayacct_compact_end(void) {} +static inline void delayacct_wpcopy_start(void) +{} +static inline void delayacct_wpcopy_end(void) +{} #endif /* CONFIG_TASK_DELAY_ACCT */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 736154171489..a7f5b11a8f1b 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 12 +#define TASKSTATS_VERSION 13 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -194,6 +194,10 @@ struct taskstats { __u64 ac_exe_dev; /* program binary device ID */ __u64 ac_exe_inode; /* program binary inode number */ /* v12 end */ + + /* v13: Delay waiting for write-protect copy */ + __u64 wpcopy_count; + __u64 wpcopy_delay_total; }; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 2c1e18f7c5cf..164ed9ef77a3 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -177,11 +177,14 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp; tmp = d->compact_delay_total + tsk->delays->compact_delay; d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp; + tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay; + d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp; d->blkio_count += tsk->delays->blkio_count; d->swapin_count += tsk->delays->swapin_count; d->freepages_count += tsk->delays->freepages_count; d->thrashing_count += tsk->delays->thrashing_count; d->compact_count += tsk->delays->compact_count; + d->wpcopy_count += tsk->delays->wpcopy_count; raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return 0; @@ -249,3 +252,16 @@ void __delayacct_compact_end(void) ¤t->delays->compact_delay, ¤t->delays->compact_count); } + +void __delayacct_wpcopy_start(void) +{ + current->delays->wpcopy_start = local_clock(); +} + +void __delayacct_wpcopy_end(void) +{ + delayacct_end(¤t->delays->lock, + ¤t->delays->wpcopy_start, + ¤t->delays->wpcopy_delay, + ¤t->delays->wpcopy_count); +} diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7c468ac1d069..a57e1be41401 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -32,6 +32,7 @@ #include <linux/cma.h> #include <linux/migrate.h> #include <linux/nospec.h> +#include <linux/delayacct.h> #include <asm/page.h> #include <asm/pgalloc.h> @@ -5230,6 +5231,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma, pte = huge_ptep_get(ptep); old_page = pte_page(pte); + delayacct_wpcopy_start(); + retry_avoidcopy: /* * If no-one else is actually using this page, we're the exclusive @@ -5240,6 +5243,8 @@ retry_avoidcopy: page_move_anon_rmap(old_page, vma); if (likely(!unshare)) set_huge_ptep_writable(vma, haddr, ptep); + + delayacct_wpcopy_end(); return 0; } VM_BUG_ON_PAGE(PageAnon(old_page) && PageAnonExclusive(old_page), @@ -5309,6 +5314,7 @@ retry_avoidcopy: * race occurs while re-acquiring page table * lock, and our job is done. */ + delayacct_wpcopy_end(); return 0; } @@ -5367,6 +5373,8 @@ out_release_old: put_page(old_page); spin_lock(ptl); /* Caller expects lock to be held */ + + delayacct_wpcopy_end(); return ret; } diff --git a/mm/memory.c b/mm/memory.c index 21dadf03f089..7a089145cad4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3090,6 +3090,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) int page_copied = 0; struct mmu_notifier_range range; + delayacct_wpcopy_start(); + if (unlikely(anon_vma_prepare(vma))) goto oom; @@ -3114,6 +3116,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) put_page(new_page); if (old_page) put_page(old_page); + + delayacct_wpcopy_end(); return 0; } } @@ -3220,12 +3224,16 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) free_swap_cache(old_page); put_page(old_page); } + + delayacct_wpcopy_end(); return (page_copied && !unshare) ? VM_FAULT_WRITE : 0; oom_free_new: put_page(new_page); oom: if (old_page) put_page(old_page); + + delayacct_wpcopy_end(); return VM_FAULT_OOM; } diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 11e86739456d..e83e6e47a21e 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -207,6 +207,8 @@ static void print_delayacct(struct taskstats *t) "THRASHING%12s%15s%15s\n" " %15llu%15llu%15llums\n" "COMPACT %12s%15s%15s\n" + " %15llu%15llu%15llums\n" + "WPCOPY %12s%15s%15s\n" " %15llu%15llu%15llums\n", "count", "real total", "virtual total", "delay total", "delay average", @@ -234,7 +236,11 @@ static void print_delayacct(struct taskstats *t) "count", "delay total", "delay average", (unsigned long long)t->compact_count, (unsigned long long)t->compact_delay_total, - average_ms(t->compact_delay_total, t->compact_count)); + average_ms(t->compact_delay_total, t->compact_count), + "count", "delay total", "delay average", + (unsigned long long)t->wpcopy_count, + (unsigned long long)t->wpcopy_delay_total, + average_ms(t->wpcopy_delay_total, t->wpcopy_count)); } static void task_context_switch_counts(struct taskstats *t) |