diff options
author | Tejun Heo <tj@kernel.org> | 2023-05-17 17:02:09 -1000 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2023-05-17 17:02:09 -1000 |
commit | 8a1dd1e547c1a037692e7a6da6a76108108c72b1 (patch) | |
tree | 8441f7c4e566a76ecc0bb4d94091d0997f05f8f4 | |
parent | 6363845005202148b8409ec3082e80845c19d309 (diff) | |
download | lwn-8a1dd1e547c1a037692e7a6da6a76108108c72b1.tar.gz lwn-8a1dd1e547c1a037692e7a6da6a76108108c72b1.zip |
workqueue: Track and monitor per-workqueue CPU time usage
Now that wq_worker_tick() is there, we can easily track the rough CPU time
consumption of each workqueue by charging the whole tick whenever a tick
hits an active workqueue. While not super accurate, it provides reasonable
visibility into the workqueues that consume a lot of CPU cycles.
wq_monitor.py is updated to report the per-workqueue CPU times.
v2: wq_monitor.py was using "cputime" as the key when outputting in json
format. Use "cpu_time" instead for consistency with other fields.
Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r-- | Documentation/core-api/workqueue.rst | 38 | ||||
-rw-r--r-- | kernel/workqueue.c | 3 | ||||
-rw-r--r-- | tools/workqueue/wq_monitor.py | 9 |
3 files changed, 30 insertions, 20 deletions
diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst index a389f31b025c..a4c9b9d1905f 100644 --- a/Documentation/core-api/workqueue.rst +++ b/Documentation/core-api/workqueue.rst @@ -354,25 +354,25 @@ Monitoring Use tools/workqueue/wq_monitor.py to monitor workqueue operations: :: $ tools/workqueue/wq_monitor.py events - total infl CPUitsv CMwake mayday rescued - events 18545 0 0 5 - - - events_highpri 8 0 0 0 - - - events_long 3 0 0 0 - - - events_unbound 38306 0 - - - - - events_freezable 0 0 0 0 - - - events_power_efficient 29598 0 0 0 - - - events_freezable_power_ 10 0 0 0 - - - sock_diag_events 0 0 0 0 - - - - total infl CPUitsv CMwake mayday rescued - events 18548 0 0 5 - - - events_highpri 8 0 0 0 - - - events_long 3 0 0 0 - - - events_unbound 38322 0 - - - - - events_freezable 0 0 0 0 - - - events_power_efficient 29603 0 0 0 - - - events_freezable_power_ 10 0 0 0 - - - sock_diag_events 0 0 0 0 - - + total infl CPUtime CPUhog CMwake mayday rescued + events 18545 0 6.1 0 5 - - + events_highpri 8 0 0.0 0 0 - - + events_long 3 0 0.0 0 0 - - + events_unbound 38306 0 0.1 - - - - + events_freezable 0 0 0.0 0 0 - - + events_power_efficient 29598 0 0.2 0 0 - - + events_freezable_power_ 10 0 0.0 0 0 - - + sock_diag_events 0 0 0.0 0 0 - - + + total infl CPUtime CPUhog CMwake mayday rescued + events 18548 0 6.1 0 5 - - + events_highpri 8 0 0.0 0 0 - - + events_long 3 0 0.0 0 0 - - + events_unbound 38322 0 0.1 - - - - + events_freezable 0 0 0.0 0 0 - - + events_power_efficient 29603 0 0.2 0 0 - - + events_freezable_power_ 10 0 0.0 0 0 - - + sock_diag_events 0 0 0.0 0 0 - - ... diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4ca66384d288..ee16ddb0647c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -212,6 +212,7 @@ struct worker_pool { enum pool_workqueue_stats { PWQ_STAT_STARTED, /* work items started execution */ PWQ_STAT_COMPLETED, /* work items completed execution */ + PWQ_STAT_CPU_TIME, /* total CPU time consumed */ PWQ_STAT_CPU_INTENSIVE, /* wq_cpu_intensive_thresh_us violations */ PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */ PWQ_STAT_MAYDAY, /* maydays to rescuer */ @@ -1136,6 +1137,8 @@ void wq_worker_tick(struct task_struct *task) if (!pwq) return; + pwq->stats[PWQ_STAT_CPU_TIME] += TICK_USEC; + /* * If the current worker is concurrency managed and hogged the CPU for * longer than wq_cpu_intensive_thresh_us, it's automatically marked diff --git a/tools/workqueue/wq_monitor.py b/tools/workqueue/wq_monitor.py index 7c6f523b9164..6e258d123e8c 100644 --- a/tools/workqueue/wq_monitor.py +++ b/tools/workqueue/wq_monitor.py @@ -11,6 +11,10 @@ https://github.com/osandov/drgn. infl The number of currently in-flight work items. + CPUtime Total CPU time consumed by the workqueue in seconds. This is + sampled from scheduler ticks and only provides ballpark + measurement. "nohz_full=" CPUs are excluded from measurement. + CPUitsv The number of times a concurrency-managed work item hogged CPU longer than the threshold (workqueue.cpu_intensive_thresh_us) and got excluded from concurrency management to avoid stalling @@ -58,6 +62,7 @@ WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM'] PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution +PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME'] # total CPU time consumed PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer @@ -81,13 +86,14 @@ class WqStats: 'mem_reclaim' : self.mem_reclaim, 'started' : self.stats[PWQ_STAT_STARTED], 'completed' : self.stats[PWQ_STAT_COMPLETED], + 'cpu_time' : self.stats[PWQ_STAT_CPU_TIME], 'cpu_intensive' : self.stats[PWQ_STAT_CPU_INTENSIVE], 'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP], 'mayday' : self.stats[PWQ_STAT_MAYDAY], 'rescued' : self.stats[PWQ_STAT_RESCUED], } def table_header_str(): - return f'{"":>24} {"total":>8} {"infl":>5} '\ + return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\ f'{"CPUitsv":>7} {"CMwake":>7} {"mayday":>7} {"rescued":>7}' def table_row_str(self): @@ -107,6 +113,7 @@ class WqStats: out = f'{self.name[-24:]:24} ' \ f'{self.stats[PWQ_STAT_STARTED]:8} ' \ f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \ + f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \ f'{cpu_intensive:>7} ' \ f'{cm_wakeup:>7} ' \ f'{mayday:>7} ' \ |