summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-27 16:32:52 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-27 16:32:52 -0700
commit7ab044a4f42aecba23db5ce96e763e5ec807bf42 (patch)
tree057896b2b9749e0a380e088bf4a749fb3a59b2e4 /tools
parent18eb3b6dff007f2e4ef4f0d8567dfb5cdb6086fc (diff)
parent18c8ae813156a6855f026de80fffb91e1a28ab3d (diff)
downloadlwn-7ab044a4f42aecba23db5ce96e763e5ec807bf42.tar.gz
lwn-7ab044a4f42aecba23db5ce96e763e5ec807bf42.zip
Merge tag 'wq-for-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue updates from Tejun Heo: - Concurrency-managed per-cpu work items that hog CPUs and delay the execution of other work items are now automatically detected and excluded from concurrency management. Reporting on such work items can also be enabled through a config option. - Added tools/workqueue/wq_monitor.py which improves visibility into workqueue usages and behaviors. - Arnd's minimal fix for gcc-13 enum warning on 32bit compiles, superseded by commit afa4bb778e48 in mainline. * tag 'wq-for-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq: workqueue: Disable per-cpu CPU hog detection when wq_cpu_intensive_thresh_us is 0 workqueue: Fix WARN_ON_ONCE() triggers in worker_enter_idle() workqueue: fix enum type for gcc-13 workqueue: Track and monitor per-workqueue CPU time usage workqueue: Report work funcs that trigger automatic CPU_INTENSIVE mechanism workqueue: Automatically mark CPU-hogging work items CPU_INTENSIVE workqueue: Improve locking rule description for worker fields workqueue: Move worker_set/clr_flags() upwards workqueue: Re-order struct worker fields workqueue: Add pwq->stats[] and a monitoring script Further upgrade queue_work_on() comment
Diffstat (limited to 'tools')
-rw-r--r--tools/workqueue/wq_monitor.py168
1 files changed, 168 insertions, 0 deletions
diff --git a/tools/workqueue/wq_monitor.py b/tools/workqueue/wq_monitor.py
new file mode 100644
index 000000000000..6e258d123e8c
--- /dev/null
+++ b/tools/workqueue/wq_monitor.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env drgn
+#
+# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
+# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
+
+desc = """
+This is a drgn script to monitor workqueues. For more info on drgn, visit
+https://github.com/osandov/drgn.
+
+ total Total number of work items executed by the workqueue.
+
+ infl The number of currently in-flight work items.
+
+ CPUtime Total CPU time consumed by the workqueue in seconds. This is
+ sampled from scheduler ticks and only provides ballpark
+ measurement. "nohz_full=" CPUs are excluded from measurement.
+
+ CPUitsv The number of times a concurrency-managed work item hogged CPU
+ longer than the threshold (workqueue.cpu_intensive_thresh_us)
+ and got excluded from concurrency management to avoid stalling
+ other work items.
+
+ CMwake The number of concurrency-management wake-ups while executing a
+ work item of the workqueue.
+
+ mayday The number of times the rescuer was requested while waiting for
+ new worker creation.
+
+ rescued The number of work items executed by the rescuer.
+"""
+
+import sys
+import signal
+import os
+import re
+import time
+import json
+
+import drgn
+from drgn.helpers.linux.list import list_for_each_entry,list_empty
+from drgn.helpers.linux.cpumask import for_each_possible_cpu
+
+import argparse
+parser = argparse.ArgumentParser(description=desc,
+ formatter_class=argparse.RawTextHelpFormatter)
+parser.add_argument('workqueue', metavar='REGEX', nargs='*',
+ help='Target workqueue name patterns (all if empty)')
+parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
+ help='Monitoring interval (0 to print once and exit)')
+parser.add_argument('-j', '--json', action='store_true',
+ help='Output in json')
+args = parser.parse_args()
+
+def err(s):
+ print(s, file=sys.stderr, flush=True)
+ sys.exit(1)
+
+workqueues = prog['workqueues']
+
+WQ_UNBOUND = prog['WQ_UNBOUND']
+WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']
+
+PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution
+PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution
+PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME'] # total CPU time consumed
+PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations
+PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups
+PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer
+PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer
+PWQ_NR_STATS = prog['PWQ_NR_STATS']
+
+class WqStats:
+ def __init__(self, wq):
+ self.name = wq.name.string_().decode()
+ self.unbound = wq.flags & WQ_UNBOUND != 0
+ self.mem_reclaim = wq.flags & WQ_MEM_RECLAIM != 0
+ self.stats = [0] * PWQ_NR_STATS
+ for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'):
+ for i in range(PWQ_NR_STATS):
+ self.stats[i] += int(pwq.stats[i])
+
+ def dict(self, now):
+ return { 'timestamp' : now,
+ 'name' : self.name,
+ 'unbound' : self.unbound,
+ 'mem_reclaim' : self.mem_reclaim,
+ 'started' : self.stats[PWQ_STAT_STARTED],
+ 'completed' : self.stats[PWQ_STAT_COMPLETED],
+ 'cpu_time' : self.stats[PWQ_STAT_CPU_TIME],
+ 'cpu_intensive' : self.stats[PWQ_STAT_CPU_INTENSIVE],
+ 'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP],
+ 'mayday' : self.stats[PWQ_STAT_MAYDAY],
+ 'rescued' : self.stats[PWQ_STAT_RESCUED], }
+
+ def table_header_str():
+ return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
+ f'{"CPUitsv":>7} {"CMwake":>7} {"mayday":>7} {"rescued":>7}'
+
+ def table_row_str(self):
+ cpu_intensive = '-'
+ cm_wakeup = '-'
+ mayday = '-'
+ rescued = '-'
+
+ if not self.unbound:
+ cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
+ cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP])
+
+ if self.mem_reclaim:
+ mayday = str(self.stats[PWQ_STAT_MAYDAY])
+ rescued = str(self.stats[PWQ_STAT_RESCUED])
+
+ out = f'{self.name[-24:]:24} ' \
+ f'{self.stats[PWQ_STAT_STARTED]:8} ' \
+ f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \
+ f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \
+ f'{cpu_intensive:>7} ' \
+ f'{cm_wakeup:>7} ' \
+ f'{mayday:>7} ' \
+ f'{rescued:>7} '
+ return out.rstrip(':')
+
+exit_req = False
+
+def sigint_handler(signr, frame):
+ global exit_req
+ exit_req = True
+
+def main():
+ # handle args
+ table_fmt = not args.json
+ interval = args.interval
+
+ re_str = None
+ if args.workqueue:
+ for r in args.workqueue:
+ if re_str is None:
+ re_str = r
+ else:
+ re_str += '|' + r
+
+ filter_re = re.compile(re_str) if re_str else None
+
+ # monitoring loop
+ signal.signal(signal.SIGINT, sigint_handler)
+
+ while not exit_req:
+ now = time.time()
+
+ if table_fmt:
+ print()
+ print(WqStats.table_header_str())
+
+ for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'):
+ stats = WqStats(wq)
+ if filter_re and not filter_re.search(stats.name):
+ continue
+ if table_fmt:
+ print(stats.table_row_str())
+ else:
+ print(stats.dict(now))
+
+ if interval == 0:
+ break
+ time.sleep(interval)
+
+if __name__ == "__main__":
+ main()