summaryrefslogtreecommitdiff
path: root/tools/lib
diff options
context:
space:
mode:
authorSwapnil Sapkal <swapnil.sapkal@amd.com>2026-01-19 17:58:25 +0000
committerArnaldo Carvalho de Melo <acme@redhat.com>2026-01-22 12:29:28 -0300
commitc3030995f23b3d35f94b9bc4375706ec5916fd55 (patch)
tree9f7eb0d88b2759ac232ed03d20a06aab2143ce9a /tools/lib
parentd40c68a49f69c9bdb4ca14b3e6a0422bbaeb5d8f (diff)
downloadlwn-c3030995f23b3d35f94b9bc4375706ec5916fd55.tar.gz
lwn-c3030995f23b3d35f94b9bc4375706ec5916fd55.zip
perf sched stats: Add record and rawdump support
Define new, perf tool only, sample types and their layouts. Add logic to parse /proc/schedstat, convert it to perf sample format and save samples to perf.data file with `perf sched stats record` command. Also add logic to read perf.data file, interpret schedstat samples and print rawdump of samples with `perf script -D`. Note that, /proc/schedstat file output is standardized with version number. The patch supports v15 but older or newer version can be added easily. Co-developed-by: Ravi Bangoria <ravi.bangoria@amd.com> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com> Tested-by: Chen Yu <yu.c.chen@intel.com> Tested-by: James Clark <james.clark@linaro.org> Acked-by: Ian Rogers <irogers@google.com> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Anubhav Shelat <ashelat@redhat.com> Cc: Ben Gainey <ben.gainey@arm.com> Cc: Blake Jones <blakejones@google.com> Cc: Chun-Tse Shao <ctshao@google.com> Cc: David Vernet <void@manifault.com> Cc: Dmitriy Vyukov <dvyukov@google.com> Cc: Dr. David Alan Gilbert <linux@treblig.org> Cc: Gautham Shenoy <gautham.shenoy@amd.com> Cc: Graham Woodward <graham.woodward@arm.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Juri Lelli <juri.lelli@redhat.com> Cc: K Prateek Nayak <kprateek.nayak@amd.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@arm.com> Cc: Madadi Vineeth Reddy <vineethr@linux.ibm.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Sandipan Das <sandipan.das@amd.com> Cc: Santosh Shukla <santosh.shukla@amd.com> Cc: Shrikanth Hegde <sshegde@linux.ibm.com> Cc: Steven Rostedt (VMware) <rostedt@goodmis.org> Cc: Tejun Heo <tj@kernel.org> Cc: Thomas Falcon <thomas.falcon@intel.com> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Vincent Guittot <vincent.guittot@linaro.org> Cc: Yang Jihong <yangjihong@bytedance.com> Cc: Yujie Liu <yujie.liu@intel.com> Cc: Zhongqiu Han <quic_zhonhan@quicinc.com> [ PRIu64 needs uint64_t, not 'unsigned long' to work on both 32-bit and 64-bit ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/lib')
-rw-r--r--tools/lib/perf/Documentation/libperf.txt2
-rw-r--r--tools/lib/perf/Makefile1
-rw-r--r--tools/lib/perf/include/perf/event.h41
-rw-r--r--tools/lib/perf/include/perf/schedstat-v15.h146
4 files changed, 190 insertions, 0 deletions
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 4072bc9b7670..576ecc5fc312 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -211,6 +211,8 @@ SYNOPSIS
struct perf_record_header_feature;
struct perf_record_compressed;
struct perf_record_compressed2;
+ struct perf_record_schedstat_cpu;
+ struct perf_record_schedstat_domain;
--
DESCRIPTION
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 7fbb50b74c00..9fa28e512ca8 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -179,6 +179,7 @@ install_lib: libs
cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
+HDRS += schedstat-v15.h
INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h
INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 43a8cb04994f..ce04fed7cefc 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -496,6 +496,43 @@ struct perf_record_bpf_metadata {
struct perf_record_bpf_metadata_entry entries[];
};
+struct perf_record_schedstat_cpu_v15 {
+#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) _type _name
+#include "schedstat-v15.h"
+#undef CPU_FIELD
+};
+
+struct perf_record_schedstat_cpu {
+ struct perf_event_header header;
+ __u64 timestamp;
+ __u32 cpu;
+ __u16 version;
+ /* Padding */
+ char __pad[2];
+ union {
+ struct perf_record_schedstat_cpu_v15 v15;
+ };
+};
+
+struct perf_record_schedstat_domain_v15 {
+#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) _type _name
+#include "schedstat-v15.h"
+#undef DOMAIN_FIELD
+};
+
+#define DOMAIN_NAME_LEN 16
+
+struct perf_record_schedstat_domain {
+ struct perf_event_header header;
+ __u64 timestamp;
+ __u32 cpu;
+ __u16 version;
+ __u16 domain;
+ union {
+ struct perf_record_schedstat_domain_v15 v15;
+ };
+};
+
enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_USER_TYPE_START = 64,
PERF_RECORD_HEADER_ATTR = 64,
@@ -519,6 +556,8 @@ enum perf_user_event_type { /* above any possible kernel type */
PERF_RECORD_FINISHED_INIT = 82,
PERF_RECORD_COMPRESSED2 = 83,
PERF_RECORD_BPF_METADATA = 84,
+ PERF_RECORD_SCHEDSTAT_CPU = 85,
+ PERF_RECORD_SCHEDSTAT_DOMAIN = 86,
PERF_RECORD_HEADER_MAX
};
@@ -562,6 +601,8 @@ union perf_event {
struct perf_record_compressed pack;
struct perf_record_compressed2 pack2;
struct perf_record_bpf_metadata bpf_metadata;
+ struct perf_record_schedstat_cpu schedstat_cpu;
+ struct perf_record_schedstat_domain schedstat_domain;
};
#endif /* __LIBPERF_EVENT_H */
diff --git a/tools/lib/perf/include/perf/schedstat-v15.h b/tools/lib/perf/include/perf/schedstat-v15.h
new file mode 100644
index 000000000000..639458df05f8
--- /dev/null
+++ b/tools/lib/perf/include/perf/schedstat-v15.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CPU_FIELD
+CPU_FIELD(__u32, yld_count, "sched_yield() count",
+ "%11u", false, yld_count, v15);
+CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored",
+ "%11u", false, array_exp, v15);
+CPU_FIELD(__u32, sched_count, "schedule() called",
+ "%11u", false, sched_count, v15);
+CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle",
+ "%11u", true, sched_count, v15);
+CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called",
+ "%11u", false, ttwu_count, v15);
+CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu",
+ "%11u", true, ttwu_count, v15);
+CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)",
+ "%11llu", false, rq_cpu_time, v15);
+CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)",
+ "%11llu", true, rq_cpu_time, v15);
+CPU_FIELD(__u64, pcount, "total timeslices run on this cpu",
+ "%11llu", false, pcount, v15);
+#endif
+
+#ifdef DOMAIN_FIELD
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category idle> ");
+#endif
+DOMAIN_FIELD(__u32, idle_lb_count,
+ "load_balance() count on cpu idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, idle_lb_balanced,
+ "load_balance() found balanced on cpu idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, idle_lb_failed,
+ "load_balance() move task failed on cpu idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, idle_lb_imbalance,
+ "imbalance sum on cpu idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, idle_lb_gained,
+ "pull_task() count on cpu idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, idle_lb_hot_gained,
+ "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, idle_lb_nobusyq,
+ "load_balance() failed to find busier queue on cpu idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, idle_lb_nobusyg,
+ "load_balance() failed to find busier group on cpu idle", "%11u", true, v15);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(idle_lb_success_count, "load_balance() success count on cpu idle", "%11u",
+ idle_lb_count, idle_lb_balanced, idle_lb_failed, v15);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(idle_lb_avg_pulled,
+ "avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf",
+ idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v15);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category busy> ");
+#endif
+DOMAIN_FIELD(__u32, busy_lb_count,
+ "load_balance() count on cpu busy", "%11u", true, v15);
+DOMAIN_FIELD(__u32, busy_lb_balanced,
+ "load_balance() found balanced on cpu busy", "%11u", true, v15);
+DOMAIN_FIELD(__u32, busy_lb_failed,
+ "load_balance() move task failed on cpu busy", "%11u", true, v15);
+DOMAIN_FIELD(__u32, busy_lb_imbalance,
+ "imbalance sum on cpu busy", "%11u", false, v15);
+DOMAIN_FIELD(__u32, busy_lb_gained,
+ "pull_task() count on cpu busy", "%11u", false, v15);
+DOMAIN_FIELD(__u32, busy_lb_hot_gained,
+ "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v15);
+DOMAIN_FIELD(__u32, busy_lb_nobusyq,
+ "load_balance() failed to find busier queue on cpu busy", "%11u", true, v15);
+DOMAIN_FIELD(__u32, busy_lb_nobusyg,
+ "load_balance() failed to find busier group on cpu busy", "%11u", true, v15);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(busy_lb_success_count, "load_balance() success count on cpu busy", "%11u",
+ busy_lb_count, busy_lb_balanced, busy_lb_failed, v15);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(busy_lb_avg_pulled,
+ "avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf",
+ busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v15);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category newidle> ");
+#endif
+DOMAIN_FIELD(__u32, newidle_lb_count,
+ "load_balance() count on cpu newly idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, newidle_lb_balanced,
+ "load_balance() found balanced on cpu newly idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, newidle_lb_failed,
+ "load_balance() move task failed on cpu newly idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, newidle_lb_imbalance,
+ "imbalance sum on cpu newly idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, newidle_lb_gained,
+ "pull_task() count on cpu newly idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, newidle_lb_hot_gained,
+ "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, newidle_lb_nobusyq,
+ "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, newidle_lb_nobusyg,
+ "load_balance() failed to find busier group on cpu newly idle", "%11u", true, v15);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(newidle_lb_success_count,
+ "load_balance() success count on cpu newly idle", "%11u",
+ newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v15);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(newidle_lb_avg_pulled,
+ "avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf",
+ newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v15);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category active_load_balance()> ");
+#endif
+DOMAIN_FIELD(__u32, alb_count,
+ "active_load_balance() count", "%11u", false, v15);
+DOMAIN_FIELD(__u32, alb_failed,
+ "active_load_balance() move task failed", "%11u", false, v15);
+DOMAIN_FIELD(__u32, alb_pushed,
+ "active_load_balance() successfully moved a task", "%11u", false, v15);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category sched_balance_exec()> ");
+#endif
+DOMAIN_FIELD(__u32, sbe_count,
+ "sbe_count is not used", "%11u", false, v15);
+DOMAIN_FIELD(__u32, sbe_balanced,
+ "sbe_balanced is not used", "%11u", false, v15);
+DOMAIN_FIELD(__u32, sbe_pushed,
+ "sbe_pushed is not used", "%11u", false, v15);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category sched_balance_fork()> ");
+#endif
+DOMAIN_FIELD(__u32, sbf_count,
+ "sbf_count is not used", "%11u", false, v15);
+DOMAIN_FIELD(__u32, sbf_balanced,
+ "sbf_balanced is not used", "%11u", false, v15);
+DOMAIN_FIELD(__u32, sbf_pushed,
+ "sbf_pushed is not used", "%11u", false, v15);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Wakeup Info> ");
+#endif
+DOMAIN_FIELD(__u32, ttwu_wake_remote,
+ "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v15);
+DOMAIN_FIELD(__u32, ttwu_move_affine,
+ "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v15);
+DOMAIN_FIELD(__u32, ttwu_move_balance,
+ "try_to_wake_up() started passive balancing", "%11u", false, v15);
+#endif /* DOMAIN_FIELD */