summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDelyan Kratunov <delyank@fb.com>2022-05-11 18:28:36 +0000
committerPeter Zijlstra <peterz@infradead.org>2022-05-12 00:37:11 +0200
commit9c2136be0878c88c53dea26943ce40bb03ad8d8d (patch)
tree63ea6e07f82cc650db9a4d14d87f4b82df34a9c6
parentc5eb0a61238dd6faf37f58c9ce61c9980aaffd7a (diff)
downloadlwn-9c2136be0878c88c53dea26943ce40bb03ad8d8d.tar.gz
lwn-9c2136be0878c88c53dea26943ce40bb03ad8d8d.zip
sched/tracing: Append prev_state to tp args instead
Commit fa2c3254d7cf (sched/tracing: Don't re-read p->state when emitting sched_switch event, 2022-01-20) added a new prev_state argument to the sched_switch tracepoint, before the prev task_struct pointer. This reordering of arguments broke BPF programs that use the raw tracepoint (e.g. tp_btf programs). The type of the second argument has changed and existing programs that assume a task_struct* argument (e.g. for bpf_task_storage access) will now fail to verify. If we instead append the new argument to the end, all existing programs would continue to work and can conditionally extract the prev_state argument on supported kernel versions. Fixes: fa2c3254d7cf (sched/tracing: Don't re-read p->state when emitting sched_switch event, 2022-01-20) Signed-off-by: Delyan Kratunov <delyank@fb.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org> Link: https://lkml.kernel.org/r/c8a6930dfdd58a4a5755fc01732675472979732b.camel@fb.com
-rw-r--r--include/trace/events/sched.h6
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/trace/fgraph.c4
-rw-r--r--kernel/trace/ftrace.c4
-rw-r--r--kernel/trace/trace_events.c8
-rw-r--r--kernel/trace/trace_osnoise.c4
-rw-r--r--kernel/trace/trace_sched_switch.c4
-rw-r--r--kernel/trace/trace_sched_wakeup.c4
-rw-r--r--samples/trace_events/trace_custom_sched.h6
9 files changed, 21 insertions, 21 deletions
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 65e786756321..fbb99a61f714 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -222,11 +222,11 @@ static inline long __trace_sched_switch_state(bool preempt,
TRACE_EVENT(sched_switch,
TP_PROTO(bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next),
+ struct task_struct *next,
+ unsigned int prev_state),
- TP_ARGS(preempt, prev_state, prev, next),
+ TP_ARGS(preempt, prev, next, prev_state),
TP_STRUCT__entry(
__array( char, prev_comm, TASK_COMM_LEN )
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 51efaabac3e4..d58c0389eb23 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6382,7 +6382,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
migrate_disable_switch(rq, prev);
psi_sched_switch(prev, next, !task_on_rq_queued(prev));
- trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev_state, prev, next);
+ trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state);
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 8f4fb328133a..a7e84c8543cb 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -404,9 +404,9 @@ free:
static void
ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *next,
+ unsigned int prev_state)
{
unsigned long long timestamp;
int index;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4f1d2f5e7263..af899b058c8d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7420,9 +7420,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
static void
ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array *tr = data;
struct trace_pid_list *pid_list;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index e11e167b7809..f97de82d1342 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -773,9 +773,9 @@ void trace_event_follow_fork(struct trace_array *tr, bool enable)
static void
event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
@@ -799,9 +799,9 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
static void
event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next)
+ struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array *tr = data;
struct trace_pid_list *no_pid_list;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index e9ae1f33a7f0..afb92e2f0aea 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1168,9 +1168,9 @@ thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
*/
static void
trace_sched_switch_callback(void *data, bool preempt,
- unsigned int prev_state,
struct task_struct *p,
- struct task_struct *n)
+ struct task_struct *n,
+ unsigned int prev_state)
{
struct osnoise_variables *osn_var = this_cpu_osn_var();
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 45796d8bd4b2..c9ffdcfe622e 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -22,8 +22,8 @@ static DEFINE_MUTEX(sched_register_mutex);
static void
probe_sched_switch(void *ignore, bool preempt,
- unsigned int prev_state,
- struct task_struct *prev, struct task_struct *next)
+ struct task_struct *prev, struct task_struct *next,
+ unsigned int prev_state)
{
int flags;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 46429f9a96fa..330aee1c1a49 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -426,8 +426,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
static void notrace
probe_wakeup_sched_switch(void *ignore, bool preempt,
- unsigned int prev_state,
- struct task_struct *prev, struct task_struct *next)
+ struct task_struct *prev, struct task_struct *next,
+ unsigned int prev_state)
{
struct trace_array_cpu *data;
u64 T0, T1, delta;
diff --git a/samples/trace_events/trace_custom_sched.h b/samples/trace_events/trace_custom_sched.h
index 9fdd8e7c2a45..951388334a3f 100644
--- a/samples/trace_events/trace_custom_sched.h
+++ b/samples/trace_events/trace_custom_sched.h
@@ -25,11 +25,11 @@ TRACE_CUSTOM_EVENT(sched_switch,
* that the custom event is using.
*/
TP_PROTO(bool preempt,
- unsigned int prev_state,
struct task_struct *prev,
- struct task_struct *next),
+ struct task_struct *next,
+ unsigned int prev_state),
- TP_ARGS(preempt, prev_state, prev, next),
+ TP_ARGS(preempt, prev, next, prev_state),
/*
* The next fields are where the customization happens.