summaryrefslogtreecommitdiff
path: root/kernel/trace/trace_functions_graph.c
diff options
context:
space:
mode:
authorSteven Rostedt <rostedt@goodmis.org>2024-09-14 17:48:07 -0400
committerSteven Rostedt (Google) <rostedt@goodmis.org>2024-09-30 11:12:46 -0400
commit3c9880f3ab52b52b5b4e1850a70e80dd7329cb4c (patch)
treed452cbcf7c5341bba2a86315963d793709cdf468 /kernel/trace/trace_functions_graph.c
parenta312a0f7834e605e7c41570f0e9525d0fc4a70a4 (diff)
downloadlwn-3c9880f3ab52b52b5b4e1850a70e80dd7329cb4c.tar.gz
lwn-3c9880f3ab52b52b5b4e1850a70e80dd7329cb4c.zip
ftrace: Use a running sleeptime instead of saving on shadow stack
The fgraph "sleep-time" option tells the function graph tracer and the profiler whether to include the time a function "sleeps" (is scheduled off the CPU) in the duration reported for the function. By default it is true, which means the duration of a function is calculated from the timestamp of when the function was entered to the timestamp of when it exits. If the "sleep-time" option is disabled, the tracer needs to remove the time that the task was not running on the CPU during the function. Currently this is done in a sched_switch tracepoint probe, which moves the "calltime" (time of entry of the function) forward by the sleep time calculated. It updates all the calltimes in the shadow stack. This is time consuming for those users of the function graph tracer that do not care about the sleep time. Instead, add a "ftrace_sleeptime" to the task_struct that gets the sleep time added each time the task wakes up. Then have the function entry save the current "ftrace_sleeptime" and on function exit, move the calltime forward by the difference of the current "ftrace_sleeptime" from the saved sleeptime. This removes one dependency of "calltime" needing to be on the shadow stack. It also simplifies the code that removes the sleep time of functions. TODO: Only enable the sched_switch tracepoint when this is needed. Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Jiri Olsa <olsajiri@gmail.com> Link: https://lore.kernel.org/20240914214826.938908568@goodmis.org Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Diffstat (limited to 'kernel/trace/trace_functions_graph.c')
-rw-r--r--kernel/trace/trace_functions_graph.c28
1 files changed, 28 insertions, 0 deletions
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index a569daaac4c4..bbd898f5a73c 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -133,6 +133,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
+ unsigned long *sleeptime;
unsigned long flags;
unsigned int trace_ctx;
long disabled;
@@ -167,6 +168,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
if (ftrace_graph_ignore_irqs())
return 0;
+ /* save the current sleep time if we are to ignore it */
+ if (!fgraph_sleep_time) {
+ sleeptime = fgraph_reserve_data(gops->idx, sizeof(*sleeptime));
+ if (sleeptime)
+ *sleeptime = current->ftrace_sleeptime;
+ }
+
/*
* Stop here if tracing_threshold is set. We only write function return
* events to the ring buffer.
@@ -238,6 +246,22 @@ void __trace_graph_return(struct trace_array *tr,
trace_buffer_unlock_commit_nostack(buffer, event);
}
+/*
+ * Remove the time the task spent scheduled out from a function's duration.
+ *
+ * Does nothing when fgraph_sleep_time is set. Otherwise it fetches the
+ * ftrace_sleeptime snapshot stored for @gops->idx and moves @trace->calltime
+ * forward by the amount current->ftrace_sleeptime has grown since then.
+ *
+ * NOTE(review): trace_graph_entry() declares its snapshot pointer as
+ * "unsigned long *" and reserves sizeof(unsigned long), while the value is
+ * read back here as unsigned long long. Those sizes differ wherever long is
+ * narrower than long long, so the size handed back by fgraph_retrieve_data()
+ * (presumably the stored size in bytes - confirm) is checked before the
+ * dereference. The entry side should reserve an unsigned long long.
+ */
+static void handle_nosleeptime(struct ftrace_graph_ret *trace,
+			       struct fgraph_ops *gops)
+{
+	unsigned long long *sleeptime;
+	int size;
+
+	if (fgraph_sleep_time)
+		return;
+
+	sleeptime = fgraph_retrieve_data(gops->idx, &size);
+	/* No snapshot, or one too small to hold the value read below. */
+	if (!sleeptime || size < (int)sizeof(*sleeptime))
+		return;
+
+	trace->calltime += current->ftrace_sleeptime - *sleeptime;
+}
+
void trace_graph_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops)
{
@@ -256,6 +280,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
return;
}
+ handle_nosleeptime(trace, gops);
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
@@ -278,6 +304,8 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
return;
}
+ handle_nosleeptime(trace, gops);
+
if (tracing_thresh &&
(trace->rettime - trace->calltime < tracing_thresh))
return;