From bc82c38a6933aab308387d4aca47e0a05de7b553 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 11 Feb 2022 08:10:18 +0100 Subject: tracing: Uninline trace_trigger_soft_disabled() partly On a powerpc32 build with CONFIG_CC_OPTIMISE_FOR_SIZE, the inline keyword is not honored and trace_trigger_soft_disabled() appears approx 50 times in vmlinux. Adding -Winline to the build, the following message appears: ./include/linux/trace_events.h:712:1: error: inlining failed in call to 'trace_trigger_soft_disabled': call is unlikely and code size would grow [-Werror=inline] That function is rather big for an inlined function: c003df60 : c003df60: 94 21 ff f0 stwu r1,-16(r1) c003df64: 7c 08 02 a6 mflr r0 c003df68: 90 01 00 14 stw r0,20(r1) c003df6c: bf c1 00 08 stmw r30,8(r1) c003df70: 83 e3 00 24 lwz r31,36(r3) c003df74: 73 e9 01 00 andi. r9,r31,256 c003df78: 41 82 00 10 beq c003df88 c003df7c: 38 60 00 00 li r3,0 c003df80: 39 61 00 10 addi r11,r1,16 c003df84: 4b fd 60 ac b c0014030 <_rest32gpr_30_x> c003df88: 73 e9 00 80 andi. r9,r31,128 c003df8c: 7c 7e 1b 78 mr r30,r3 c003df90: 41 a2 00 14 beq c003dfa4 c003df94: 38 c0 00 00 li r6,0 c003df98: 38 a0 00 00 li r5,0 c003df9c: 38 80 00 00 li r4,0 c003dfa0: 48 05 c5 f1 bl c009a590 c003dfa4: 73 e9 00 40 andi. r9,r31,64 c003dfa8: 40 82 00 28 bne c003dfd0 c003dfac: 73 ff 02 00 andi. r31,r31,512 c003dfb0: 41 82 ff cc beq c003df7c c003dfb4: 80 01 00 14 lwz r0,20(r1) c003dfb8: 83 e1 00 0c lwz r31,12(r1) c003dfbc: 7f c3 f3 78 mr r3,r30 c003dfc0: 83 c1 00 08 lwz r30,8(r1) c003dfc4: 7c 08 03 a6 mtlr r0 c003dfc8: 38 21 00 10 addi r1,r1,16 c003dfcc: 48 05 6f 6c b c0094f38 c003dfd0: 38 60 00 01 li r3,1 c003dfd4: 4b ff ff ac b c003df80 However it is located in a hot path so inlining it is important. But forcing inlining of the entire function by using __always_inline leads to increasing the text size by approx 20 kbytes. Instead, split the fonction in two parts, one part with the likely fast path, flagged __always_inline, and a second part out of line. With this change, on a powerpc32 with CONFIG_CC_OPTIMISE_FOR_SIZE vmlinux text increases by only 1,4 kbytes, which is partly compensated by a decrease of vmlinux data by 7 kbytes. On ppc64_defconfig which has CONFIG_CC_OPTIMISE_FOR_SPEED, this change reduces vmlinux text by more than 30 kbytes. Link: https://lkml.kernel.org/r/69ce0986a52d026d381d612801d978aa4f977460.1644563295.git.christophe.leroy@csgroup.eu Signed-off-by: Christophe Leroy Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux/trace_events.h') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 70c069aef02c..dcea51fb60e2 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -699,6 +699,8 @@ event_triggers_post_call(struct trace_event_file *file, bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); +bool __trace_trigger_soft_disabled(struct trace_event_file *file); + /** * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test @@ -708,20 +710,20 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); * triggers that require testing the fields, it will return true, * otherwise false. */ -static inline bool +static __always_inline bool trace_trigger_soft_disabled(struct trace_event_file *file) { unsigned long eflags = file->flags; - if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { - if (eflags & EVENT_FILE_FL_TRIGGER_MODE) - event_triggers_call(file, NULL, NULL, NULL); - if (eflags & EVENT_FILE_FL_SOFT_DISABLED) - return true; - if (eflags & EVENT_FILE_FL_PID_FILTER) - return trace_event_ignore_this_pid(file); - } - return false; + if (likely(!(eflags & (EVENT_FILE_FL_TRIGGER_MODE | + EVENT_FILE_FL_SOFT_DISABLED | + EVENT_FILE_FL_PID_FILTER)))) + return false; + + if (likely(eflags & EVENT_FILE_FL_TRIGGER_COND)) + return false; + + return __trace_trigger_soft_disabled(file); } #ifdef CONFIG_BPF_EVENTS -- cgit v1.2.3 From 3a73333fb370f7b65de9d94c53df503642bda789 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 3 Mar 2022 17:05:34 -0500 Subject: tracing: Add TRACE_CUSTOM_EVENT() macro To make it really easy to add custom events from modules, add a TRACE_CUSTOM_EVENT() macro that acts just like the TRACE_EVENT() macro, but creates a custom event to an already existing tracepoint. The trace_custom_sched.[ch] has been updated to use this new macro to show how simple it is. Link: https://lkml.kernel.org/r/20220303220625.738622494@goodmis.org Cc: Ingo Molnar Cc: Andrew Morton Cc: Joel Fernandes Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 24 ++- include/trace/define_custom_trace.h | 77 +++++++++ include/trace/trace_custom_events.h | 221 +++++++++++++++++++++++++ samples/trace_events/Makefile | 2 +- samples/trace_events/trace_custom_sched.c | 259 +++--------------------------- samples/trace_events/trace_custom_sched.h | 95 +++++++++++ 6 files changed, 441 insertions(+), 237 deletions(-) create mode 100644 include/trace/define_custom_trace.h create mode 100644 include/trace/trace_custom_events.h create mode 100644 samples/trace_events/trace_custom_sched.h (limited to 'include/linux/trace_events.h') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 70c069aef02c..9b09fd633d48 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -315,6 +315,7 @@ enum { TRACE_EVENT_FL_KPROBE_BIT, TRACE_EVENT_FL_UPROBE_BIT, TRACE_EVENT_FL_EPROBE_BIT, + TRACE_EVENT_FL_CUSTOM_BIT, }; /* @@ -328,6 +329,9 @@ enum { * KPROBE - Event is a kprobe * UPROBE - Event is a uprobe * EPROBE - Event is an event probe + * CUSTOM - Event is a custom event (to be attached to an exsiting tracepoint) + * This is set when the custom event has not been attached + * to a tracepoint yet, then it is cleared when it is. */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -339,6 +343,7 @@ enum { TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), TRACE_EVENT_FL_EPROBE = (1 << TRACE_EVENT_FL_EPROBE_BIT), + TRACE_EVENT_FL_CUSTOM = (1 << TRACE_EVENT_FL_CUSTOM_BIT), }; #define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) @@ -440,7 +445,9 @@ static inline bool bpf_prog_array_valid(struct trace_event_call *call) static inline const char * trace_event_name(struct trace_event_call *call) { - if (call->flags & TRACE_EVENT_FL_TRACEPOINT) + if (call->flags & TRACE_EVENT_FL_CUSTOM) + return call->name; + else if (call->flags & TRACE_EVENT_FL_TRACEPOINT) return call->tp ? call->tp->name : NULL; else return call->name; @@ -901,3 +908,18 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, #endif #endif /* _LINUX_TRACE_EVENT_H */ + +/* + * Note: we keep the TRACE_CUSTOM_EVENT outside the include file ifdef protection. + * This is due to the way trace custom events work. If a file includes two + * trace event headers under one "CREATE_CUSTOM_TRACE_EVENTS" the first include + * will override the TRACE_CUSTOM_EVENT and break the second include. + */ + +#ifndef TRACE_CUSTOM_EVENT + +#define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) +#define DEFINE_CUSTOM_EVENT(template, name, proto, args) +#define TRACE_CUSTOM_EVENT(name, proto, args, struct, assign, print) + +#endif /* ifdef TRACE_CUSTOM_EVENT (see note above) */ diff --git a/include/trace/define_custom_trace.h b/include/trace/define_custom_trace.h new file mode 100644 index 000000000000..5827a4c92c74 --- /dev/null +++ b/include/trace/define_custom_trace.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Trace files that want to automate creation of all tracepoints defined + * in their file should include this file. The following are macros that the + * trace file may define: + * + * TRACE_SYSTEM defines the system the tracepoint is for + * + * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h + * This macro may be defined to tell define_trace.h what file to include. + * Note, leave off the ".h". + * + * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace + * then this macro can define the path to use. Note, the path is relative to + * define_trace.h, not the file including it. Full path names for out of tree + * modules must be used. + */ + +#ifdef CREATE_CUSTOM_TRACE_EVENTS + +/* Prevent recursion */ +#undef CREATE_CUSTOM_TRACE_EVENTS + +#include + +#undef TRACE_CUSTOM_EVENT +#define TRACE_CUSTOM_EVENT(name, proto, args, tstruct, assign, print) + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, name, proto, args) + +#undef TRACE_INCLUDE +#undef __TRACE_INCLUDE + +#ifndef TRACE_INCLUDE_FILE +# define TRACE_INCLUDE_FILE TRACE_SYSTEM +# define UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifndef TRACE_INCLUDE_PATH +# define __TRACE_INCLUDE(system) +# define UNDEF_TRACE_INCLUDE_PATH +#else +# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h) +#endif + +# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system) + +/* Let the trace headers be reread */ +#define TRACE_CUSTOM_MULTI_READ + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#ifdef TRACEPOINTS_ENABLED +#include +#endif + +#undef TRACE_CUSTOM_EVENT +#undef DECLARE_CUSTOM_EVENT_CLASS +#undef DEFINE_CUSTOM_EVENT +#undef TRACE_CUSTOM_MULTI_READ + +/* Only undef what we defined in this file */ +#ifdef UNDEF_TRACE_INCLUDE_FILE +# undef TRACE_INCLUDE_FILE +# undef UNDEF_TRACE_INCLUDE_FILE +#endif + +#ifdef UNDEF_TRACE_INCLUDE_PATH +# undef TRACE_INCLUDE_PATH +# undef UNDEF_TRACE_INCLUDE_PATH +#endif + +/* We may be processing more files */ +#define CREATE_CUSTOM_TRACE_POINTS + +#endif /* CREATE_CUSTOM_TRACE_POINTS */ diff --git a/include/trace/trace_custom_events.h b/include/trace/trace_custom_events.h new file mode 100644 index 000000000000..b567c7202339 --- /dev/null +++ b/include/trace/trace_custom_events.h @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is similar to the trace_events.h file, but is to only + * create custom trace events to be attached to existing tracepoints. + * Where as the TRACE_EVENT() macro (from trace_events.h) will create + * both the trace event and the tracepoint it will attach the event to, + * TRACE_CUSTOM_EVENT() is to create only a custom version of an existing + * trace event (created by TRACE_EVENT() or DEFINE_EVENT()), and will + * be placed in the "custom" system. + */ + +#include + +/* All custom events are placed in the custom group */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM custom + +#ifndef TRACE_SYSTEM_VAR +#define TRACE_SYSTEM_VAR TRACE_SYSTEM +#endif + +/* The init stage creates the system string and enum mappings */ + +#include "stages/init.h" + +#undef TRACE_CUSTOM_EVENT +#define TRACE_CUSTOM_EVENT(name, proto, args, tstruct, assign, print) \ + DECLARE_CUSTOM_EVENT_CLASS(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_CUSTOM_EVENT(name, name, PARAMS(proto), PARAMS(args)); + +/* Stage 1 creates the structure of the recorded event layout */ + +#include "stages/stage1_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ + struct trace_custom_event_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[]; \ + }; \ + \ + static struct trace_event_class custom_event_class_##name; + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, name, proto, args) \ + static struct trace_event_call __used \ + __attribute__((__aligned__(4))) custom_event_##name + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 2 creates the custom class */ + +#include "stages/stage2_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ + struct trace_custom_event_data_offsets_##call { \ + tstruct; \ + }; + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, name, proto, args) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 3 create the way to print the custom event */ + +#include "stages/stage3_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static notrace enum print_line_t \ +trace_custom_raw_output_##call(struct trace_iterator *iter, int flags, \ + struct trace_event *trace_event) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct trace_seq __maybe_unused *p = &iter->tmp_seq; \ + struct trace_custom_event_raw_##call *field; \ + int ret; \ + \ + field = (typeof(field))iter->ent; \ + \ + ret = trace_raw_output_prep(iter, trace_event); \ + if (ret != TRACE_TYPE_HANDLED) \ + return ret; \ + \ + trace_event_printf(iter, print); \ + \ + return trace_handle_return(s); \ +} \ +static struct trace_event_functions trace_custom_event_type_funcs_##call = { \ + .trace = trace_custom_raw_output_##call, \ +}; + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 4 creates the offset layout for the fields */ + +#include "stages/stage4_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, func, print) \ +static struct trace_event_fields trace_custom_event_fields_##call[] = { \ + tstruct \ + {} }; + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 5 creates the helper function for dynamic fields */ + +#include "stages/stage5_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static inline notrace int trace_custom_event_get_offsets_##call( \ + struct trace_custom_event_data_offsets_##call *__data_offsets, proto) \ +{ \ + int __data_size = 0; \ + int __maybe_unused __item_length; \ + struct trace_custom_event_raw_##call __maybe_unused *entry; \ + \ + tstruct; \ + \ + return __data_size; \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 6 creates the probe function that records the event */ + +#include "stages/stage6_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ + \ +static notrace void \ +trace_custom_event_raw_event_##call(void *__data, proto) \ +{ \ + struct trace_event_file *trace_file = __data; \ + struct trace_custom_event_data_offsets_##call __maybe_unused __data_offsets; \ + struct trace_event_buffer fbuffer; \ + struct trace_custom_event_raw_##call *entry; \ + int __data_size; \ + \ + if (trace_trigger_soft_disabled(trace_file)) \ + return; \ + \ + __data_size = trace_custom_event_get_offsets_##call(&__data_offsets, args); \ + \ + entry = trace_event_buffer_reserve(&fbuffer, trace_file, \ + sizeof(*entry) + __data_size); \ + \ + if (!entry) \ + return; \ + \ + tstruct \ + \ + { assign; } \ + \ + trace_event_buffer_commit(&fbuffer); \ +} +/* + * The ftrace_test_custom_probe is compiled out, it is only here as a build time check + * to make sure that if the tracepoint handling changes, the ftrace probe will + * fail to compile unless it too is updated. + */ + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, call, proto, args) \ +static inline void ftrace_test_custom_probe_##call(void) \ +{ \ + check_trace_callback_type_##call(trace_custom_event_raw_event_##template); \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +/* Stage 7 creates the actual class and event structure for the custom event */ + +#include "stages/stage7_defines.h" + +#undef DECLARE_CUSTOM_EVENT_CLASS +#define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static char custom_print_fmt_##call[] = print; \ +static struct trace_event_class __used __refdata custom_event_class_##call = { \ + .system = TRACE_SYSTEM_STRING, \ + .fields_array = trace_custom_event_fields_##call, \ + .fields = LIST_HEAD_INIT(custom_event_class_##call.fields),\ + .raw_init = trace_event_raw_init, \ + .probe = trace_custom_event_raw_event_##call, \ + .reg = trace_event_reg, \ +}; + +#undef DEFINE_CUSTOM_EVENT +#define DEFINE_CUSTOM_EVENT(template, call, proto, args) \ + \ +static struct trace_event_call __used custom_event_##call = { \ + .name = #call, \ + .class = &custom_event_class_##template, \ + .event.funcs = &trace_custom_event_type_funcs_##template, \ + .print_fmt = custom_print_fmt_##template, \ + .flags = TRACE_EVENT_FL_CUSTOM, \ +}; \ +static inline int trace_custom_event_##call##_update(struct tracepoint *tp) \ +{ \ + if (tp->name && strcmp(tp->name, #call) == 0) { \ + custom_event_##call.tp = tp; \ + custom_event_##call.flags = TRACE_EVENT_FL_TRACEPOINT; \ + return 1; \ + } \ + return 0; \ +} \ +static struct trace_event_call __used \ +__section("_ftrace_events") *__custom_event_##call = &custom_event_##call + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile index e98afc447fe1..b3808bb4cf8b 100644 --- a/samples/trace_events/Makefile +++ b/samples/trace_events/Makefile @@ -11,7 +11,7 @@ # Here trace-events-sample.c does the CREATE_TRACE_POINTS. # CFLAGS_trace-events-sample.o := -I$(src) +CFLAGS_trace_custom_sched.o := -I$(src) obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o - obj-$(CONFIG_SAMPLE_TRACE_CUSTOM_EVENTS) += trace_custom_sched.o diff --git a/samples/trace_events/trace_custom_sched.c b/samples/trace_events/trace_custom_sched.c index 70a12c32ff99..b99d9ab7db85 100644 --- a/samples/trace_events/trace_custom_sched.c +++ b/samples/trace_events/trace_custom_sched.c @@ -11,256 +11,45 @@ #include #include #include -#include - -#define THIS_SYSTEM "custom_sched" - -#define SCHED_PRINT_FMT \ - C("prev_prio=%d next_pid=%d next_prio=%d", REC->prev_prio, REC->next_pid, \ - REC->next_prio) - -#define SCHED_WAKING_FMT \ - C("pid=%d prio=%d", REC->pid, REC->prio) - -#undef C -#define C(a, b...) a, b - -static struct trace_event_fields sched_switch_fields[] = { - { - .type = "unsigned short", - .name = "prev_prio", - .size = sizeof(short), - .align = __alignof__(short), - .is_signed = 0, - .filter_type = FILTER_OTHER, - }, - { - .type = "unsigned short", - .name = "next_prio", - .size = sizeof(short), - .align = __alignof__(short), - .is_signed = 0, - .filter_type = FILTER_OTHER, - }, - { - .type = "unsigned int", - .name = "next_prio", - .size = sizeof(int), - .align = __alignof__(int), - .is_signed = 0, - .filter_type = FILTER_OTHER, - }, - {} -}; - -struct sched_event { - struct trace_entry ent; - unsigned short prev_prio; - unsigned short next_prio; - unsigned int next_pid; -}; - -static struct trace_event_fields sched_waking_fields[] = { - { - .type = "unsigned int", - .name = "pid", - .size = sizeof(int), - .align = __alignof__(int), - .is_signed = 0, - .filter_type = FILTER_OTHER, - }, - { - .type = "unsigned short", - .name = "prio", - .size = sizeof(short), - .align = __alignof__(short), - .is_signed = 0, - .filter_type = FILTER_OTHER, - }, - {} -}; - -struct wake_event { - struct trace_entry ent; - unsigned int pid; - unsigned short prio; -}; - -static void sched_switch_probe(void *data, bool preempt, struct task_struct *prev, - struct task_struct *next) -{ - struct trace_event_file *trace_file = data; - struct trace_event_buffer fbuffer; - struct sched_event *entry; - - if (trace_trigger_soft_disabled(trace_file)) - return; - - entry = trace_event_buffer_reserve(&fbuffer, trace_file, - sizeof(*entry)); - - if (!entry) - return; - - entry->prev_prio = prev->prio; - entry->next_prio = next->prio; - entry->next_pid = next->pid; - - trace_event_buffer_commit(&fbuffer); -} - -static struct trace_event_class sched_switch_class = { - .system = THIS_SYSTEM, - .reg = trace_event_reg, - .fields_array = sched_switch_fields, - .fields = LIST_HEAD_INIT(sched_switch_class.fields), - .probe = sched_switch_probe, -}; - -static void sched_waking_probe(void *data, struct task_struct *t) -{ - struct trace_event_file *trace_file = data; - struct trace_event_buffer fbuffer; - struct wake_event *entry; - - if (trace_trigger_soft_disabled(trace_file)) - return; - - entry = trace_event_buffer_reserve(&fbuffer, trace_file, - sizeof(*entry)); - - if (!entry) - return; - - entry->prio = t->prio; - entry->pid = t->pid; - - trace_event_buffer_commit(&fbuffer); -} - -static struct trace_event_class sched_waking_class = { - .system = THIS_SYSTEM, - .reg = trace_event_reg, - .fields_array = sched_waking_fields, - .fields = LIST_HEAD_INIT(sched_waking_class.fields), - .probe = sched_waking_probe, -}; - -static enum print_line_t sched_switch_output(struct trace_iterator *iter, int flags, - struct trace_event *trace_event) -{ - struct trace_seq *s = &iter->seq; - struct sched_event *REC = (struct sched_event *)iter->ent; - int ret; - - ret = trace_raw_output_prep(iter, trace_event); - if (ret != TRACE_TYPE_HANDLED) - return ret; - - trace_seq_printf(s, SCHED_PRINT_FMT); - trace_seq_putc(s, '\n'); - return trace_handle_return(s); -} - -static struct trace_event_functions sched_switch_funcs = { - .trace = sched_switch_output, -}; - -static enum print_line_t sched_waking_output(struct trace_iterator *iter, int flags, - struct trace_event *trace_event) -{ - struct trace_seq *s = &iter->seq; - struct wake_event *REC = (struct wake_event *)iter->ent; - int ret; - - ret = trace_raw_output_prep(iter, trace_event); - if (ret != TRACE_TYPE_HANDLED) - return ret; - - trace_seq_printf(s, SCHED_WAKING_FMT); - trace_seq_putc(s, '\n'); - - return trace_handle_return(s); -} - -static struct trace_event_functions sched_waking_funcs = { - .trace = sched_waking_output, -}; - -#undef C -#define C(a, b...) #a "," __stringify(b) +/* + * Must include the event header that the custom event will attach to, + * from the C file, and not in the custom header file. + */ +#include -static struct trace_event_call sched_switch_call = { - .class = &sched_switch_class, - .event = { - .funcs = &sched_switch_funcs, - }, - .print_fmt = SCHED_PRINT_FMT, - .module = THIS_MODULE, - .flags = TRACE_EVENT_FL_TRACEPOINT, -}; +/* Declare CREATE_CUSTOM_TRACE_EVENTS before including custom header */ +#define CREATE_CUSTOM_TRACE_EVENTS -static struct trace_event_call sched_waking_call = { - .class = &sched_waking_class, - .event = { - .funcs = &sched_waking_funcs, - }, - .print_fmt = SCHED_WAKING_FMT, - .module = THIS_MODULE, - .flags = TRACE_EVENT_FL_TRACEPOINT, -}; +#include "trace_custom_sched.h" +/* + * As the trace events are not exported to modules, the use of + * for_each_kernel_tracepoint() is needed to find the trace event + * to attach to. The fct() function below, is a callback that + * will be called for every event. + * + * Helper functions are created by the TRACE_CUSTOM_EVENT() macro + * update the event. Those are of the form: + * + * trace_custom_event__update() + * + * Where is the event to attach. + */ static void fct(struct tracepoint *tp, void *priv) { - if (tp->name && strcmp(tp->name, "sched_switch") == 0) - sched_switch_call.tp = tp; - else if (tp->name && strcmp(tp->name, "sched_waking") == 0) - sched_waking_call.tp = tp; -} - -static int add_event(struct trace_event_call *call) -{ - int ret; - - ret = register_trace_event(&call->event); - if (WARN_ON(!ret)) - return -ENODEV; - - ret = trace_add_event_call(call); - if (WARN_ON(ret)) - unregister_trace_event(&call->event); - - return ret; + trace_custom_event_sched_switch_update(tp); + trace_custom_event_sched_waking_update(tp); } static int __init trace_sched_init(void) { - int ret; - - check_trace_callback_type_sched_switch(sched_switch_probe); - check_trace_callback_type_sched_waking(sched_waking_probe); - for_each_kernel_tracepoint(fct, NULL); - - ret = add_event(&sched_switch_call); - if (ret) - return ret; - - ret = add_event(&sched_waking_call); - if (ret) - trace_remove_event_call(&sched_switch_call); - - return ret; + return 0; } static void __exit trace_sched_exit(void) { - trace_set_clr_event(THIS_SYSTEM, "sched_switch", 0); - trace_set_clr_event(THIS_SYSTEM, "sched_waking", 0); - - trace_remove_event_call(&sched_switch_call); - trace_remove_event_call(&sched_waking_call); } module_init(trace_sched_init); diff --git a/samples/trace_events/trace_custom_sched.h b/samples/trace_events/trace_custom_sched.h new file mode 100644 index 000000000000..a3d14de6a2e5 --- /dev/null +++ b/samples/trace_events/trace_custom_sched.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Like the headers that use TRACE_EVENT(), the TRACE_CUSTOM_EVENT() + * needs a header that allows for multiple inclusions. + * + * Test for a unique name (here we have _TRACE_CUSTOM_SCHED_H), + * also allowing to continue if TRACE_CUSTOM_MULTI_READ is defined. + */ +#if !defined(_TRACE_CUSTOM_SCHED_H) || defined(TRACE_CUSTOM_MULTI_READ) +#define _TRACE_CUSTOM_SCHED_H + +/* Include linux/trace_events.h for initial defines of TRACE_CUSTOM_EVENT() */ +#include + +/* + * TRACE_CUSTOM_EVENT() is just like TRACE_EVENT(). The first parameter + * is the event name of an existing event where the TRACE_EVENT has been included + * in the C file before including this file. + */ +TRACE_CUSTOM_EVENT(sched_switch, + + /* + * The TP_PROTO() and TP_ARGS must match the trace event + * that the custom event is using. + */ + TP_PROTO(bool preempt, + struct task_struct *prev, + struct task_struct *next), + + TP_ARGS(preempt, prev, next), + + /* + * The next fields are where the customization happens. + * The TP_STRUCT__entry() defines what will be recorded + * in the ring buffer when the custom event triggers. + * + * The rest is just like the TRACE_EVENT() macro except that + * it uses the custom entry. + */ + TP_STRUCT__entry( + __field( unsigned short, prev_prio ) + __field( unsigned short, next_prio ) + __field( pid_t, next_pid ) + ), + + TP_fast_assign( + __entry->prev_prio = prev->prio; + __entry->next_pid = next->pid; + __entry->next_prio = next->prio; + ), + + TP_printk("prev_prio=%d next_pid=%d next_prio=%d", + __entry->prev_prio, __entry->next_pid, __entry->next_prio) +) + + +TRACE_CUSTOM_EVENT(sched_waking, + + TP_PROTO(struct task_struct *p), + + TP_ARGS(p), + + TP_STRUCT__entry( + __field( pid_t, pid ) + __field( unsigned short, prio ) + ), + + TP_fast_assign( + __entry->pid = p->pid; + __entry->prio = p->prio; + ), + + TP_printk("pid=%d prio=%d", __entry->pid, __entry->prio) +) +#endif +/* + * Just like the headers that create TRACE_EVENTs, the below must + * be outside the protection of the above #if block. + */ + +/* + * It is required that the Makefile includes: + * CFLAGS_.o := -I$(src) + */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . + +/* + * It is requred that the TRACE_INCLUDE_FILE be the same + * as this file without the ".h". + */ +#define TRACE_INCLUDE_FILE trace_custom_sched +#include -- cgit v1.2.3 From 0dcac272540613d41c05e89679e4ddb978b612f1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Mar 2022 13:24:09 +0100 Subject: bpf: Add multi kprobe link Adding new link type BPF_LINK_TYPE_KPROBE_MULTI that attaches kprobe program through fprobe API. The fprobe API allows to attach probe on multiple functions at once very fast, because it works on top of ftrace. On the other hand this limits the probe point to the function entry or return. The kprobe program gets the same pt_regs input ctx as when it's attached through the perf API. Adding new attach type BPF_TRACE_KPROBE_MULTI that allows attachment kprobe to multiple function with new link. User provides array of addresses or symbols with count to attach the kprobe program to. The new link_create uapi interface looks like: struct { __u32 flags; __u32 cnt; __aligned_u64 syms; __aligned_u64 addrs; } kprobe_multi; The flags field allows single BPF_TRACE_KPROBE_MULTI bit to create return multi kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220316122419.933957-4-jolsa@kernel.org --- include/linux/bpf_types.h | 1 + include/linux/trace_events.h | 7 ++ include/uapi/linux/bpf.h | 13 +++ kernel/bpf/syscall.c | 26 ++++- kernel/trace/bpf_trace.c | 211 +++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 13 +++ 6 files changed, 266 insertions(+), 5 deletions(-) (limited to 'include/linux/trace_events.h') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 48a91c51c015..3e24ad0c4b3c 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -140,3 +140,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) #ifdef CONFIG_PERF_EVENTS BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf) #endif +BPF_LINK_TYPE(BPF_LINK_TYPE_KPROBE_MULTI, kprobe_multi) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index dcea51fb60e2..8f0e9e7cb493 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -15,6 +15,7 @@ struct array_buffer; struct tracer; struct dentry; struct bpf_prog; +union bpf_attr; const char *trace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, @@ -738,6 +739,7 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp); int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id, u32 *fd_type, const char **buf, u64 *probe_offset, u64 *probe_addr); +int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -779,6 +781,11 @@ static inline int bpf_get_perf_event_info(const struct perf_event *event, { return -EOPNOTSUPP; } +static inline int +bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif enum { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 99fab54ae9c0..d77f47af7752 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -997,6 +997,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, + BPF_TRACE_KPROBE_MULTI, __MAX_BPF_ATTACH_TYPE }; @@ -1011,6 +1012,7 @@ enum bpf_link_type { BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, + BPF_LINK_TYPE_KPROBE_MULTI = 8, MAX_BPF_LINK_TYPE, }; @@ -1118,6 +1120,11 @@ enum bpf_link_type { */ #define BPF_F_XDP_HAS_FRAGS (1U << 5) +/* link_create.kprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_KPROBE_MULTI attach type to create return probe. + */ +#define BPF_F_KPROBE_MULTI_RETURN (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1475,6 +1482,12 @@ union bpf_attr { */ __u64 bpf_cookie; } perf_event; + struct { + __u32 flags; + __u32 cnt; + __aligned_u64 syms; + __aligned_u64 addrs; + } kprobe_multi; }; } link_create; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 9beb585be5a6..b8bb67ee6c57 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -32,6 +32,7 @@ #include #include #include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ @@ -3022,6 +3023,11 @@ out_put_file: fput(perf_file); return err; } +#else +static int bpf_perf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_PERF_EVENTS */ #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd @@ -4255,7 +4261,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, bpfptr_t uattr, return -EINVAL; } -#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len +#define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.addrs static int link_create(union bpf_attr *attr, bpfptr_t uattr) { enum bpf_prog_type ptype; @@ -4279,7 +4285,6 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) ret = tracing_bpf_link_attach(attr, uattr, prog); goto out; case BPF_PROG_TYPE_PERF_EVENT: - case BPF_PROG_TYPE_KPROBE: case BPF_PROG_TYPE_TRACEPOINT: if (attr->link_create.attach_type != BPF_PERF_EVENT) { ret = -EINVAL; @@ -4287,6 +4292,14 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) } ptype = prog->type; break; + case BPF_PROG_TYPE_KPROBE: + if (attr->link_create.attach_type != BPF_PERF_EVENT && + attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) { + ret = -EINVAL; + goto out; + } + ptype = prog->type; + break; default: ptype = attach_type_to_prog_type(attr->link_create.attach_type); if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) { @@ -4318,13 +4331,16 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) ret = bpf_xdp_link_attach(attr, prog); break; #endif -#ifdef CONFIG_PERF_EVENTS case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_TRACEPOINT: - case BPF_PROG_TYPE_KPROBE: ret = bpf_perf_link_attach(attr, prog); break; -#endif + case BPF_PROG_TYPE_KPROBE: + if (attr->link_create.attach_type == BPF_PERF_EVENT) + ret = bpf_perf_link_attach(attr, prog); + else + ret = bpf_kprobe_multi_link_attach(attr, prog); + break; default: ret = -EINVAL; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index a2024ba32a20..fffa2171fae4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -2181,3 +2182,213 @@ static int __init bpf_event_init(void) fs_initcall(bpf_event_init); #endif /* CONFIG_MODULES */ + +#ifdef CONFIG_FPROBE +struct bpf_kprobe_multi_link { + struct bpf_link link; + struct fprobe fp; + unsigned long *addrs; +}; + +static void bpf_kprobe_multi_link_release(struct bpf_link *link) +{ + struct bpf_kprobe_multi_link *kmulti_link; + + kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); + unregister_fprobe(&kmulti_link->fp); +} + +static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link) +{ + struct bpf_kprobe_multi_link *kmulti_link; + + kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); + kvfree(kmulti_link->addrs); + kfree(kmulti_link); +} + +static const struct bpf_link_ops bpf_kprobe_multi_link_lops = { + .release = bpf_kprobe_multi_link_release, + .dealloc = bpf_kprobe_multi_link_dealloc, +}; + +static int +kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link, + struct pt_regs *regs) +{ + int err; + + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { + err = 0; + goto out; + } + + migrate_disable(); + rcu_read_lock(); + err = bpf_prog_run(link->link.prog, regs); + rcu_read_unlock(); + migrate_enable(); + + out: + __this_cpu_dec(bpf_prog_active); + return err; +} + +static void +kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip, + struct pt_regs *regs) +{ + unsigned long saved_ip = instruction_pointer(regs); + struct bpf_kprobe_multi_link *link; + + /* + * Because fprobe's regs->ip is set to the next instruction of + * dynamic-ftrace instruction, correct entry ip must be set, so + * that the bpf program can access entry address via regs as same + * as kprobes. + * + * Both kprobe and kretprobe see the entry ip of traced function + * as instruction pointer. + */ + instruction_pointer_set(regs, entry_ip); + + link = container_of(fp, struct bpf_kprobe_multi_link, fp); + kprobe_multi_link_prog_run(link, regs); + + instruction_pointer_set(regs, saved_ip); +} + +static int +kprobe_multi_resolve_syms(const void *usyms, u32 cnt, + unsigned long *addrs) +{ + unsigned long addr, size; + const char **syms; + int err = -ENOMEM; + unsigned int i; + char *func; + + size = cnt * sizeof(*syms); + syms = kvzalloc(size, GFP_KERNEL); + if (!syms) + return -ENOMEM; + + func = kmalloc(KSYM_NAME_LEN, GFP_KERNEL); + if (!func) + goto error; + + if (copy_from_user(syms, usyms, size)) { + err = -EFAULT; + goto error; + } + + for (i = 0; i < cnt; i++) { + err = strncpy_from_user(func, syms[i], KSYM_NAME_LEN); + if (err == KSYM_NAME_LEN) + err = -E2BIG; + if (err < 0) + goto error; + err = -EINVAL; + addr = kallsyms_lookup_name(func); + if (!addr) + goto error; + if (!kallsyms_lookup_size_offset(addr, &size, NULL)) + goto error; + addr = ftrace_location_range(addr, addr + size - 1); + if (!addr) + goto error; + addrs[i] = addr; + } + + err = 0; +error: + kvfree(syms); + kfree(func); + return err; +} + +int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + struct bpf_kprobe_multi_link *link = NULL; + struct bpf_link_primer link_primer; + unsigned long *addrs; + u32 flags, cnt, size; + void __user *uaddrs; + void __user *usyms; + int err; + + /* no support for 32bit archs yet */ + if (sizeof(u64) != sizeof(void *)) + return -EOPNOTSUPP; + + if (prog->expected_attach_type != BPF_TRACE_KPROBE_MULTI) + return -EINVAL; + + flags = attr->link_create.kprobe_multi.flags; + if (flags & ~BPF_F_KPROBE_MULTI_RETURN) + return -EINVAL; + + uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs); + usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms); + if (!!uaddrs == !!usyms) + return -EINVAL; + + cnt = attr->link_create.kprobe_multi.cnt; + if (!cnt) + return -EINVAL; + + size = cnt * sizeof(*addrs); + addrs = kvmalloc(size, GFP_KERNEL); + if (!addrs) + return -ENOMEM; + + if (uaddrs) { + if (copy_from_user(addrs, uaddrs, size)) { + err = -EFAULT; + goto error; + } + } else { + err = kprobe_multi_resolve_syms(usyms, cnt, addrs); + if (err) + goto error; + } + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) { + err = -ENOMEM; + goto error; + } + + bpf_link_init(&link->link, BPF_LINK_TYPE_KPROBE_MULTI, + &bpf_kprobe_multi_link_lops, prog); + + err = bpf_link_prime(&link->link, &link_primer); + if (err) + goto error; + + if (flags & BPF_F_KPROBE_MULTI_RETURN) + link->fp.exit_handler = kprobe_multi_link_handler; + else + link->fp.entry_handler = kprobe_multi_link_handler; + + link->addrs = addrs; + + err = register_fprobe_ips(&link->fp, addrs, cnt); + if (err) { + bpf_link_cleanup(&link_primer); + return err; + } + + return bpf_link_settle(&link_primer); + +error: + kfree(link); + kvfree(addrs); + return err; +} +#else /* !CONFIG_FPROBE */ +int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} +#endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 99fab54ae9c0..d77f47af7752 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -997,6 +997,7 @@ enum bpf_attach_type { BPF_SK_REUSEPORT_SELECT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, BPF_PERF_EVENT, + BPF_TRACE_KPROBE_MULTI, __MAX_BPF_ATTACH_TYPE }; @@ -1011,6 +1012,7 @@ enum bpf_link_type { BPF_LINK_TYPE_NETNS = 5, BPF_LINK_TYPE_XDP = 6, BPF_LINK_TYPE_PERF_EVENT = 7, + BPF_LINK_TYPE_KPROBE_MULTI = 8, MAX_BPF_LINK_TYPE, }; @@ -1118,6 +1120,11 @@ enum bpf_link_type { */ #define BPF_F_XDP_HAS_FRAGS (1U << 5) +/* link_create.kprobe_multi.flags used in LINK_CREATE command for + * BPF_TRACE_KPROBE_MULTI attach type to create return probe. + */ +#define BPF_F_KPROBE_MULTI_RETURN (1U << 0) + /* When BPF ldimm64's insn[0].src_reg != 0 then this can have * the following extensions: * @@ -1475,6 +1482,12 @@ union bpf_attr { */ __u64 bpf_cookie; } perf_event; + struct { + __u32 flags; + __u32 cnt; + __aligned_u64 syms; + __aligned_u64 addrs; + } kprobe_multi; }; } link_create; -- cgit v1.2.3 From 0563231f93c6d1f582b168a47753b345c1e20d81 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:44:54 -0400 Subject: tracing/events: Add __vstring() and __assign_vstr() helper macros There's several places that open code the following logic: TP_STRUCT__entry(__dynamic_array(char, msg, MSG_MAX)), TP_fast_assign(vsnprintf(__get_str(msg), MSG_MAX, vaf->fmt, *vaf->va);) To load a string created by variable array va_list. The main issue with this approach is that "MSG_MAX" usage in the __dynamic_array() portion. That actually just reserves the MSG_MAX in the event, and even wastes space because there's dynamic meta data also saved in the event to denote the offset and size of the dynamic array. It would have been better to just use a static __array() field. Instead, create __vstring() and __assign_vstr() that work like __string and __assign_str() but instead of taking a destination string to copy, take a format string and a va_list pointer and fill in the values. It uses the helper: #define __trace_event_vstr_len(fmt, va) \ ({ \ va_list __ap; \ int __ret; \ \ va_copy(__ap, *(va)); \ __ret = vsnprintf(NULL, 0, fmt, __ap) + 1; \ va_end(__ap); \ \ min(__ret, TRACE_EVENT_STR_MAX); \ }) To figure out the length to store the string. It may be slightly slower as it needs to run the vsnprintf() twice, but it now saves space on the ring buffer. Link: https://lkml.kernel.org/r/20220705224749.053570613@goodmis.org Cc: Dennis Dalessandro Cc: Ingo Molnar Cc: Andrew Morton Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Kalle Valo Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Arend van Spriel Cc: Franky Lin Cc: Hante Meuleman Cc: Gregory Greenman Cc: Peter Chen Cc: Greg Kroah-Hartman Cc: Mathias Nyman Cc: Chunfeng Yun Cc: Bin Liu Cc: Marek Lindner Cc: Simon Wunderlich Cc: Antonio Quartulli Cc: Sven Eckelmann Cc: Johannes Berg Cc: Jim Cromie Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 18 ++++++++++++++++++ include/trace/stages/stage1_struct_define.h | 3 +++ include/trace/stages/stage2_data_offsets.h | 3 +++ include/trace/stages/stage4_event_fields.h | 3 +++ include/trace/stages/stage5_get_offsets.h | 4 ++++ include/trace/stages/stage6_event_callback.h | 7 +++++++ 6 files changed, 38 insertions(+) (limited to 'include/linux/trace_events.h') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index e6e95a9f07a5..b18759a673c6 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -916,6 +916,24 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, #endif +#define TRACE_EVENT_STR_MAX 512 + +/* + * gcc warns that you can not use a va_list in an inlined + * function. But lets me make it into a macro :-/ + */ +#define __trace_event_vstr_len(fmt, va) \ +({ \ + va_list __ap; \ + int __ret; \ + \ + va_copy(__ap, *(va)); \ + __ret = vsnprintf(NULL, 0, fmt, __ap) + 1; \ + va_end(__ap); \ + \ + min(__ret, TRACE_EVENT_STR_MAX); \ +}) + #endif /* _LINUX_TRACE_EVENT_H */ /* diff --git a/include/trace/stages/stage1_struct_define.h b/include/trace/stages/stage1_struct_define.h index a16783419687..1b7bab60434c 100644 --- a/include/trace/stages/stage1_struct_define.h +++ b/include/trace/stages/stage1_struct_define.h @@ -26,6 +26,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h index 42fd1e8813ec..1b7a8f764fdd 100644 --- a/include/trace/stages/stage2_data_offsets.h +++ b/include/trace/stages/stage2_data_offsets.h @@ -32,6 +32,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index e80cdc397a43..c3790ec7a453 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -38,6 +38,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index 7ee5931300e6..fba4c24ed9e6 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -39,6 +39,10 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, \ + __trace_event_vstr_len(fmt, ap)) + #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) \ __item_length = (len) * sizeof(type); \ diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index e1724f73594b..0f51f6b3ab70 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -24,6 +24,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __assign_str #define __assign_str(dst, src) \ strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)"); @@ -35,6 +38,10 @@ __get_str(dst)[len] = '\0'; \ } while(0) +#undef __assign_vstr +#define __assign_vstr(dst, fmt, va) \ + vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, *(va)) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) -- cgit v1.2.3 From dcf8e5633e2e69ad60b730ab5905608b756a032f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 23 Aug 2022 12:59:25 -0700 Subject: tracing: Define the is_signed_type() macro once There are two definitions of the is_signed_type() macro: one in and a second definition in . As suggested by Linus, move the definition of the is_signed_type() macro into the header file. Change the definition of the is_signed_type() macro to make sure that it does not trigger any sparse warnings with future versions of sparse for bitwise types. Link: https://lore.kernel.org/all/CAHk-=whjH6p+qzwUdx5SOVVHjS3WvzJQr6mDUwhEyTf6pJWzaQ@mail.gmail.com/ Link: https://lore.kernel.org/all/CAHk-=wjQGnVfb4jehFR0XyZikdQvCZouE96xR_nnf5kqaM5qqQ@mail.gmail.com/ Cc: Rasmus Villemoes Cc: Steven Rostedt Acked-by: Kees Cook Signed-off-by: Bart Van Assche Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 6 ++++++ include/linux/overflow.h | 1 - include/linux/trace_events.h | 2 -- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux/trace_events.h') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 01ce94b58b42..7713d7bcdaea 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -239,6 +239,12 @@ static inline void *offset_to_ptr(const int *off) /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +/* + * Whether 'type' is a signed type or an unsigned type. Supports scalar types, + * bool and also pointer types. + */ +#define is_signed_type(type) (((type)(-1)) < (__force type)1) + /* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f1221d11f8e5..0eb3b192f07a 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -30,7 +30,6 @@ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html - * credit to Christian Biere. */ -#define is_signed_type(type) (((type)(-1)) < (type)1) #define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type))) #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index b18759a673c6..8401dec93c15 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -814,8 +814,6 @@ extern int trace_add_event_call(struct trace_event_call *call); extern int trace_remove_event_call(struct trace_event_call *call); extern int trace_event_get_offsets(struct trace_event_call *call); -#define is_signed_type(type) (((type)(-1)) < (type)1) - int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); int trace_array_set_clr_event(struct trace_array *tr, const char *system, -- cgit v1.2.3 From f3ddb74ad0790030c9592229fb14d8c451f4e9a8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 27 Sep 2022 19:15:27 -0400 Subject: tracing: Wake up ring buffer waiters on closing of the file When the file that represents the ring buffer is closed, there may be waiters waiting on more input from the ring buffer. Call ring_buffer_wake_waiters() to wake up any waiters when the file is closed. Link: https://lkml.kernel.org/r/20220927231825.182416969@goodmis.org Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Andrew Morton Fixes: e30f53aad2202 ("tracing: Do not busy wait in buffer splice") Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 1 + kernel/trace/trace.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux/trace_events.h') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8401dec93c15..20749bd9db71 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -92,6 +92,7 @@ struct trace_iterator { unsigned int temp_size; char *fmt; /* modified format holder */ unsigned int fmt_size; + long wait_index; /* trace_seq for __print_flags() and __print_symbolic() etc. */ struct trace_seq tmp_seq; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index aed7ea6e6045..e101b0764b39 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8160,6 +8160,12 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) __trace_array_put(iter->tr); + iter->wait_index++; + /* Make sure the waiters see the new wait_index */ + smp_wmb(); + + ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file); + if (info->spare) ring_buffer_free_read_page(iter->array_buffer->buffer, info->spare_cpu, info->spare); @@ -8313,6 +8319,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, /* did we read anything? */ if (!spd.nr_pages) { + long wait_index; + if (ret) goto out; @@ -8320,10 +8328,17 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) goto out; + wait_index = READ_ONCE(iter->wait_index); + ret = wait_on_pipe(iter, iter->tr->buffer_percent); if (ret) goto out; + /* Make sure we see the new wait_index */ + smp_rmb(); + if (wait_index != iter->wait_index) + goto out; + goto again; } -- cgit v1.2.3