diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-28 22:55:20 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2018-03-28 22:55:21 +0200 |
commit | f6ef56589374670b7c1939720dfa00212bd80a5b (patch) | |
tree | f8f5e66c8fba220b34783b0d38518192bc53bf39 /include | |
parent | 6f5c39fa5cd4a78c5432021e981aa8f79437a32c (diff) | |
parent | 3bbe0869884ceebffd59d5519c1d560207c6e116 (diff) | |
download | lwn-f6ef56589374670b7c1939720dfa00212bd80a5b.tar.gz lwn-f6ef56589374670b7c1939720dfa00212bd80a5b.zip |
Merge branch 'bpf-raw-tracepoints'
Alexei Starovoitov says:
====================
v7->v8:
- moved 'u32 num_args' from 'struct tracepoint' into 'struct bpf_raw_event_map'
that increases memory overhead, but can be optimized/compressed later.
Now it's zero changes in tracepoint.[ch]
v6->v7:
- adopted Steven's bpf_raw_tp_map section approach to find tracepoint
and corresponding bpf probe function instead of kallsyms approach.
dropped kernel_tracepoint_find_by_name() patch
v5->v6:
- avoid changing semantics of for_each_kernel_tracepoint() function, instead
introduce kernel_tracepoint_find_by_name() helper
v4->v5:
- adopted Daniel's fancy REPEAT macro in bpf_trace.c in patch 6
v3->v4:
- adopted Linus's CAST_TO_U64 macro to cast any integer, pointer, or small
struct to u64. That nicely reduced the size of patch 1
v2->v3:
- with Linus's suggestion introduced generic COUNT_ARGS and CONCATENATE macros
(or rather moved them from apparmor)
that cleaned up patch 6
- added patch 4 to refactor trace_iwlwifi_dev_ucode_error() from 17 args to 4
Now any tracepoint with >12 args will have build error
v1->v2:
- simplified api by combing bpf_raw_tp_open(name) + bpf_attach(prog_fd) into
bpf_raw_tp_open(name, prog_fd) as suggested by Daniel.
That simplifies bpf_detach as well which is now simple close() of fd.
- fixed memory leak in error path which was spotted by Daniel.
- fixed bpf_get_stackid(), bpf_perf_event_output() called from raw tracepoints
- added more tests
- fixed allyesconfig build caught by buildbot
v1:
This patch set is a different way to address the pressing need to access
task_struct pointers in sched tracepoints from bpf programs.
The first approach simply added these pointers to sched tracepoints:
https://lkml.org/lkml/2017/12/14/753
which Peter nacked.
Few options were discussed and eventually the discussion converged on
doing bpf specific tracepoint_probe_register() probe functions.
Details here:
https://lkml.org/lkml/2017/12/20/929
Patch 1 is kernel wide cleanup of pass-struct-by-value into
pass-struct-by-reference into tracepoints.
Patches 2 and 3 are minor cleanups to address allyesconfig build
Patch 4 refactor trace_iwlwifi_dev_ucode_error from 17 to 4 args
Patch 5 introduces COUNT_ARGS macro
Patch 6 introduces BPF_RAW_TRACEPOINT api.
the auto-cleanup and multiple concurrent users are must have
features of tracing api. For bpf raw tracepoints it looks like:
// load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type
prog_fd = bpf_prog_load(...);
// receive anon_inode fd for given bpf_raw_tracepoint
// and attach bpf program to it
raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd);
Ctrl-C of tracing daemon or cmdline tool will automatically
detach bpf program, unload it and unregister tracepoint probe.
More details in patch 6.
Patch 7 - trivial support in libbpf
Patches 8, 9 - user space tests
samples/bpf/test_overhead performance on 1 cpu:
tracepoint base kprobe+bpf tracepoint+bpf raw_tracepoint+bpf
task_rename 1.1M 769K 947K 1.0M
urandom_read 789K 697K 750K 755K
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/vmlinux.lds.h | 10 | ||||
-rw-r--r-- | include/linux/bpf_types.h | 1 | ||||
-rw-r--r-- | include/linux/kernel.h | 7 | ||||
-rw-r--r-- | include/linux/trace_events.h | 42 | ||||
-rw-r--r-- | include/linux/tracepoint-defs.h | 6 | ||||
-rw-r--r-- | include/trace/bpf_probe.h | 92 | ||||
-rw-r--r-- | include/trace/define_trace.h | 1 | ||||
-rw-r--r-- | include/trace/events/f2fs.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 11 |
9 files changed, 171 insertions, 1 deletions
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 1ab0e520d6fc..8add3493a202 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -178,6 +178,15 @@ #define TRACE_SYSCALLS() #endif +#ifdef CONFIG_BPF_EVENTS +#define BPF_RAW_TP() STRUCT_ALIGN(); \ + VMLINUX_SYMBOL(__start__bpf_raw_tp) = .; \ + KEEP(*(__bpf_raw_tp_map)) \ + VMLINUX_SYMBOL(__stop__bpf_raw_tp) = .; +#else +#define BPF_RAW_TP() +#endif + #ifdef CONFIG_SERIAL_EARLYCON #define EARLYCON_TABLE() STRUCT_ALIGN(); \ VMLINUX_SYMBOL(__earlycon_table) = .; \ @@ -249,6 +258,7 @@ LIKELY_PROFILE() \ BRANCH_PROFILE() \ TRACE_PRINTKS() \ + BPF_RAW_TP() \ TRACEPOINT_STR() /* diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 5e2e8a49fb21..6d7243bfb0ff 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -19,6 +19,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg) BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) +BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint) #endif #ifdef CONFIG_CGROUP_BPF BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3fd291503576..293fa0677fba 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -919,6 +919,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +/* This counts to 12. Any more, it will return 13th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + /** * container_of - cast a member of a structure out to the containing structure * @ptr: the pointer to the member. diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 8a1442c4e513..b0357cd198b0 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -468,6 +468,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); void perf_event_detach_bpf_prog(struct perf_event *event); int perf_event_query_prog_array(struct perf_event *event, void __user *info); +int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog); +int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog); +struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name); #else static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { @@ -487,6 +490,18 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info) { return -EOPNOTSUPP; } +static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p) +{ + return -EOPNOTSUPP; +} +static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p) +{ + return -EOPNOTSUPP; +} +static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name) +{ + return NULL; +} #endif enum { @@ -546,6 +561,33 @@ extern void ftrace_profile_free_filter(struct perf_event *event); void perf_trace_buf_update(void *record, u16 type); void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp); +void bpf_trace_run1(struct bpf_prog *prog, u64 arg1); +void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2); +void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3); +void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4); +void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5); +void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6); +void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7); +void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8); +void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9); +void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9, u64 arg10); +void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9, u64 arg10, u64 arg11); +void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2, + u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7, + u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12); void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct trace_event_call *call, u64 count, struct pt_regs *regs, struct hlist_head *head, diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h index 64ed7064f1fa..22c5a46e9693 100644 --- a/include/linux/tracepoint-defs.h +++ b/include/linux/tracepoint-defs.h @@ -35,4 +35,10 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; +struct bpf_raw_event_map { + struct tracepoint *tp; + void *bpf_func; + u32 num_args; +} __aligned(32); + #endif diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h new file mode 100644 index 000000000000..505dae0bed80 --- /dev/null +++ b/include/trace/bpf_probe.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#undef TRACE_SYSTEM_VAR + +#ifdef CONFIG_BPF_EVENTS + +#undef __entry +#define __entry entry + +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) + +#undef __get_dynamic_array_len +#define __get_dynamic_array_len(field) \ + ((__entry->__data_loc_##field >> 16) & 0xffff) + +#undef __get_str +#define __get_str(field) ((char *)__get_dynamic_array(field)) + +#undef __get_bitmask +#define __get_bitmask(field) (char *)__get_dynamic_array(field) + +#undef __perf_count +#define __perf_count(c) (c) + +#undef __perf_task +#define __perf_task(t) (t) + +/* cast any integer, pointer, or small struct to u64 */ +#define UINTTYPE(size) \ + __typeof__(__builtin_choose_expr(size == 1, (u8)1, \ + __builtin_choose_expr(size == 2, (u16)2, \ + __builtin_choose_expr(size == 4, (u32)3, \ + __builtin_choose_expr(size == 8, (u64)4, \ + (void)5))))) +#define __CAST_TO_U64(x) ({ \ + typeof(x) __src = (x); \ + UINTTYPE(sizeof(x)) __dst; \ + memcpy(&__dst, &__src, sizeof(__dst)); \ + (u64)__dst; }) + +#define __CAST1(a,...) __CAST_TO_U64(a) +#define __CAST2(a,...) __CAST_TO_U64(a), __CAST1(__VA_ARGS__) +#define __CAST3(a,...) __CAST_TO_U64(a), __CAST2(__VA_ARGS__) +#define __CAST4(a,...) __CAST_TO_U64(a), __CAST3(__VA_ARGS__) +#define __CAST5(a,...) __CAST_TO_U64(a), __CAST4(__VA_ARGS__) +#define __CAST6(a,...) __CAST_TO_U64(a), __CAST5(__VA_ARGS__) +#define __CAST7(a,...) __CAST_TO_U64(a), __CAST6(__VA_ARGS__) +#define __CAST8(a,...) __CAST_TO_U64(a), __CAST7(__VA_ARGS__) +#define __CAST9(a,...) __CAST_TO_U64(a), __CAST8(__VA_ARGS__) +#define __CAST10(a,...) __CAST_TO_U64(a), __CAST9(__VA_ARGS__) +#define __CAST11(a,...) __CAST_TO_U64(a), __CAST10(__VA_ARGS__) +#define __CAST12(a,...) __CAST_TO_U64(a), __CAST11(__VA_ARGS__) +/* tracepoints with more than 12 arguments will hit build error */ +#define CAST_TO_U64(...) CONCATENATE(__CAST, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__) + +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static notrace void \ +__bpf_trace_##call(void *__data, proto) \ +{ \ + struct bpf_prog *prog = __data; \ + CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args)); \ +} + +/* + * This part is compiled out, it is only here as a build time check + * to make sure that if the tracepoint handling changes, the + * bpf probe will fail to compile unless it too is updated. + */ +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ +static inline void bpf_test_probe_##call(void) \ +{ \ + check_trace_callback_type_##call(__bpf_trace_##template); \ +} \ +static struct bpf_raw_event_map __used \ + __attribute__((section("__bpf_raw_tp_map"))) \ +__bpf_trace_tp_map_##call = { \ + .tp = &__tracepoint_##call, \ + .bpf_func = (void *)__bpf_trace_##template, \ + .num_args = COUNT_ARGS(args), \ +}; + + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#endif /* CONFIG_BPF_EVENTS */ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index d9e3d4aa3f6e..cb30c5532144 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -95,6 +95,7 @@ #ifdef TRACEPOINTS_ENABLED #include <trace/trace_events.h> #include <trace/perf.h> +#include <trace/bpf_probe.h> #endif #undef TRACE_EVENT diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 06c87f9f720c..795698925d20 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -491,7 +491,7 @@ DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node, TRACE_EVENT(f2fs_truncate_partial_nodes, - TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err), + TP_PROTO(struct inode *inode, nid_t *nid, int depth, int err), TP_ARGS(inode, nid, depth, err), diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 18b7c510c511..1878201c2d77 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -94,6 +94,7 @@ enum bpf_cmd { BPF_MAP_GET_FD_BY_ID, BPF_OBJ_GET_INFO_BY_FD, BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, }; enum bpf_map_type { @@ -134,6 +135,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_CGROUP_DEVICE, BPF_PROG_TYPE_SK_MSG, + BPF_PROG_TYPE_RAW_TRACEPOINT, }; enum bpf_attach_type { @@ -344,6 +346,11 @@ union bpf_attr { __aligned_u64 prog_ids; __u32 prog_cnt; } query; + + struct { + __u64 name; + __u32 prog_fd; + } raw_tracepoint; } __attribute__((aligned(8))); /* BPF helper function descriptions: @@ -1152,4 +1159,8 @@ struct bpf_cgroup_dev_ctx { __u32 minor; }; +struct bpf_raw_tracepoint_args { + __u64 args[0]; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ |