summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2018-03-28 22:55:20 +0200
committerDaniel Borkmann <daniel@iogearbox.net>2018-03-28 22:55:21 +0200
commitf6ef56589374670b7c1939720dfa00212bd80a5b (patch)
treef8f5e66c8fba220b34783b0d38518192bc53bf39 /include
parent6f5c39fa5cd4a78c5432021e981aa8f79437a32c (diff)
parent3bbe0869884ceebffd59d5519c1d560207c6e116 (diff)
downloadlwn-f6ef56589374670b7c1939720dfa00212bd80a5b.tar.gz
lwn-f6ef56589374670b7c1939720dfa00212bd80a5b.zip
Merge branch 'bpf-raw-tracepoints'
Alexei Starovoitov says: ==================== v7->v8: - moved 'u32 num_args' from 'struct tracepoint' into 'struct bpf_raw_event_map' that increases memory overhead, but can be optimized/compressed later. Now it's zero changes in tracepoint.[ch] v6->v7: - adopted Steven's bpf_raw_tp_map section approach to find tracepoint and corresponding bpf probe function instead of kallsyms approach. dropped kernel_tracepoint_find_by_name() patch v5->v6: - avoid changing semantics of for_each_kernel_tracepoint() function, instead introduce kernel_tracepoint_find_by_name() helper v4->v5: - adopted Daniel's fancy REPEAT macro in bpf_trace.c in patch 6 v3->v4: - adopted Linus's CAST_TO_U64 macro to cast any integer, pointer, or small struct to u64. That nicely reduced the size of patch 1 v2->v3: - with Linus's suggestion introduced generic COUNT_ARGS and CONCATENATE macros (or rather moved them from apparmor) that cleaned up patch 6 - added patch 4 to refactor trace_iwlwifi_dev_ucode_error() from 17 args to 4 Now any tracepoint with >12 args will have build error v1->v2: - simplified api by combing bpf_raw_tp_open(name) + bpf_attach(prog_fd) into bpf_raw_tp_open(name, prog_fd) as suggested by Daniel. That simplifies bpf_detach as well which is now simple close() of fd. - fixed memory leak in error path which was spotted by Daniel. - fixed bpf_get_stackid(), bpf_perf_event_output() called from raw tracepoints - added more tests - fixed allyesconfig build caught by buildbot v1: This patch set is a different way to address the pressing need to access task_struct pointers in sched tracepoints from bpf programs. The first approach simply added these pointers to sched tracepoints: https://lkml.org/lkml/2017/12/14/753 which Peter nacked. Few options were discussed and eventually the discussion converged on doing bpf specific tracepoint_probe_register() probe functions. Details here: https://lkml.org/lkml/2017/12/20/929 Patch 1 is kernel wide cleanup of pass-struct-by-value into pass-struct-by-reference into tracepoints. Patches 2 and 3 are minor cleanups to address allyesconfig build Patch 4 refactor trace_iwlwifi_dev_ucode_error from 17 to 4 args Patch 5 introduces COUNT_ARGS macro Patch 6 introduces BPF_RAW_TRACEPOINT api. the auto-cleanup and multiple concurrent users are must have features of tracing api. For bpf raw tracepoints it looks like: // load bpf prog with BPF_PROG_TYPE_RAW_TRACEPOINT type prog_fd = bpf_prog_load(...); // receive anon_inode fd for given bpf_raw_tracepoint // and attach bpf program to it raw_tp_fd = bpf_raw_tracepoint_open("xdp_exception", prog_fd); Ctrl-C of tracing daemon or cmdline tool will automatically detach bpf program, unload it and unregister tracepoint probe. More details in patch 6. Patch 7 - trivial support in libbpf Patches 8, 9 - user space tests samples/bpf/test_overhead performance on 1 cpu: tracepoint base kprobe+bpf tracepoint+bpf raw_tracepoint+bpf task_rename 1.1M 769K 947K 1.0M urandom_read 789K 697K 750K 755K ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/vmlinux.lds.h10
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/kernel.h7
-rw-r--r--include/linux/trace_events.h42
-rw-r--r--include/linux/tracepoint-defs.h6
-rw-r--r--include/trace/bpf_probe.h92
-rw-r--r--include/trace/define_trace.h1
-rw-r--r--include/trace/events/f2fs.h2
-rw-r--r--include/uapi/linux/bpf.h11
9 files changed, 171 insertions, 1 deletions
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 1ab0e520d6fc..8add3493a202 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -178,6 +178,15 @@
#define TRACE_SYSCALLS()
#endif
+#ifdef CONFIG_BPF_EVENTS
+#define BPF_RAW_TP() STRUCT_ALIGN(); \
+ VMLINUX_SYMBOL(__start__bpf_raw_tp) = .; \
+ KEEP(*(__bpf_raw_tp_map)) \
+ VMLINUX_SYMBOL(__stop__bpf_raw_tp) = .;
+#else
+#define BPF_RAW_TP()
+#endif
+
#ifdef CONFIG_SERIAL_EARLYCON
#define EARLYCON_TABLE() STRUCT_ALIGN(); \
VMLINUX_SYMBOL(__earlycon_table) = .; \
@@ -249,6 +258,7 @@
LIKELY_PROFILE() \
BRANCH_PROFILE() \
TRACE_PRINTKS() \
+ BPF_RAW_TP() \
TRACEPOINT_STR()
/*
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5e2e8a49fb21..6d7243bfb0ff 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -19,6 +19,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3fd291503576..293fa0677fba 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -919,6 +919,13 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+/* This counts to 12. Any more, it will return 13th argument. */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
/**
* container_of - cast a member of a structure out to the containing structure
* @ptr: the pointer to the member.
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 8a1442c4e513..b0357cd198b0 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -468,6 +468,9 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx);
int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog);
void perf_event_detach_bpf_prog(struct perf_event *event);
int perf_event_query_prog_array(struct perf_event *event, void __user *info);
+int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
+int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
+struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@@ -487,6 +490,18 @@ perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
return -EOPNOTSUPP;
}
+static inline int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *p)
+{
+ return -EOPNOTSUPP;
+}
+static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *p)
+{
+ return -EOPNOTSUPP;
+}
+static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
+{
+ return NULL;
+}
#endif
enum {
@@ -546,6 +561,33 @@ extern void ftrace_profile_free_filter(struct perf_event *event);
void perf_trace_buf_update(void *record, u16 type);
void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp);
+void bpf_trace_run1(struct bpf_prog *prog, u64 arg1);
+void bpf_trace_run2(struct bpf_prog *prog, u64 arg1, u64 arg2);
+void bpf_trace_run3(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3);
+void bpf_trace_run4(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3, u64 arg4);
+void bpf_trace_run5(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3, u64 arg4, u64 arg5);
+void bpf_trace_run6(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3, u64 arg4, u64 arg5, u64 arg6);
+void bpf_trace_run7(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+void bpf_trace_run8(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+ u64 arg8);
+void bpf_trace_run9(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+ u64 arg8, u64 arg9);
+void bpf_trace_run10(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+ u64 arg8, u64 arg9, u64 arg10);
+void bpf_trace_run11(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+ u64 arg8, u64 arg9, u64 arg10, u64 arg11);
+void bpf_trace_run12(struct bpf_prog *prog, u64 arg1, u64 arg2,
+ u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7,
+ u64 arg8, u64 arg9, u64 arg10, u64 arg11, u64 arg12);
void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx,
struct trace_event_call *call, u64 count,
struct pt_regs *regs, struct hlist_head *head,
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 64ed7064f1fa..22c5a46e9693 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -35,4 +35,10 @@ struct tracepoint {
struct tracepoint_func __rcu *funcs;
};
+struct bpf_raw_event_map {
+ struct tracepoint *tp;
+ void *bpf_func;
+ u32 num_args;
+} __aligned(32);
+
#endif
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
new file mode 100644
index 000000000000..505dae0bed80
--- /dev/null
+++ b/include/trace/bpf_probe.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#undef TRACE_SYSTEM_VAR
+
+#ifdef CONFIG_BPF_EVENTS
+
+#undef __entry
+#define __entry entry
+
+#undef __get_dynamic_array
+#define __get_dynamic_array(field) \
+ ((void *)__entry + (__entry->__data_loc_##field & 0xffff))
+
+#undef __get_dynamic_array_len
+#define __get_dynamic_array_len(field) \
+ ((__entry->__data_loc_##field >> 16) & 0xffff)
+
+#undef __get_str
+#define __get_str(field) ((char *)__get_dynamic_array(field))
+
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
+#undef __perf_count
+#define __perf_count(c) (c)
+
+#undef __perf_task
+#define __perf_task(t) (t)
+
+/* cast any integer, pointer, or small struct to u64 */
+#define UINTTYPE(size) \
+ __typeof__(__builtin_choose_expr(size == 1, (u8)1, \
+ __builtin_choose_expr(size == 2, (u16)2, \
+ __builtin_choose_expr(size == 4, (u32)3, \
+ __builtin_choose_expr(size == 8, (u64)4, \
+ (void)5)))))
+#define __CAST_TO_U64(x) ({ \
+ typeof(x) __src = (x); \
+ UINTTYPE(sizeof(x)) __dst; \
+ memcpy(&__dst, &__src, sizeof(__dst)); \
+ (u64)__dst; })
+
+#define __CAST1(a,...) __CAST_TO_U64(a)
+#define __CAST2(a,...) __CAST_TO_U64(a), __CAST1(__VA_ARGS__)
+#define __CAST3(a,...) __CAST_TO_U64(a), __CAST2(__VA_ARGS__)
+#define __CAST4(a,...) __CAST_TO_U64(a), __CAST3(__VA_ARGS__)
+#define __CAST5(a,...) __CAST_TO_U64(a), __CAST4(__VA_ARGS__)
+#define __CAST6(a,...) __CAST_TO_U64(a), __CAST5(__VA_ARGS__)
+#define __CAST7(a,...) __CAST_TO_U64(a), __CAST6(__VA_ARGS__)
+#define __CAST8(a,...) __CAST_TO_U64(a), __CAST7(__VA_ARGS__)
+#define __CAST9(a,...) __CAST_TO_U64(a), __CAST8(__VA_ARGS__)
+#define __CAST10(a,...) __CAST_TO_U64(a), __CAST9(__VA_ARGS__)
+#define __CAST11(a,...) __CAST_TO_U64(a), __CAST10(__VA_ARGS__)
+#define __CAST12(a,...) __CAST_TO_U64(a), __CAST11(__VA_ARGS__)
+/* tracepoints with more than 12 arguments will hit build error */
+#define CAST_TO_U64(...) CONCATENATE(__CAST, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
+
+#undef DECLARE_EVENT_CLASS
+#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
+static notrace void \
+__bpf_trace_##call(void *__data, proto) \
+{ \
+ struct bpf_prog *prog = __data; \
+ CONCATENATE(bpf_trace_run, COUNT_ARGS(args))(prog, CAST_TO_U64(args)); \
+}
+
+/*
+ * This part is compiled out, it is only here as a build time check
+ * to make sure that if the tracepoint handling changes, the
+ * bpf probe will fail to compile unless it too is updated.
+ */
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args) \
+static inline void bpf_test_probe_##call(void) \
+{ \
+ check_trace_callback_type_##call(__bpf_trace_##template); \
+} \
+static struct bpf_raw_event_map __used \
+ __attribute__((section("__bpf_raw_tp_map"))) \
+__bpf_trace_tp_map_##call = { \
+ .tp = &__tracepoint_##call, \
+ .bpf_func = (void *)__bpf_trace_##template, \
+ .num_args = COUNT_ARGS(args), \
+};
+
+
+#undef DEFINE_EVENT_PRINT
+#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
+ DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+#endif /* CONFIG_BPF_EVENTS */
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index d9e3d4aa3f6e..cb30c5532144 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -95,6 +95,7 @@
#ifdef TRACEPOINTS_ENABLED
#include <trace/trace_events.h>
#include <trace/perf.h>
+#include <trace/bpf_probe.h>
#endif
#undef TRACE_EVENT
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 06c87f9f720c..795698925d20 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -491,7 +491,7 @@ DEFINE_EVENT(f2fs__truncate_node, f2fs_truncate_node,
TRACE_EVENT(f2fs_truncate_partial_nodes,
- TP_PROTO(struct inode *inode, nid_t nid[], int depth, int err),
+ TP_PROTO(struct inode *inode, nid_t *nid, int depth, int err),
TP_ARGS(inode, nid, depth, err),
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 18b7c510c511..1878201c2d77 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -94,6 +94,7 @@ enum bpf_cmd {
BPF_MAP_GET_FD_BY_ID,
BPF_OBJ_GET_INFO_BY_FD,
BPF_PROG_QUERY,
+ BPF_RAW_TRACEPOINT_OPEN,
};
enum bpf_map_type {
@@ -134,6 +135,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_CGROUP_DEVICE,
BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_RAW_TRACEPOINT,
};
enum bpf_attach_type {
@@ -344,6 +346,11 @@ union bpf_attr {
__aligned_u64 prog_ids;
__u32 prog_cnt;
} query;
+
+ struct {
+ __u64 name;
+ __u32 prog_fd;
+ } raw_tracepoint;
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
@@ -1152,4 +1159,8 @@ struct bpf_cgroup_dev_ctx {
__u32 minor;
};
+struct bpf_raw_tracepoint_args {
+ __u64 args[0];
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */