perf bpf: Collect perf_evsel in BPF object files

This patch creates a 'struct perf_evsel' for every probe in the BPF object file(s) and fills 'struct evlist' with them. The previously introduced dummy event is now removed. After this patch, the following command: # perf record --event filter.o ls can trace each of the probes defined in filter.o. The core of this patch is bpf__foreach_tev(), which calls a callback function for each 'struct probe_trace_event' of a BPF program, passing the program's associated file descriptor. The add_bpf_event() callback creates evsels by calling parse_events_add_tracepoint(). Since bpf-loader.c will not be built if libbpf is turned off, an empty bpf__foreach_tev() is defined in bpf-loader.h to avoid build errors. Committer notes: Before: # /tmp/oldperf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.198 MB perf.data ] # perf evlist /tmp/foo.o # perf evlist -v /tmp/foo.o: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 I.e. we create just the PERF_TYPE_SOFTWARE (type: 1), PERF_COUNT_SW_DUMMY (config 0x9) event. Now, with this patch: # perf record --event /tmp/foo.o -a usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.210 MB perf.data ] # perf evlist -v perf_bpf_probe:fork: type: 2, size: 112, config: 0x6bd, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|PERIOD|RAW, disabled: 1, inherit: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 # We now have a PERF_TYPE_TRACEPOINT (type: 2) event whose config is 0x6bd, which is how, after setting up the event via the kprobes interface, the 'perf_bpf_probe:fork' event is accessible via the perf_event_open syscall. 
This is all transient: as soon as the 'perf record' session ends, these probes will go away. To see what it looks like, let's try a never-ending session, one that expects a control+C to end: # perf record --event /tmp/foo.o -a So, with that in place, we can use 'perf probe' to see what is in place: # perf probe -l perf_bpf_probe:fork (on _do_fork@acme/git/linux/kernel/fork.c) We can also use debugfs: [root@felicio ~]# cat /sys/kernel/debug/tracing/kprobe_events p:perf_bpf_probe/fork _text+638512 Ok, now let's stop and see if we got some forks: [root@felicio linux]# perf record --event /tmp/foo.o -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.325 MB perf.data (111 samples) ] [root@felicio linux]# perf script sshd 1271 [003] 81797.507678: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [000] 81797.524917: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [001] 81799.381603: perf_bpf_probe:fork: (ffffffff8109be30) sshd 18309 [001] 81799.408635: perf_bpf_probe:fork: (ffffffff8109be30) <SNIP> Sure enough, we have 111 forks :-) Callchains seem to work as well: # perf report --stdio --no-child # To display the perf.data header info, please use --header/--header-only options. # # Total Lost Samples: 0 # # Samples: 562 of event 'perf_bpf_probe:fork' # Event count (approx.): 562 # # Overhead Command Shared Object Symbol # ........ ........ ................ ............ 
# 44.66% sh [kernel.vmlinux] [k] _do_fork | ---_do_fork entry_SYSCALL_64_fastpath __libc_fork make_child 26.16% make [kernel.vmlinux] [k] _do_fork <SNIP> # Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: David Ahern <dsahern@gmail.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kaixu Xia <xiakaixu@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1444826502-49291-7-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
author: Wang Nan <wangnan0@huawei.com> 2015-10-14 12:41:17 +0000
committer: Arnaldo Carvalho de Melo <acme@redhat.com> 2015-10-28 13:11:59 -0300
commit: 4edf30e39e6cff32390eaff6a1508969b3cd967b (patch)
tree: 1d5a60b14b9f808b3c0cece5761002ad32f0e274 /tools/perf
parent: 1e5e3ee8ff3877db6943032b54a6ac21c095affd (diff)
download: lwn-4edf30e39e6cff32390eaff6a1508969b3cd967b.tar.gz
lwn-4edf30e39e6cff32390eaff6a1508969b3cd967b.zip
3 files changed, 99 insertions, 7 deletions
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 727955858d00..aa784a498c48 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -255,6 +255,46 @@ int bpf__load(struct bpf_object *obj)
 	return 0;
 }
 
+int bpf__foreach_tev(struct bpf_object *obj,
+		     bpf_prog_iter_callback_t func,
+		     void *arg)
+{
+	struct bpf_program *prog;
+	int err;
+
+	bpf_object__for_each_program(prog, obj) {
+		struct probe_trace_event *tev;
+		struct perf_probe_event *pev;
+		struct bpf_prog_priv *priv;
+		int i, fd;
+
+		err = bpf_program__get_private(prog,
+				(void **)&priv);
+		if (err || !priv) {
+			pr_debug("bpf: failed to get private field\n");
+			return -EINVAL;
+		}
+
+		pev = &priv->pev;
+		for (i = 0; i < pev->ntevs; i++) {
+			tev = &pev->tevs[i];
+
+			fd = bpf_program__fd(prog);
+			if (fd < 0) {
+				pr_debug("bpf: failed to get file descriptor\n");
+				return fd;
+			}
+
+			err = (*func)(tev, fd, arg);
+			if (err) {
+				pr_debug("bpf: call back failed, stop iterate\n");
+				return err;
+			}
+		}
+	}
+	return 0;
+}
+
 #define bpf__strerror_head(err, buf, size) \
 	char sbuf[STRERR_BUFSIZE], *emsg;\
 	if (!size)\
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index b091ceb19c48..a8f25ee06fc5 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -8,11 +8,15 @@
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <string.h>
+#include "probe-event.h"
 #include "debug.h"
 
 struct bpf_object;
 #define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
 
+typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev,
+					int fd, void *arg);
+
 #ifdef HAVE_LIBBPF_SUPPORT
 struct bpf_object *bpf__prepare_load(const char *filename);
 
@@ -26,6 +30,8 @@ int bpf__strerror_probe(struct bpf_object *obj, int err,
 int bpf__load(struct bpf_object *obj);
 int bpf__strerror_load(struct bpf_object *obj, int err,
 		       char *buf, size_t size);
+int bpf__foreach_tev(struct bpf_object *obj,
+		     bpf_prog_iter_callback_t func, void *arg);
 #else
 static inline struct bpf_object *
 bpf__prepare_load(const char *filename __maybe_unused)
@@ -41,6 +47,14 @@ static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused) { return 0
 static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; }
 
 static inline int
+bpf__foreach_tev(struct bpf_object *obj __maybe_unused,
+		 bpf_prog_iter_callback_t func __maybe_unused,
+		 void *arg __maybe_unused)
+{
+	return 0;
+}
+
+static inline int
 __bpf_strerror(char *buf, size_t size)
 {
 	if (!size)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index c3aabeb63e88..d97b03710331 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -530,12 +530,49 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 	return ret;
 }
 
+struct __add_bpf_event_param {
+	struct parse_events_evlist *data;
+	struct list_head *list;
+};
+
+static int add_bpf_event(struct probe_trace_event *tev, int fd,
+			 void *_param)
+{
+	LIST_HEAD(new_evsels);
+	struct __add_bpf_event_param *param = _param;
+	struct parse_events_evlist *evlist = param->data;
+	struct list_head *list = param->list;
+	int err;
+
+	pr_debug("add bpf event %s:%s and attach bpf program %d\n",
+		 tev->group, tev->event, fd);
+
+	err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group,
+					  tev->event, evlist->error, NULL);
+	if (err) {
+		struct perf_evsel *evsel, *tmp;
+
+		pr_debug("Failed to add BPF event %s:%s\n",
+			 tev->group, tev->event);
+		list_for_each_entry_safe(evsel, tmp, &new_evsels, node) {
+			list_del(&evsel->node);
+			perf_evsel__delete(evsel);
+		}
+		return err;
+	}
+	pr_debug("adding %s:%s\n", tev->group, tev->event);
+
+	list_splice(&new_evsels, list);
+	return 0;
+}
+
 int parse_events_load_bpf_obj(struct parse_events_evlist *data,
 			      struct list_head *list,
 			      struct bpf_object *obj)
 {
 	int err;
 	char errbuf[BUFSIZ];
+	struct __add_bpf_event_param param = {data, list};
 	static bool registered_unprobe_atexit = false;
 
 	if (IS_ERR(obj) || !obj) {
@@ -567,13 +604,14 @@ int parse_events_load_bpf_obj(struct parse_events_evlist *data,
 		goto errout;
 	}
 
-	/*
-	 * Temporary add a dummy event here so we can check whether
-	 * basic bpf loader works. Following patches will replace
-	 * dummy event by useful evsels.
-	 */
-	return parse_events_add_numeric(data, list, PERF_TYPE_SOFTWARE,
-					PERF_COUNT_SW_DUMMY, NULL);
+	err = bpf__foreach_tev(obj, add_bpf_event, &param);
+	if (err) {
+		snprintf(errbuf, sizeof(errbuf),
+			 "Attach events in BPF object failed");
+		goto errout;
+	}
+
+	return 0;
 errout:
 	data->error->help = strdup("(add -v to see detail)");
 	data->error->str = strdup(errbuf);
author	Wang Nan <wangnan0@huawei.com>	2015-10-14 12:41:17 +0000
committer	Arnaldo Carvalho de Melo <acme@redhat.com>	2015-10-28 13:11:59 -0300
commit	4edf30e39e6cff32390eaff6a1508969b3cd967b (patch)
tree	1d5a60b14b9f808b3c0cece5761002ad32f0e274 /tools/perf
parent	1e5e3ee8ff3877db6943032b54a6ac21c095affd (diff)
download	lwn-4edf30e39e6cff32390eaff6a1508969b3cd967b.tar.gz lwn-4edf30e39e6cff32390eaff6a1508969b3cd967b.zip