diff options
author | Alexei Starovoitov <ast@plumgrid.com> | 2015-05-19 16:59:05 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-05-21 17:07:59 -0400 |
commit | 5bacd7805ab4f07a69c7ef4b1d45ce553d2b1c3a (patch) | |
tree | 6e2b12a280e4431e2a5c3d971a8d3ebad4c0b5bc /samples | |
parent | b52f00e6a7154308a08d0a2edab621f277801a2c (diff) | |
download | lwn-5bacd7805ab4f07a69c7ef4b1d45ce553d2b1c3a.tar.gz lwn-5bacd7805ab4f07a69c7ef4b1d45ce553d2b1c3a.zip |
samples/bpf: bpf_tail_call example for tracing
kprobe example that demonstrates what future seccomp programs may look like.
It attaches to seccomp_phase1() function and tail-calls other BPF programs
depending on syscall number.
Existing optimized classic BPF seccomp programs generated by Chrome look like:
if (sd.nr < 121) {
if (sd.nr < 57) {
if (sd.nr < 22) {
if (sd.nr < 7) {
if (sd.nr < 4) {
if (sd.nr < 1) {
check sys_read
} else {
if (sd.nr < 3) {
check sys_write and sys_open
} else {
check sys_close
}
}
} else {
} else {
} else {
} else {
} else {
}
A future seccomp program using native eBPF may instead look like:
bpf_tail_call(&sd, &syscall_jmp_table, sd.nr);
which is simpler, faster and leaves more room for per-syscall checks.
Usage:
$ sudo ./tracex5
<...>-366 [001] d... 4.870033: : read(fd=1, buf=00007f6d5bebf000, size=771)
<...>-369 [003] d... 4.870066: : mmap
<...>-369 [003] d... 4.870077: : syscall=110 (one of get/set uid/pid/gid)
<...>-369 [003] d... 4.870089: : syscall=107 (one of get/set uid/pid/gid)
sh-369 [000] d... 4.891740: : read(fd=0, buf=00000000023d1000, size=512)
sh-369 [000] d... 4.891747: : write(fd=1, buf=00000000023d3000, size=512)
sh-369 [000] d... 4.891747: : read(fd=1, buf=00000000023d3000, size=512)
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples')
-rw-r--r-- | samples/bpf/Makefile | 4 | ||||
-rw-r--r-- | samples/bpf/bpf_helpers.h | 2 | ||||
-rw-r--r-- | samples/bpf/bpf_load.c | 57 | ||||
-rw-r--r-- | samples/bpf/tracex5_kern.c | 75 | ||||
-rw-r--r-- | samples/bpf/tracex5_user.c | 46 |
5 files changed, 172 insertions, 12 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 8fdbd73429dd..ded10d05617e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -10,6 +10,7 @@ hostprogs-y += tracex1 hostprogs-y += tracex2 hostprogs-y += tracex3 hostprogs-y += tracex4 +hostprogs-y += tracex5 test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -20,6 +21,7 @@ tracex1-objs := bpf_load.o libbpf.o tracex1_user.o tracex2-objs := bpf_load.o libbpf.o tracex2_user.o tracex3-objs := bpf_load.o libbpf.o tracex3_user.o tracex4-objs := bpf_load.o libbpf.o tracex4_user.o +tracex5-objs := bpf_load.o libbpf.o tracex5_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -29,6 +31,7 @@ always += tracex1_kern.o always += tracex2_kern.o always += tracex3_kern.o always += tracex4_kern.o +always += tracex5_kern.o always += tcbpf1_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -40,6 +43,7 @@ HOSTLOADLIBES_tracex1 += -lelf HOSTLOADLIBES_tracex2 += -lelf HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt +HOSTLOADLIBES_tracex5 += -lelf # point this to your LLVM backend with bpf support LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index f960b5fb3ed8..699ed8dbdd64 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -21,6 +21,8 @@ static unsigned long long (*bpf_ktime_get_ns)(void) = (void *) BPF_FUNC_ktime_get_ns; static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) 
= (void *) BPF_FUNC_trace_printk; +static void (*bpf_tail_call)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_tail_call; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 38dac5a53b51..da86a8e0a95a 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -16,6 +16,7 @@ #include <sys/ioctl.h> #include <sys/mman.h> #include <poll.h> +#include <ctype.h> #include "libbpf.h" #include "bpf_helpers.h" #include "bpf_load.h" @@ -29,6 +30,19 @@ int map_fd[MAX_MAPS]; int prog_fd[MAX_PROGS]; int event_fd[MAX_PROGS]; int prog_cnt; +int prog_array_fd = -1; + +static int populate_prog_array(const char *event, int prog_fd) +{ + int ind = atoi(event), err; + + err = bpf_update_elem(prog_array_fd, &ind, &prog_fd, BPF_ANY); + if (err < 0) { + printf("failed to store prog_fd in prog_array\n"); + return -1; + } + return 0; +} static int load_and_attach(const char *event, struct bpf_insn *prog, int size) { @@ -54,12 +68,40 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return -1; } + fd = bpf_prog_load(prog_type, prog, size, license, kern_version); + if (fd < 0) { + printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); + return -1; + } + + prog_fd[prog_cnt++] = fd; + + if (is_socket) { + event += 6; + if (*event != '/') + return 0; + event++; + if (!isdigit(*event)) { + printf("invalid prog number\n"); + return -1; + } + return populate_prog_array(event, fd); + } + if (is_kprobe || is_kretprobe) { if (is_kprobe) event += 7; else event += 10; + if (*event == 0) { + printf("event name cannot be empty\n"); + return -1; + } + + if (isdigit(*event)) + return populate_prog_array(event, fd); + snprintf(buf, sizeof(buf), "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", is_kprobe ? 
'p' : 'r', event, event); @@ -71,18 +113,6 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) } } - fd = bpf_prog_load(prog_type, prog, size, license, kern_version); - - if (fd < 0) { - printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf); - return -1; - } - - prog_fd[prog_cnt++] = fd; - - if (is_socket) - return 0; - strcpy(buf, DEBUGFS); strcat(buf, "events/kprobes/"); strcat(buf, event); @@ -130,6 +160,9 @@ static int load_maps(struct bpf_map_def *maps, int len) maps[i].max_entries); if (map_fd[i] < 0) return 1; + + if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) + prog_array_fd = map_fd[i]; } return 0; } diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c new file mode 100644 index 000000000000..b71fe07a7a7a --- /dev/null +++ b/samples/bpf/tracex5_kern.c @@ -0,0 +1,75 @@ +/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#include <linux/ptrace.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include <uapi/linux/seccomp.h> +#include "bpf_helpers.h" + +#define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F + +struct bpf_map_def SEC("maps") progs = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1024, +}; + +SEC("kprobe/seccomp_phase1") +int bpf_prog1(struct pt_regs *ctx) +{ + struct seccomp_data sd = {}; + + bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + + /* dispatch into next BPF program depending on syscall number */ + bpf_tail_call(ctx, &progs, sd.nr); + + /* fall through -> unknown syscall */ + if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) { + char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n"; + bpf_trace_printk(fmt, sizeof(fmt), sd.nr); + } + return 0; +} + +/* we jump here when syscall number == __NR_write */ +PROG(__NR_write)(struct pt_regs *ctx) +{ + struct seccomp_data sd = {}; + + bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + if (sd.args[2] == 512) { + char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), + sd.args[0], sd.args[1], sd.args[2]); + } + return 0; +} + +PROG(__NR_read)(struct pt_regs *ctx) +{ + struct seccomp_data sd = {}; + + bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di); + if (sd.args[2] > 128 && sd.args[2] <= 1024) { + char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), + sd.args[0], sd.args[1], sd.args[2]); + } + return 0; +} + +PROG(__NR_mmap)(struct pt_regs *ctx) +{ + char fmt[] = "mmap\n"; + bpf_trace_printk(fmt, sizeof(fmt)); + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c new file mode 100644 index 000000000000..a04dd3cd4358 --- /dev/null +++ b/samples/bpf/tracex5_user.c @@ -0,0 +1,46 @@ +#include <stdio.h> +#include <linux/bpf.h> +#include <unistd.h> 
+#include <linux/filter.h> +#include <linux/seccomp.h> +#include <sys/prctl.h> +#include "libbpf.h" +#include "bpf_load.h" + +/* install fake seccomp program to enable seccomp code path inside the kernel, + * so that our kprobe attached to seccomp_phase1() can be triggered + */ +static void install_accept_all_seccomp(void) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), + .filter = filter, + }; + if (prctl(PR_SET_SECCOMP, 2, &prog)) + perror("prctl"); +} + +int main(int ac, char **argv) +{ + FILE *f; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + install_accept_all_seccomp(); + + f = popen("dd if=/dev/zero of=/dev/null count=5", "r"); + (void) f; + + read_trace_pipe(); + + return 0; +} |