diff options
Diffstat (limited to 'samples')
-rw-r--r-- | samples/bpf/Makefile | 8 | ||||
-rw-r--r-- | samples/bpf/bpf_helpers.h | 20 | ||||
-rw-r--r-- | samples/bpf/bpf_load.c | 267 | ||||
-rw-r--r-- | samples/bpf/bpf_load.h | 29 | ||||
-rw-r--r-- | samples/bpf/cookie_uid_helper_example.c | 321 | ||||
-rw-r--r-- | samples/bpf/libbpf.h | 10 | ||||
-rw-r--r-- | samples/bpf/map_perf_test_kern.c | 104 | ||||
-rw-r--r-- | samples/bpf/map_perf_test_user.c | 255 | ||||
-rwxr-xr-x | samples/bpf/run_cookie_uid_helper_example.sh | 14 | ||||
-rw-r--r-- | samples/bpf/test_lru_dist.c | 4 | ||||
-rw-r--r-- | samples/bpf/test_map_in_map_kern.c | 173 | ||||
-rw-r--r-- | samples/bpf/test_map_in_map_user.c | 116 | ||||
-rw-r--r-- | samples/bpf/tracex2_user.c | 7 | ||||
-rw-r--r-- | samples/bpf/tracex3_user.c | 7 | ||||
-rw-r--r-- | samples/bpf/tracex4_user.c | 8 | ||||
-rw-r--r-- | samples/bpf/xdp1_user.c | 40 | ||||
-rw-r--r-- | samples/bpf/xdp_tx_iptunnel_user.c | 13 | ||||
-rw-r--r-- | samples/livepatch/livepatch-sample.c | 18 | ||||
-rw-r--r-- | samples/statx/test-statx.c | 12 |
19 files changed, 1313 insertions, 113 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 09e9d535bd74..6c7468eb3684 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -34,6 +34,8 @@ hostprogs-y += sampleip hostprogs-y += tc_l2_redirect hostprogs-y += lwt_len_hist hostprogs-y += xdp_tx_iptunnel +hostprogs-y += test_map_in_map +hostprogs-y += per_socket_stats_example # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o @@ -72,6 +74,8 @@ sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o +test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o +per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -105,6 +109,8 @@ always += trace_event_kern.o always += sampleip_kern.o always += lwt_len_hist_kern.o always += xdp_tx_iptunnel_kern.o +always += test_map_in_map_kern.o +always += cookie_uid_helper_example.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -139,6 +145,7 @@ HOSTLOADLIBES_sampleip += -lelf HOSTLOADLIBES_tc_l2_redirect += -l elf HOSTLOADLIBES_lwt_len_hist += -l elf HOSTLOADLIBES_xdp_tx_iptunnel += -lelf +HOSTLOADLIBES_test_map_in_map += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang @@ -182,4 +189,5 @@ $(obj)/%.o: $(src)/%.c -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ + -Wno-unknown-warning-option \ -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index faaffe2e139a..9a9c95f2c9fb 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -80,6 +80,7 @@ struct bpf_map_def { unsigned int value_size; unsigned int max_entries; unsigned int map_flags; + unsigned int inner_map_idx; }; static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = @@ -145,11 +146,30 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) = #define PT_REGS_SP(x) ((x)->sp) #define PT_REGS_IP(x) ((x)->nip) +#elif defined(__sparc__) + +#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) +#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) +#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) +#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) +#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) +#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) +#if defined(__arch64__) +#define PT_REGS_IP(x) ((x)->tpc) +#else +#define PT_REGS_IP(x) ((x)->pc) +#endif + #endif #ifdef __powerpc__ #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) #define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP +#elif defined(__sparc__) +#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) +#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP #else #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index b86ee54da2d1..74456b3eb89a 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -14,6 +14,7 @@ #include <linux/perf_event.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> +#include <linux/types.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/syscall.h> @@ -21,6 +22,7 @@ #include <sys/mman.h> #include <poll.h> #include <ctype.h> +#include <assert.h> #include "libbpf.h" #include "bpf_load.h" #include "perf-sys.h" @@ -37,13 +39,8 @@ int event_fd[MAX_PROGS]; int prog_cnt; int prog_array_fd = -1; -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; -}; +struct bpf_map_data map_data[MAX_MAPS]; +int map_data_count = 0; static int populate_prog_array(const char *event, int prog_fd) { @@ -192,24 +189,45 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) return 0; } -static int load_maps(struct bpf_map_def *maps, int len) +static int load_maps(struct bpf_map_data *maps, int nr_maps, + fixup_map_cb fixup_map) { int i; - for (i = 0; i < len / sizeof(struct bpf_map_def); i++) { + for (i = 0; i < nr_maps; i++) { + if (fixup_map) { + fixup_map(&maps[i], i); + /* Allow userspace to assign map FD prior to creation */ + if (maps[i].fd != -1) { + map_fd[i] = maps[i].fd; + continue; + } + } - map_fd[i] = bpf_create_map(maps[i].type, - maps[i].key_size, - maps[i].value_size, - maps[i].max_entries, - maps[i].map_flags); + if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) { + int inner_map_fd = map_fd[maps[i].def.inner_map_idx]; + + map_fd[i] = bpf_create_map_in_map(maps[i].def.type, + maps[i].def.key_size, + inner_map_fd, + maps[i].def.max_entries, + maps[i].def.map_flags); + } else { + map_fd[i] = bpf_create_map(maps[i].def.type, + maps[i].def.key_size, + maps[i].def.value_size, + maps[i].def.max_entries, + maps[i].def.map_flags); + } if (map_fd[i] < 0) { printf("failed to create a map: %d %s\n", errno, strerror(errno)); return 1; } + maps[i].fd = map_fd[i]; - if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) + if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY) prog_array_fd = map_fd[i]; } return 0; @@ -239,7 +257,8 @@ static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname, } static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, - GElf_Shdr *shdr, struct bpf_insn *insn) + GElf_Shdr *shdr, struct bpf_insn *insn, + struct bpf_map_data *maps, int nr_maps) { int i, nrels; @@ -249,6 +268,8 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, GElf_Sym sym; GElf_Rel rel; unsigned int insn_idx; + bool match = false; + int j, map_idx; gelf_getrel(data, i, &rel); @@ -262,20 +283,157 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols, return 1; } insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; - insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)]; + + /* Match FD relocation against recorded map_data[] offset */ + for (map_idx = 0; map_idx < nr_maps; map_idx++) { + if (maps[map_idx].elf_offset == sym.st_value) { + match = true; + break; + } + } + if (match) { + insn[insn_idx].imm = maps[map_idx].fd; + } else { + printf("invalid relo for insn[%d] no map_data match\n", + insn_idx); + return 1; + } } return 0; } -int load_bpf_file(char *path) +static int cmp_symbols(const void *l, const void *r) { - int fd, i; + const GElf_Sym *lsym = (const GElf_Sym *)l; + const GElf_Sym *rsym = (const GElf_Sym *)r; + + if (lsym->st_value < rsym->st_value) + return -1; + else if (lsym->st_value > rsym->st_value) + return 1; + else + return 0; +} + +static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, + Elf *elf, Elf_Data *symbols, int strtabidx) +{ + int map_sz_elf, map_sz_copy; + bool validate_zero = false; + Elf_Data *data_maps; + int i, nr_maps; + GElf_Sym *sym; + Elf_Scn *scn; + int copy_sz; + + if (maps_shndx < 0) + return -EINVAL; + if (!symbols) + return -EINVAL; + + /* Get data for maps section via elf index */ + scn = elf_getscn(elf, maps_shndx); + if (scn) + data_maps = elf_getdata(scn, NULL); + if (!scn || !data_maps) { + printf("Failed to get Elf_Data from maps section %d\n", + maps_shndx); + return -EINVAL; + } + + /* For each map get corrosponding symbol table entry */ + sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym)); + for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { + assert(nr_maps < MAX_MAPS+1); + if (!gelf_getsym(symbols, i, &sym[nr_maps])) + continue; + if (sym[nr_maps].st_shndx != maps_shndx) + continue; + /* Only increment iif maps section */ + nr_maps++; + } + + /* Align to map_fd[] order, via sort on offset in sym.st_value */ + qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols); + + /* Keeping compatible with ELF maps section changes + * ------------------------------------------------ + * The program size of struct bpf_map_def is known by loader + * code, but struct stored in ELF file can be different. + * + * Unfortunately sym[i].st_size is zero. To calculate the + * struct size stored in the ELF file, assume all struct have + * the same size, and simply divide with number of map + * symbols. + */ + map_sz_elf = data_maps->d_size / nr_maps; + map_sz_copy = sizeof(struct bpf_map_def); + if (map_sz_elf < map_sz_copy) { + /* + * Backward compat, loading older ELF file with + * smaller struct, keeping remaining bytes zero. + */ + map_sz_copy = map_sz_elf; + } else if (map_sz_elf > map_sz_copy) { + /* + * Forward compat, loading newer ELF file with larger + * struct with unknown features. Assume zero means + * feature not used. Thus, validate rest of struct + * data is zero. + */ + validate_zero = true; + } + + /* Memcpy relevant part of ELF maps data to loader maps */ + for (i = 0; i < nr_maps; i++) { + unsigned char *addr, *end; + struct bpf_map_def *def; + const char *map_name; + size_t offset; + + map_name = elf_strptr(elf, strtabidx, sym[i].st_name); + maps[i].name = strdup(map_name); + if (!maps[i].name) { + printf("strdup(%s): %s(%d)\n", map_name, + strerror(errno), errno); + free(sym); + return -errno; + } + + /* Symbol value is offset into ELF maps section data area */ + offset = sym[i].st_value; + def = (struct bpf_map_def *)(data_maps->d_buf + offset); + maps[i].elf_offset = offset; + memset(&maps[i].def, 0, sizeof(struct bpf_map_def)); + memcpy(&maps[i].def, def, map_sz_copy); + + /* Verify no newer features were requested */ + if (validate_zero) { + addr = (unsigned char*) def + map_sz_copy; + end = (unsigned char*) def + map_sz_elf; + for (; addr < end; addr++) { + if (*addr != 0) { + free(sym); + return -EFBIG; + } + } + } + } + + free(sym); + return nr_maps; +} + +static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) +{ + int fd, i, ret, maps_shndx = -1, strtabidx = -1; Elf *elf; GElf_Ehdr ehdr; GElf_Shdr shdr, shdr_prog; - Elf_Data *data, *data_prog, *symbols = NULL; + Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL; char *shname, *shname_prog; + int nr_maps = 0; /* reset global variables */ kern_version = 0; @@ -323,14 +481,41 @@ int load_bpf_file(char *path) } memcpy(&kern_version, data->d_buf, sizeof(int)); } else if (strcmp(shname, "maps") == 0) { - processed_sec[i] = true; - if (load_maps(data->d_buf, data->d_size)) - return 1; + int j; + + maps_shndx = i; + data_maps = data; + for (j = 0; j < MAX_MAPS; j++) + map_data[j].fd = -1; } else if (shdr.sh_type == SHT_SYMTAB) { + strtabidx = shdr.sh_link; symbols = data; } } + ret = 1; + + if (!symbols) { + printf("missing SHT_SYMTAB section\n"); + goto done; + } + + if (data_maps) { + nr_maps = load_elf_maps_section(map_data, maps_shndx, + elf, symbols, strtabidx); + if (nr_maps < 0) { + printf("Error: Failed loading ELF maps (errno:%d):%s\n", + nr_maps, strerror(-nr_maps)); + ret = 1; + goto done; + } + if (load_maps(map_data, nr_maps, fixup_map)) + goto done; + map_data_count = nr_maps; + + processed_sec[maps_shndx] = true; + } + /* load programs that need map fixup (relocations) */ for (i = 1; i < ehdr.e_shnum; i++) { if (processed_sec[i]) @@ -354,7 +539,8 @@ int load_bpf_file(char *path) processed_sec[shdr.sh_info] = true; processed_sec[i] = true; - if (parse_relo_and_apply(data, symbols, &shdr, insns)) + if (parse_relo_and_apply(data, symbols, &shdr, insns, + map_data, nr_maps)) continue; if (memcmp(shname_prog, "kprobe/", 7) == 0 || @@ -387,8 +573,20 @@ int load_bpf_file(char *path) load_and_attach(shname, data->d_buf, data->d_size); } + ret = 0; +done: close(fd); - return 0; + return ret; +} + +int load_bpf_file(char *path) +{ + return do_load_bpf_file(path, NULL); +} + +int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map) +{ + return do_load_bpf_file(path, fixup_map); } void read_trace_pipe(void) @@ -473,7 +671,7 @@ struct ksym *ksym_search(long key) return &syms[0]; } -int set_link_xdp_fd(int ifindex, int fd) +int set_link_xdp_fd(int ifindex, int fd, __u32 flags) { struct sockaddr_nl sa; int sock, seq = 0, len, ret = -1; @@ -509,15 +707,28 @@ int set_link_xdp_fd(int ifindex, int fd) req.nh.nlmsg_seq = ++seq; req.ifinfo.ifi_family = AF_UNSPEC; req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ nla = (struct nlattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; + nla->nla_len = NLA_HDRLEN; - nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); - nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index c827827299b3..ca0563d04744 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -6,12 +6,36 @@ #define MAX_MAPS 32 #define MAX_PROGS 32 -extern int map_fd[MAX_MAPS]; +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; + unsigned int inner_map_idx; +}; + +struct bpf_map_data { + int fd; + char *name; + size_t elf_offset; + struct bpf_map_def def; +}; + +typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx); + extern int prog_fd[MAX_PROGS]; extern int event_fd[MAX_PROGS]; extern char bpf_log_buf[BPF_LOG_BUF_SIZE]; extern int prog_cnt; +/* There is a one-to-one mapping between map_fd[] and map_data[]. + * The map_data[] just contains more rich info on the given map. + */ +extern int map_fd[MAX_MAPS]; +extern struct bpf_map_data map_data[MAX_MAPS]; +extern int map_data_count; + /* parses elf file compiled by llvm .c->.o * . parses 'maps' section and creates maps via BPF syscall * . parses 'license' section and passes it to syscall @@ -25,6 +49,7 @@ extern int prog_cnt; * returns zero on success */ int load_bpf_file(char *path); +int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); void read_trace_pipe(void); struct ksym { @@ -34,5 +59,5 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); -int set_link_xdp_fd(int ifindex, int fd); +int set_link_xdp_fd(int ifindex, int fd, __u32 flags); #endif diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c new file mode 100644 index 000000000000..b08ab4e88929 --- /dev/null +++ b/samples/bpf/cookie_uid_helper_example.c @@ -0,0 +1,321 @@ +/* This test is a demo of using get_socket_uid and get_socket_cookie + * helper function to do per socket based network traffic monitoring. + * It requires iptables version higher then 1.6.1. to load pinned eBPF + * program into the xt_bpf match. + * + * TEST: + * ./run_cookie_uid_helper_example.sh -option + * option: + * -t: do traffic monitoring test, the program will continuously + * print out network traffic happens after program started A sample + * output is shown below: + * + * cookie: 877, uid: 0x3e8, Pakcet Count: 20, Bytes Count: 11058 + * cookie: 132, uid: 0x0, Pakcet Count: 2, Bytes Count: 286 + * cookie: 812, uid: 0x3e8, Pakcet Count: 3, Bytes Count: 1726 + * cookie: 802, uid: 0x3e8, Pakcet Count: 2, Bytes Count: 104 + * cookie: 877, uid: 0x3e8, Pakcet Count: 20, Bytes Count: 11058 + * cookie: 831, uid: 0x3e8, Pakcet Count: 2, Bytes Count: 104 + * cookie: 0, uid: 0x0, Pakcet Count: 6, Bytes Count: 712 + * cookie: 880, uid: 0xfffe, Pakcet Count: 1, Bytes Count: 70 + * + * -s: do getsockopt SO_COOKIE test, the program will set up a pair of + * UDP sockets and send packets between them. And read out the traffic data + * directly from the ebpf map based on the socket cookie. + * + * Clean up: if using shell script, the script file will delete the iptables + * rule and unmount the bpf program when exit. Else the iptables rule need + * to be deleted by hand, see run_cookie_uid_helper_example.sh for detail. + */ + +#define _GNU_SOURCE + +#define offsetof(type, member) __builtin_offsetof(type, member) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <limits.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <net/if.h> +#include <signal.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include "libbpf.h" + +#define PORT 8888 + +struct stats { + uint32_t uid; + uint64_t packets; + uint64_t bytes; +}; + +static int map_fd, prog_fd; + +static bool test_finish; + +static void maps_create(void) +{ + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t), + sizeof(struct stats), 100, 0); + if (map_fd < 0) + error(1, errno, "map create failed!\n"); +} + +static void prog_load(void) +{ + static char log_buf[1 << 16]; + + struct bpf_insn prog[] = { + /* + * Save sk_buff for future usage. value stored in R6 to R10 will + * not be reset after a bpf helper function call. + */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* + * pc1: BPF_FUNC_get_socket_cookie takes one parameter, + * R1: sk_buff + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_cookie), + /* pc2-4: save &socketCookie to r7 for future usage*/ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), + /* + * pc5-8: set up the registers for BPF_FUNC_map_lookup_elem, + * it takes two parameters (R1: map_fd, R2: &socket_cookie) + */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* + * pc9. if r0 != 0x0, go to pc+14, since we have the cookie + * stored already + * Otherwise do pc10-22 to setup a new data entry. + */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 14), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_uid), + /* + * Place a struct stats in the R10 stack and sequentially + * place the member value into the memory. Packets value + * is set by directly place a IMM value 1 into the stack. + */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, + -32 + (__s16)offsetof(struct stats, uid)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, + -32 + (__s16)offsetof(struct stats, packets), 1), + /* + * __sk_buff is a special struct used for eBPF program to + * directly access some sk_buff field. + */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, + -32 + (__s16)offsetof(struct stats, bytes)), + /* + * add new map entry using BPF_FUNC_map_update_elem, it takes + * 4 parameters (R1: map_fd, R2: &socket_cookie, R3: &stats, + * R4: flags) + */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -32), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_update_elem), + BPF_JMP_IMM(BPF_JA, 0, 0, 5), + /* + * pc24-30 update the packet info to a exist data entry, it can + * be done by directly write to pointers instead of using + * BPF_FUNC_map_update_elem helper function + */ + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1, + offsetof(struct stats, packets)), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_STX_XADD(BPF_DW, BPF_REG_9, BPF_REG_1, + offsetof(struct stats, bytes)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }; + prog_fd = bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + ARRAY_SIZE(prog), "GPL", 0, + log_buf, sizeof(log_buf)); + if (prog_fd < 0) + error(1, errno, "failed to load prog\n%s\n", log_buf); +} + +static void prog_attach_iptables(char *file) +{ + int ret; + char rules[100]; + + if (bpf_obj_pin(prog_fd, file)) + error(1, errno, "bpf_obj_pin"); + if (strlen(file) > 50) { + printf("file path too long: %s\n", file); + exit(1); + } + sprintf(rules, "iptables -A OUTPUT -m bpf --object-pinned %s -j ACCEPT", + file); + ret = system(rules); + if (ret < 0) { + printf("iptables rule update failed: %d/n", WEXITSTATUS(ret)); + exit(1); + } +} + +static void print_table(void) +{ + struct stats curEntry; + uint32_t curN = UINT32_MAX; + uint32_t nextN; + int res; + + while (bpf_map_get_next_key(map_fd, &curN, &nextN) > -1) { + curN = nextN; + res = bpf_map_lookup_elem(map_fd, &curN, &curEntry); + if (res < 0) { + error(1, errno, "fail to get entry value of Key: %u\n", + curN); + } else { + printf("cookie: %u, uid: 0x%x, Packet Count: %lu," + " Bytes Count: %lu\n", curN, curEntry.uid, + curEntry.packets, curEntry.bytes); + } + } +} + +static void udp_client(void) +{ + struct sockaddr_in si_other = {0}; + struct sockaddr_in si_me = {0}; + struct stats dataEntry; + int s_rcv, s_send, i, recv_len; + char message = 'a'; + char buf; + uint64_t cookie; + int res; + socklen_t cookie_len = sizeof(cookie); + socklen_t slen = sizeof(si_other); + + s_rcv = socket(PF_INET, SOCK_DGRAM, 0); + if (s_rcv < 0) + error(1, errno, "rcv socket creat failed!\n"); + si_other.sin_family = AF_INET; + si_other.sin_port = htons(PORT); + if (inet_aton("127.0.0.1", &si_other.sin_addr) == 0) + error(1, errno, "inet_aton\n"); + if (bind(s_rcv, (struct sockaddr *)&si_other, sizeof(si_other)) == -1) + error(1, errno, "bind\n"); + s_send = socket(PF_INET, SOCK_DGRAM, 0); + if (s_send < 0) + error(1, errno, "send socket creat failed!\n"); + res = getsockopt(s_send, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len); + if (res < 0) + printf("get cookie failed: %s\n", strerror(errno)); + res = bpf_map_lookup_elem(map_fd, &cookie, &dataEntry); + if (res != -1) + error(1, errno, "socket stat found while flow not active\n"); + for (i = 0; i < 10; i++) { + res = sendto(s_send, &message, sizeof(message), 0, + (struct sockaddr *)&si_other, slen); + if (res == -1) + error(1, errno, "send\n"); + if (res != sizeof(message)) + error(1, 0, "%uB != %luB\n", res, sizeof(message)); + recv_len = recvfrom(s_rcv, &buf, sizeof(buf), 0, + (struct sockaddr *)&si_me, &slen); + if (recv_len < 0) + error(1, errno, "revieve\n"); + res = memcmp(&(si_other.sin_addr), &(si_me.sin_addr), + sizeof(si_me.sin_addr)); + if (res != 0) + error(1, EFAULT, "sender addr error: %d\n", res); + printf("Message received: %c\n", buf); + res = bpf_map_lookup_elem(map_fd, &cookie, &dataEntry); + if (res < 0) + error(1, errno, "lookup sk stat failed, cookie: %lu\n", + cookie); + printf("cookie: %lu, uid: 0x%x, Packet Count: %lu," + " Bytes Count: %lu\n\n", cookie, dataEntry.uid, + dataEntry.packets, dataEntry.bytes); + } + close(s_send); + close(s_rcv); +} + +static int usage(void) +{ + printf("Usage: ./run_cookie_uid_helper_example.sh" + " bpfObjName -option\n" + " -t traffic monitor test\n" + " -s getsockopt cookie test\n"); + return 1; +} + +static void finish(int ret) +{ + test_finish = true; +} + +int main(int argc, char *argv[]) +{ + int opt; + bool cfg_test_traffic = false; + bool cfg_test_cookie = false; + + if (argc != 3) + return usage(); + while ((opt = getopt(argc, argv, "ts")) != -1) { + switch (opt) { + case 't': + cfg_test_traffic = true; + break; + case 's': + cfg_test_cookie = true; + break; + + default: + printf("unknown option %c\n", opt); + usage(); + return -1; + } + } + maps_create(); + prog_load(); + prog_attach_iptables(argv[2]); + if (cfg_test_traffic) { + if (signal(SIGINT, finish) == SIG_ERR) + error(1, errno, "register handler failed"); + while (!test_finish) { + print_table(); + printf("\n"); + sleep(1); + }; + } else if (cfg_test_cookie) { + udp_client(); + } + close(prog_fd); + close(map_fd); + return 0; +} diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 3705fba453a0..8ab36a04c174 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -135,6 +135,16 @@ struct bpf_insn; .off = OFF, \ .imm = 0 }) +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Memory store, *(uint *) (dst_reg + off16) = imm32 */ #define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c index a91872a97742..245165817fbe 100644 --- a/samples/bpf/map_perf_test_kern.c +++ b/samples/bpf/map_perf_test_kern.c @@ -11,6 +11,7 @@ #include "bpf_helpers.h" #define MAX_ENTRIES 1000 +#define MAX_NR_CPUS 1024 struct bpf_map_def SEC("maps") hash_map = { .type = BPF_MAP_TYPE_HASH, @@ -26,7 +27,7 @@ struct bpf_map_def SEC("maps") lru_hash_map = { .max_entries = 10000, }; -struct bpf_map_def SEC("maps") percpu_lru_hash_map = { +struct bpf_map_def SEC("maps") nocommon_lru_hash_map = { .type = BPF_MAP_TYPE_LRU_HASH, .key_size = sizeof(u32), .value_size = sizeof(long), @@ -34,6 +35,19 @@ struct bpf_map_def SEC("maps") percpu_lru_hash_map = { .map_flags = BPF_F_NO_COMMON_LRU, }; +struct bpf_map_def SEC("maps") inner_lru_hash_map = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = MAX_ENTRIES, +}; + +struct bpf_map_def SEC("maps") array_of_lru_hashs = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(u32), + .max_entries = MAX_NR_CPUS, +}; + struct bpf_map_def SEC("maps") percpu_hash_map = { .type = BPF_MAP_TYPE_PERCPU_HASH, .key_size = sizeof(u32), @@ -65,6 +79,13 @@ struct bpf_map_def SEC("maps") lpm_trie_map_alloc = { .map_flags = BPF_F_NO_PREALLOC, }; +struct bpf_map_def SEC("maps") array_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = MAX_ENTRIES, +}; + SEC("kprobe/sys_getuid") int stress_hmap(struct pt_regs *ctx) { @@ -93,6 +114,7 @@ int stress_percpu_hmap(struct pt_regs *ctx) bpf_map_delete_elem(&percpu_hash_map, &key); return 0; } + SEC("kprobe/sys_getgid") int stress_hmap_alloc(struct pt_regs *ctx) { @@ -121,24 +143,56 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx) return 0; } -SEC("kprobe/sys_getpid") +SEC("kprobe/sys_connect") int stress_lru_hmap_alloc(struct pt_regs *ctx) { - u32 key = bpf_get_prandom_u32(); + struct sockaddr_in6 *in6; + u16 test_case, dst6[8]; + int addrlen, ret; + char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n"; long val = 1; + u32 key = bpf_get_prandom_u32(); - bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY); + in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx); + addrlen = (int)PT_REGS_PARM3(ctx); - return 0; -} + if (addrlen != sizeof(*in6)) + return 0; -SEC("kprobe/sys_getppid") -int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx) -{ - u32 key = bpf_get_prandom_u32(); - long val = 1; + ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr); + if (ret) + goto done; - bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY); + if (dst6[0] != 0xdead || dst6[1] != 0xbeef) + return 0; + + test_case = dst6[7]; + + if (test_case == 0) { + ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY); + } else if (test_case == 1) { + ret = bpf_map_update_elem(&nocommon_lru_hash_map, &key, &val, + BPF_ANY); + } else if (test_case == 2) { + void *nolocal_lru_map; + int cpu = bpf_get_smp_processor_id(); + + nolocal_lru_map = bpf_map_lookup_elem(&array_of_lru_hashs, + &cpu); + if (!nolocal_lru_map) { + ret = -ENOENT; + goto done; + } + + ret = bpf_map_update_elem(nolocal_lru_map, &key, &val, + BPF_ANY); + } else { + ret = -EINVAL; + } + +done: + if (ret) + bpf_trace_printk(fmt, sizeof(fmt), ret); return 0; } @@ -165,5 +219,31 @@ int stress_lpm_trie_map_alloc(struct pt_regs *ctx) return 0; } +SEC("kprobe/sys_getpgid") +int stress_hash_map_lookup(struct pt_regs *ctx) +{ + u32 key = 1, i; + long *value; + +#pragma clang loop unroll(full) + for (i = 0; i < 64; ++i) + value = bpf_map_lookup_elem(&hash_map, &key); + + return 0; +} + +SEC("kprobe/sys_getpgrp") +int stress_array_map_lookup(struct pt_regs *ctx) +{ + u32 key = 1, i; + long *value; + +#pragma clang loop unroll(full) + for (i = 0; i < 64; ++i) + value = bpf_map_lookup_elem(&array_map, &key); + + return 0; +} + char _license[] SEC("license") = "GPL"; u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 680260a91f50..1a8894b5ac51 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -18,10 +18,14 @@ #include <string.h> #include <time.h> #include <sys/resource.h> +#include <arpa/inet.h> +#include <errno.h> + #include "libbpf.h" #include "bpf_load.h" -#define MAX_CNT 1000000 +#define TEST_BIT(t) (1U << (t)) +#define MAX_NR_CPUS 1024 static __u64 time_get_ns(void) { @@ -31,15 +35,44 @@ static __u64 time_get_ns(void) return ts.tv_sec * 1000000000ull + ts.tv_nsec; } -#define HASH_PREALLOC (1 << 0) -#define PERCPU_HASH_PREALLOC (1 << 1) -#define HASH_KMALLOC (1 << 2) -#define PERCPU_HASH_KMALLOC (1 << 3) -#define LRU_HASH_PREALLOC (1 << 4) -#define PERCPU_LRU_HASH_PREALLOC (1 << 5) -#define LPM_KMALLOC (1 << 6) +enum test_type { + HASH_PREALLOC, + PERCPU_HASH_PREALLOC, + HASH_KMALLOC, + PERCPU_HASH_KMALLOC, + LRU_HASH_PREALLOC, + NOCOMMON_LRU_HASH_PREALLOC, + LPM_KMALLOC, + HASH_LOOKUP, + ARRAY_LOOKUP, + INNER_LRU_HASH_PREALLOC, + NR_TESTS, +}; + +const char *test_map_names[NR_TESTS] = { + [HASH_PREALLOC] = "hash_map", + [PERCPU_HASH_PREALLOC] = "percpu_hash_map", + [HASH_KMALLOC] = "hash_map_alloc", + [PERCPU_HASH_KMALLOC] = "percpu_hash_map_alloc", + [LRU_HASH_PREALLOC] = "lru_hash_map", + [NOCOMMON_LRU_HASH_PREALLOC] = "nocommon_lru_hash_map", + [LPM_KMALLOC] = "lpm_trie_map_alloc", + [HASH_LOOKUP] = "hash_map", + [ARRAY_LOOKUP] = "array_map", + [INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map", +}; static int test_flags = ~0; +static uint32_t num_map_entries; +static uint32_t inner_lru_hash_size; +static int inner_lru_hash_idx = -1; +static int array_of_lru_hashs_idx = -1; +static uint32_t max_cnt = 1000000; + +static int check_test_flags(enum test_type t) +{ + return test_flags & TEST_BIT(t); +} static void test_hash_prealloc(int cpu) { @@ -47,34 +80,89 @@ static void test_hash_prealloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_getuid); printf("%d:hash_map_perf pre-alloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } -static void test_lru_hash_prealloc(int cpu) +static void do_test_lru(enum test_type test, int cpu) { + static int inner_lru_map_fds[MAX_NR_CPUS]; + + struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 }; + const char *test_name; __u64 start_time; - int i; + int i, ret; + + if (test == INNER_LRU_HASH_PREALLOC) { + int outer_fd = map_fd[array_of_lru_hashs_idx]; + + assert(cpu < MAX_NR_CPUS); + + if (cpu) { + inner_lru_map_fds[cpu] = + bpf_create_map(BPF_MAP_TYPE_LRU_HASH, + sizeof(uint32_t), sizeof(long), + inner_lru_hash_size, 0); + if (inner_lru_map_fds[cpu] == -1) { + printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", + strerror(errno), errno); + exit(1); + } + } else { + inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx]; + } + + ret = bpf_map_update_elem(outer_fd, &cpu, + &inner_lru_map_fds[cpu], + BPF_ANY); + if (ret) { + printf("cannot update ARRAY_OF_LRU_HASHS with key:%u. %s(%d)\n", + cpu, strerror(errno), errno); + exit(1); + } + } + + in6.sin6_addr.s6_addr16[0] = 0xdead; + in6.sin6_addr.s6_addr16[1] = 0xbeef; + + if (test == LRU_HASH_PREALLOC) { + test_name = "lru_hash_map_perf"; + in6.sin6_addr.s6_addr16[7] = 0; + } else if (test == NOCOMMON_LRU_HASH_PREALLOC) { + test_name = "nocommon_lru_hash_map_perf"; + in6.sin6_addr.s6_addr16[7] = 1; + } else if (test == INNER_LRU_HASH_PREALLOC) { + test_name = "inner_lru_hash_map_perf"; + in6.sin6_addr.s6_addr16[7] = 2; + } else { + assert(0); + } start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) - syscall(__NR_getpid); - printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + for (i = 0; i < max_cnt; i++) { + ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6)); + assert(ret == -1 && errno == EBADF); + } + printf("%d:%s pre-alloc %lld events per sec\n", + cpu, test_name, + max_cnt * 1000000000ll / (time_get_ns() - start_time)); +} + +static void test_lru_hash_prealloc(int cpu) +{ + do_test_lru(LRU_HASH_PREALLOC, cpu); } -static void test_percpu_lru_hash_prealloc(int cpu) +static void test_nocommon_lru_hash_prealloc(int cpu) { - __u64 start_time; - int i; + do_test_lru(NOCOMMON_LRU_HASH_PREALLOC, cpu); +} - start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) - syscall(__NR_getppid); - printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); +static void test_inner_lru_hash_prealloc(int cpu) +{ + do_test_lru(INNER_LRU_HASH_PREALLOC, cpu); } static void test_percpu_hash_prealloc(int cpu) @@ -83,10 +171,10 @@ static void test_percpu_hash_prealloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_geteuid); printf("%d:percpu_hash_map_perf pre-alloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } static void test_hash_kmalloc(int cpu) @@ -95,10 +183,10 @@ static void test_hash_kmalloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_getgid); printf("%d:hash_map_perf kmalloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } static void test_percpu_hash_kmalloc(int cpu) @@ -107,10 +195,10 @@ static void test_percpu_hash_kmalloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_getegid); printf("%d:percpu_hash_map_perf kmalloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } static void test_lpm_kmalloc(int cpu) @@ -119,40 +207,63 @@ static void test_lpm_kmalloc(int cpu) int i; start_time = time_get_ns(); - for (i = 0; i < MAX_CNT; i++) + for (i = 0; i < max_cnt; i++) syscall(__NR_gettid); printf("%d:lpm_perf kmalloc %lld events per sec\n", - cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); + cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); } -static void loop(int cpu) +static void test_hash_lookup(int cpu) { - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); - sched_setaffinity(0, sizeof(cpuset), &cpuset); + __u64 start_time; + int i; - if (test_flags & HASH_PREALLOC) - test_hash_prealloc(cpu); + start_time = time_get_ns(); + for (i = 0; i < max_cnt; i++) + syscall(__NR_getpgid, 0); + printf("%d:hash_lookup %lld lookups per sec\n", + cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); +} - if (test_flags & PERCPU_HASH_PREALLOC) - test_percpu_hash_prealloc(cpu); +static void test_array_lookup(int cpu) +{ + __u64 start_time; + int i; - if (test_flags & HASH_KMALLOC) - test_hash_kmalloc(cpu); + start_time = time_get_ns(); + for (i = 0; i < max_cnt; i++) + syscall(__NR_getpgrp, 0); + printf("%d:array_lookup %lld lookups per sec\n", + cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); +} - if (test_flags & PERCPU_HASH_KMALLOC) - test_percpu_hash_kmalloc(cpu); +typedef void (*test_func)(int cpu); +const test_func test_funcs[] = { + [HASH_PREALLOC] = test_hash_prealloc, + [PERCPU_HASH_PREALLOC] = test_percpu_hash_prealloc, + [HASH_KMALLOC] = test_hash_kmalloc, + [PERCPU_HASH_KMALLOC] = test_percpu_hash_kmalloc, + [LRU_HASH_PREALLOC] = test_lru_hash_prealloc, + [NOCOMMON_LRU_HASH_PREALLOC] = test_nocommon_lru_hash_prealloc, + [LPM_KMALLOC] = test_lpm_kmalloc, + [HASH_LOOKUP] = test_hash_lookup, + [ARRAY_LOOKUP] = test_array_lookup, + [INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc, +}; - if (test_flags & LRU_HASH_PREALLOC) - test_lru_hash_prealloc(cpu); +static void loop(int cpu) +{ + cpu_set_t cpuset; + int i; - if (test_flags & PERCPU_LRU_HASH_PREALLOC) - test_percpu_lru_hash_prealloc(cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); - if (test_flags & LPM_KMALLOC) - test_lpm_kmalloc(cpu); + for (i = 0; i < NR_TESTS; i++) { + if (check_test_flags(i)) + test_funcs[i](cpu); + } } static void run_perf_test(int tasks) @@ -209,6 +320,38 @@ static void fill_lpm_trie(void) assert(!r); } +static void fixup_map(struct bpf_map_data *map, int idx) +{ + int i; + + if (!strcmp("inner_lru_hash_map", map->name)) { + inner_lru_hash_idx = idx; + inner_lru_hash_size = map->def.max_entries; + } + + if (!strcmp("array_of_lru_hashs", map->name)) { + if (inner_lru_hash_idx == -1) { + printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n"); + exit(1); + } + map->def.inner_map_idx = inner_lru_hash_idx; + array_of_lru_hashs_idx = idx; + } + + if (num_map_entries <= 0) + return; + + inner_lru_hash_size = num_map_entries; + + /* Only change the max_entries for the enabled test(s) */ + for (i = 0; i < NR_TESTS; i++) { + if (!strcmp(test_map_names[i], map->name) && + (check_test_flags(i))) { + map->def.max_entries = num_map_entries; + } + } +} + int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; @@ -224,7 +367,13 @@ int main(int argc, char **argv) if (argc > 2) num_cpu = atoi(argv[2]) ? : num_cpu; - if (load_bpf_file(filename)) { + if (argc > 3) + num_map_entries = atoi(argv[3]); + + if (argc > 4) + max_cnt = atoi(argv[4]); + + if (load_bpf_file_fixup_map(filename, fixup_map)) { printf("%s", bpf_log_buf); return 1; } diff --git a/samples/bpf/run_cookie_uid_helper_example.sh b/samples/bpf/run_cookie_uid_helper_example.sh new file mode 100755 index 000000000000..f898cfa2b1aa --- /dev/null +++ b/samples/bpf/run_cookie_uid_helper_example.sh @@ -0,0 +1,14 @@ +#!/bin/bash +local_dir="$(pwd)" +root_dir=$local_dir/../.. +mnt_dir=$(mktemp -d --tmp) + +on_exit() { + iptables -D OUTPUT -m bpf --object-pinned ${mnt_dir}/bpf_prog -j ACCEPT + umount ${mnt_dir} + rm -r ${mnt_dir} +} + +trap on_exit EXIT +mount -t bpf bpf ${mnt_dir} +./per_socket_stats_example ${mnt_dir}/bpf_prog $1 diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index d96dc88d3b04..73c357142268 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -25,7 +25,9 @@ #include "bpf_util.h" #define min(a, b) ((a) < (b) ? (a) : (b)) -#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#ifndef offsetof +# define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#endif #define container_of(ptr, type, member) ({ \ const typeof( ((type *)0)->member ) *__mptr = (ptr); \ (type *)( (char *)__mptr - offsetof(type,member) );}) diff --git a/samples/bpf/test_map_in_map_kern.c b/samples/bpf/test_map_in_map_kern.c new file mode 100644 index 000000000000..42c44d091dd1 --- /dev/null +++ b/samples/bpf/test_map_in_map_kern.c @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include <linux/ptrace.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include <uapi/linux/in6.h> +#include "bpf_helpers.h" + +#define MAX_NR_PORTS 65536 + +/* map #0 */ +struct bpf_map_def SEC("maps") port_a = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(int), + .max_entries = MAX_NR_PORTS, +}; + +/* map #1 */ +struct bpf_map_def SEC("maps") port_h = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(int), + .max_entries = 1, +}; + +/* map #2 */ +struct bpf_map_def SEC("maps") reg_result_h = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(int), + .max_entries = 1, +}; + +/* map #3 */ +struct bpf_map_def SEC("maps") inline_result_h = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u32), + .value_size = sizeof(int), + .max_entries = 1, +}; + +/* map #4 */ /* Test case #0 */ +struct bpf_map_def SEC("maps") a_of_port_a = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(u32), + .inner_map_idx = 0, /* map_fd[0] is port_a */ + .max_entries = MAX_NR_PORTS, +}; + +/* map #5 */ /* Test case #1 */ +struct bpf_map_def SEC("maps") h_of_port_a = { + .type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(u32), + .inner_map_idx = 0, /* map_fd[0] is port_a */ + .max_entries = 1, +}; + +/* map #6 */ /* Test case #2 */ +struct bpf_map_def SEC("maps") h_of_port_h = { + .type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(u32), + .inner_map_idx = 1, /* map_fd[1] is port_h */ + .max_entries = 1, +}; + +static __always_inline int do_reg_lookup(void *inner_map, u32 port) +{ + int *result; + + result = bpf_map_lookup_elem(inner_map, &port); + return result ? *result : -ENOENT; +} + +static __always_inline int do_inline_array_lookup(void *inner_map, u32 port) +{ + int *result; + + if (inner_map != &port_a) + return -EINVAL; + + result = bpf_map_lookup_elem(&port_a, &port); + return result ? *result : -ENOENT; +} + +static __always_inline int do_inline_hash_lookup(void *inner_map, u32 port) +{ + int *result; + + if (inner_map != &port_h) + return -EINVAL; + + result = bpf_map_lookup_elem(&port_h, &port); + return result ? *result : -ENOENT; +} + +SEC("kprobe/sys_connect") +int trace_sys_connect(struct pt_regs *ctx) +{ + struct sockaddr_in6 *in6; + u16 test_case, port, dst6[8]; + int addrlen, ret, inline_ret, ret_key = 0; + u32 port_key; + void *outer_map, *inner_map; + bool inline_hash = false; + + in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx); + addrlen = (int)PT_REGS_PARM3(ctx); + + if (addrlen != sizeof(*in6)) + return 0; + + ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr); + if (ret) { + inline_ret = ret; + goto done; + } + + if (dst6[0] != 0xdead || dst6[1] != 0xbeef) + return 0; + + test_case = dst6[7]; + + ret = bpf_probe_read(&port, sizeof(port), &in6->sin6_port); + if (ret) { + inline_ret = ret; + goto done; + } + + port_key = port; + + ret = -ENOENT; + if (test_case == 0) { + outer_map = &a_of_port_a; + } else if (test_case == 1) { + outer_map = &h_of_port_a; + } else if (test_case == 2) { + outer_map = &h_of_port_h; + } else { + ret = __LINE__; + inline_ret = ret; + goto done; + } + + inner_map = bpf_map_lookup_elem(outer_map, &port_key); + if (!inner_map) { + ret = __LINE__; + inline_ret = ret; + goto done; + } + + ret = do_reg_lookup(inner_map, port_key); + + if (test_case == 0 || test_case == 1) + inline_ret = do_inline_array_lookup(inner_map, port_key); + else + inline_ret = do_inline_hash_lookup(inner_map, port_key); + +done: + bpf_map_update_elem(®_result_h, &ret_key, &ret, BPF_ANY); + bpf_map_update_elem(&inline_result_h, &ret_key, &inline_ret, BPF_ANY); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c new file mode 100644 index 000000000000..f62fdc2bd428 --- /dev/null +++ b/samples/bpf/test_map_in_map_user.c @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <sys/resource.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <stdint.h> +#include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <stdio.h> +#include "libbpf.h" +#include "bpf_load.h" + +#define PORT_A (map_fd[0]) +#define PORT_H (map_fd[1]) +#define REG_RESULT_H (map_fd[2]) +#define INLINE_RESULT_H (map_fd[3]) +#define A_OF_PORT_A (map_fd[4]) /* Test case #0 */ +#define H_OF_PORT_A (map_fd[5]) /* Test case #1 */ +#define H_OF_PORT_H (map_fd[6]) /* Test case #2 */ + +static const char * const test_names[] = { + "Array of Array", + "Hash of Array", + "Hash of Hash", +}; + +#define NR_TESTS (sizeof(test_names) / sizeof(*test_names)) + +static void populate_map(uint32_t port_key, int magic_result) +{ + int ret; + + ret = bpf_map_update_elem(PORT_A, &port_key, &magic_result, BPF_ANY); + assert(!ret); + + ret = bpf_map_update_elem(PORT_H, &port_key, &magic_result, + BPF_NOEXIST); + assert(!ret); + + ret = bpf_map_update_elem(A_OF_PORT_A, &port_key, &PORT_A, BPF_ANY); + assert(!ret); + + ret = bpf_map_update_elem(H_OF_PORT_A, &port_key, &PORT_A, BPF_NOEXIST); + assert(!ret); + + ret = bpf_map_update_elem(H_OF_PORT_H, &port_key, &PORT_H, BPF_NOEXIST); + assert(!ret); +} + +static void test_map_in_map(void) +{ + struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 }; + uint32_t result_key = 0, port_key; + int result, inline_result; + int magic_result = 0xfaceb00c; + int ret; + int i; + + port_key = rand() & 0x00FF; + populate_map(port_key, magic_result); + + in6.sin6_addr.s6_addr16[0] = 0xdead; + in6.sin6_addr.s6_addr16[1] = 0xbeef; + in6.sin6_port = port_key; + + for (i = 0; i < NR_TESTS; i++) { + printf("%s: ", test_names[i]); + + in6.sin6_addr.s6_addr16[7] = i; + ret = connect(-1, (struct sockaddr *)&in6, sizeof(in6)); + assert(ret == -1 && errno == EBADF); + + ret = bpf_map_lookup_elem(REG_RESULT_H, &result_key, &result); + assert(!ret); + + ret = bpf_map_lookup_elem(INLINE_RESULT_H, &result_key, + &inline_result); + assert(!ret); + + if (result != magic_result || inline_result != magic_result) { + printf("Error. result:%d inline_result:%d\n", + result, inline_result); + exit(1); + } + + bpf_map_delete_elem(REG_RESULT_H, &result_key); + bpf_map_delete_elem(INLINE_RESULT_H, &result_key); + + printf("Pass\n"); + } +} + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256]; + + assert(!setrlimit(RLIMIT_MEMLOCK, &r)); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + test_map_in_map(); + + return 0; +} diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index ded9804c5034..7fee0f1ba9a3 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -4,6 +4,7 @@ #include <signal.h> #include <linux/bpf.h> #include <string.h> +#include <sys/resource.h> #include "libbpf.h" #include "bpf_load.h" @@ -112,6 +113,7 @@ static void int_exit(int sig) int main(int ac, char **argv) { + struct rlimit r = {1024*1024, RLIM_INFINITY}; char filename[256]; long key, next_key, value; FILE *f; @@ -119,6 +121,11 @@ int main(int ac, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + signal(SIGINT, int_exit); /* start 'ping' in the background to have some kfree_skb events */ diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index 8f7d199d5945..fe372239d505 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -11,6 +11,7 @@ #include <stdbool.h> #include <string.h> #include <linux/bpf.h> +#include <sys/resource.h> #include "libbpf.h" #include "bpf_load.h" @@ -112,11 +113,17 @@ static void print_hist(int fd) int main(int ac, char **argv) { + struct rlimit r = {1024*1024, RLIM_INFINITY}; char filename[256]; int i; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); return 1; diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index 03449f773cb1..22c644f1f4c3 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -12,6 +12,8 @@ #include <string.h> #include <time.h> #include <linux/bpf.h> +#include <sys/resource.h> + #include "libbpf.h" #include "bpf_load.h" @@ -50,11 +52,17 @@ static void print_old_objects(int fd) int main(int ac, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256]; int i; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); + return 1; + } + if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); return 1; diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index d2be65d1fd86..378850c70eb8 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -5,6 +5,7 @@ * License as published by the Free Software Foundation. */ #include <linux/bpf.h> +#include <linux/if_link.h> #include <assert.h> #include <errno.h> #include <signal.h> @@ -12,16 +13,18 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <libgen.h> #include "bpf_load.h" #include "bpf_util.h" #include "libbpf.h" static int ifindex; +static __u32 xdp_flags; static void int_exit(int sig) { - set_link_xdp_fd(ifindex, -1); + set_link_xdp_fd(ifindex, -1, xdp_flags); exit(0); } @@ -54,18 +57,39 @@ static void poll_stats(int interval) } } -int main(int ac, char **argv) +static void usage(const char *prog) { - char filename[256]; + fprintf(stderr, + "usage: %s [OPTS] IFINDEX\n\n" + "OPTS:\n" + " -S use skb-mode\n", + prog); +} - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); +int main(int argc, char **argv) +{ + const char *optstr = "S"; + char filename[256]; + int opt; + + while ((opt = getopt(argc, argv, optstr)) != -1) { + switch (opt) { + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + default: + usage(basename(argv[0])); + return 1; + } + } - if (ac != 2) { - printf("usage: %s IFINDEX\n", argv[0]); + if (optind == argc) { + usage(basename(argv[0])); return 1; } + ifindex = strtoul(argv[optind], NULL, 0); - ifindex = strtoul(argv[1], NULL, 0); + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf); @@ -79,7 +103,7 @@ int main(int ac, char **argv) signal(SIGINT, int_exit); - if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { + if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 70e192fc61aa..92b8bde9337c 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -5,6 +5,7 @@ * License as published by the Free Software Foundation. */ #include <linux/bpf.h> +#include <linux/if_link.h> #include <assert.h> #include <errno.h> #include <signal.h> @@ -24,11 +25,12 @@ #define STATS_INTERVAL_S 2U static int ifindex = -1; +static __u32 xdp_flags = 0; static void int_exit(int sig) { if (ifindex > -1) - set_link_xdp_fd(ifindex, -1); + set_link_xdp_fd(ifindex, -1, xdp_flags); exit(0); } @@ -136,7 +138,7 @@ int main(int argc, char **argv) { unsigned char opt_flags[256] = {}; unsigned int kill_after_s = 0; - const char *optstr = "i:a:p:s:d:m:T:P:h"; + const char *optstr = "i:a:p:s:d:m:T:P:Sh"; int min_port = 0, max_port = 0; struct iptnl_info tnl = {}; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; @@ -201,6 +203,9 @@ int main(int argc, char **argv) case 'T': kill_after_s = atoi(optarg); break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; default: usage(argv[0]); return 1; @@ -243,14 +248,14 @@ int main(int argc, char **argv) } } - if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { + if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } poll_stats(kill_after_s); - set_link_xdp_fd(ifindex, -1); + set_link_xdp_fd(ifindex, -1, xdp_flags); return 0; } diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c index e34f871e69b1..84795223f15f 100644 --- a/samples/livepatch/livepatch-sample.c +++ b/samples/livepatch/livepatch-sample.c @@ -17,6 +17,8 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/livepatch.h> @@ -69,6 +71,21 @@ static int livepatch_init(void) { int ret; + if (!klp_have_reliable_stack() && !patch.immediate) { + /* + * WARNING: Be very careful when using 'patch.immediate' in + * your patches. It's ok to use it for simple patches like + * this, but for more complex patches which change function + * semantics, locking semantics, or data structures, it may not + * be safe. Use of this option will also prevent removal of + * the patch. + * + * See Documentation/livepatch/livepatch.txt for more details. + */ + patch.immediate = true; + pr_notice("The consistency model isn't supported for your architecture. Bypassing safety mechanisms and applying the patch immediately.\n"); + } + ret = klp_register_patch(&patch); if (ret) return ret; @@ -82,7 +99,6 @@ static int livepatch_init(void) static void livepatch_exit(void) { - WARN_ON(klp_disable_patch(&patch)); WARN_ON(klp_unregister_patch(&patch)); } diff --git a/samples/statx/test-statx.c b/samples/statx/test-statx.c index 8571d766331d..d4d77b09412c 100644 --- a/samples/statx/test-statx.c +++ b/samples/statx/test-statx.c @@ -141,8 +141,8 @@ static void dump_statx(struct statx *stx) if (stx->stx_mask & STATX_BTIME) print_time(" Birth: ", &stx->stx_btime); - if (stx->stx_attributes) { - unsigned char bits; + if (stx->stx_attributes_mask) { + unsigned char bits, mbits; int loop, byte; static char attr_representation[64 + 1] = @@ -160,14 +160,18 @@ static void dump_statx(struct statx *stx) printf("Attributes: %016llx (", stx->stx_attributes); for (byte = 64 - 8; byte >= 0; byte -= 8) { bits = stx->stx_attributes >> byte; + mbits = stx->stx_attributes_mask >> byte; for (loop = 7; loop >= 0; loop--) { int bit = byte + loop; - if (bits & 0x80) + if (!(mbits & 0x80)) + putchar('.'); /* Not supported */ + else if (bits & 0x80) putchar(attr_representation[63 - bit]); else - putchar('-'); + putchar('-'); /* Not set */ bits <<= 1; + mbits <<= 1; } if (byte) putchar(' '); |