diff options
Diffstat (limited to 'tools/lib/bpf')
31 files changed, 3264 insertions, 864 deletions
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index e2cd558ca0b4..c80204bb72a2 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_utils.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ usdt.o zip.o elf.o features.o btf_iter.o btf_relocate.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 359f73ead613..5846de364209 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -154,7 +154,7 @@ int bump_rlimit_memlock(void) memlock_bumped = true; - /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ + /* zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */ if (memlock_rlim == 0) return 0; @@ -172,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd); + const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size); union bpf_attr attr; int fd; @@ -203,6 +203,8 @@ int bpf_map_create(enum bpf_map_type map_type, attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); attr.map_token_fd = OPTS_GET(opts, token_fd, 0); + attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL)); + attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0); fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); @@ -238,7 +240,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt); + const size_t attr_sz = offsetofend(union bpf_attr, keyring_id); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -792,6 +794,7 @@ int bpf_link_create(int prog_fd, int target_fd, case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: + case BPF_TRACE_FSESSION: case BPF_LSM_MAC: attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0); if (!OPTS_ZEROED(opts, tracing)) @@ -837,6 +840,50 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, netkit)) return libbpf_err(-EINVAL); break; + case BPF_CGROUP_INET_INGRESS: + case BPF_CGROUP_INET_EGRESS: + case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_INET_SOCK_RELEASE: + case BPF_CGROUP_INET4_BIND: + case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET4_POST_BIND: + case BPF_CGROUP_INET6_POST_BIND: + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: + case BPF_CGROUP_INET4_GETPEERNAME: + case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: + case BPF_CGROUP_INET4_GETSOCKNAME: + case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: + case BPF_CGROUP_SOCK_OPS: + case BPF_CGROUP_DEVICE: + case BPF_CGROUP_SYSCTL: + case BPF_CGROUP_GETSOCKOPT: + case BPF_CGROUP_SETSOCKOPT: + case BPF_LSM_CGROUP: + relative_fd = OPTS_GET(opts, cgroup.relative_fd, 0); + relative_id = OPTS_GET(opts, cgroup.relative_id, 0); + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + if (relative_id) { + attr.link_create.cgroup.relative_id = relative_id; + attr.link_create.flags |= BPF_F_ID; + } else { + attr.link_create.cgroup.relative_fd = relative_fd; + } + attr.link_create.cgroup.expected_revision = + OPTS_GET(opts, cgroup.expected_revision, 0); + if (!OPTS_ZEROED(opts, cgroup)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); @@ -1097,7 +1144,7 @@ int bpf_map_get_fd_by_id(__u32 id) int bpf_btf_get_fd_by_id_opts(__u32 id, const struct bpf_get_fd_by_id_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, open_flags); + const size_t attr_sz = offsetofend(union bpf_attr, fd_by_id_token_fd); union bpf_attr attr; int fd; @@ -1107,6 +1154,7 @@ int bpf_btf_get_fd_by_id_opts(__u32 id, memset(&attr, 0, attr_sz); attr.btf_id = id; attr.open_flags = OPTS_GET(opts, open_flags, 0); + attr.fd_by_id_token_fd = OPTS_GET(opts, token_fd, 0); fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); @@ -1330,3 +1378,42 @@ int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } + +int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, + struct bpf_prog_stream_read_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_stream_read); + union bpf_attr attr; + int err; + + if (!OPTS_VALID(opts, bpf_prog_stream_read_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.prog_stream_read.stream_buf = ptr_to_u64(buf); + attr.prog_stream_read.stream_buf_len = buf_len; + attr.prog_stream_read.stream_id = stream_id; + attr.prog_stream_read.prog_fd = prog_fd; + + err = sys_bpf(BPF_PROG_STREAM_READ_BY_FD, &attr, attr_sz); + return libbpf_err_errno(err); +} + +int bpf_prog_assoc_struct_ops(int prog_fd, int map_fd, + struct bpf_prog_assoc_struct_ops_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_assoc_struct_ops); + union bpf_attr attr; + int err; + + if (!OPTS_VALID(opts, bpf_prog_assoc_struct_ops_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.prog_assoc_struct_ops.map_fd = map_fd; + attr.prog_assoc_struct_ops.prog_fd = prog_fd; + attr.prog_assoc_struct_ops.flags = OPTS_GET(opts, flags, 0); + + err = sys_bpf(BPF_PROG_ASSOC_STRUCT_OPS, &attr, attr_sz); + return libbpf_err_errno(err); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 435da95d2058..2c8e88ddb674 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -54,9 +54,12 @@ struct bpf_map_create_opts { __s32 value_type_btf_obj_fd; __u32 token_fd; + + const void *excl_prog_hash; + __u32 excl_prog_hash_size; size_t :0; }; -#define bpf_map_create_opts__last_field token_fd +#define bpf_map_create_opts__last_field excl_prog_hash_size LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, @@ -286,6 +289,14 @@ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, * Update spin_lock-ed map elements. This must be * specified if the map value contains a spinlock. * + * **BPF_F_CPU** + * As for percpu maps, update value on the specified CPU. And the cpu + * info is embedded into the high 32 bits of **opts->elem_flags**. + * + * **BPF_F_ALL_CPUS** + * As for percpu maps, update value across all CPUs. This flag cannot + * be used with BPF_F_CPU at the same time. + * * @param fd BPF map file descriptor * @param keys pointer to an array of *count* keys * @param values pointer to an array of *count* values @@ -438,6 +449,11 @@ struct bpf_link_create_opts { __u32 relative_id; __u64 expected_revision; } netkit; + struct { + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + } cgroup; }; size_t :0; }; @@ -487,9 +503,10 @@ LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id); struct bpf_get_fd_by_id_opts { size_t sz; /* size of this struct for forward/backward compatibility */ __u32 open_flags; /* permissions requested for the operation on fd */ + __u32 token_fd; size_t :0; }; -#define bpf_get_fd_by_id_opts__last_field open_flags +#define bpf_get_fd_by_id_opts__last_field token_fd LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id, @@ -703,6 +720,48 @@ struct bpf_token_create_opts { LIBBPF_API int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts); +struct bpf_prog_stream_read_opts { + size_t sz; + size_t :0; +}; +#define bpf_prog_stream_read_opts__last_field sz +/** + * @brief **bpf_prog_stream_read** reads data from the BPF stream of a given BPF + * program. + * + * @param prog_fd FD for the BPF program whose BPF stream is to be read. + * @param stream_id ID of the BPF stream to be read. + * @param buf Buffer to read data into from the BPF stream. + * @param buf_len Maximum number of bytes to read from the BPF stream. + * @param opts optional options, can be NULL + * + * @return The number of bytes read, on success; negative error code, otherwise + * (errno is also set to the error code) + */ +LIBBPF_API int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, + struct bpf_prog_stream_read_opts *opts); + +struct bpf_prog_assoc_struct_ops_opts { + size_t sz; + __u32 flags; + size_t :0; +}; +#define bpf_prog_assoc_struct_ops_opts__last_field flags + +/** + * @brief **bpf_prog_assoc_struct_ops** associates a BPF program with a + * struct_ops map. + * + * @param prog_fd FD for the BPF program + * @param map_fd FD for the struct_ops map to be associated with the BPF program + * @param opts optional options, can be NULL + * + * @return 0 on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_prog_assoc_struct_ops(int prog_fd, int map_fd, + struct bpf_prog_assoc_struct_ops_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index c0e13cdf9660..b997c68bd945 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak; #define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) +#if defined(__clang__) && (__clang_major__ >= 19) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#elif defined(__GNUC__) && (__GNUC__ >= 14) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#else #define ___type(...) typeof(___arrow(__VA_ARGS__)) +#endif #define ___read(read_fn, dst, src_type, src, accessor) \ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 6ff963a491d9..49af4260b8e6 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -4,6 +4,7 @@ #define __BPF_GEN_INTERNAL_H #include "bpf.h" +#include "libbpf_internal.h" struct ksym_relo_desc { const char *name; @@ -50,6 +51,7 @@ struct bpf_gen { __u32 nr_ksyms; int fd_array; int nr_fd_array; + int hash_insn_offset[SHA256_DWORD_SIZE]; }; void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps); diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 686824b8b413..9d160b5b9c0e 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -15,6 +15,14 @@ #define __array(name, val) typeof(val) *name[] #define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name +#ifndef likely +#define likely(x) (__builtin_expect(!!(x), 1)) +#endif + +#ifndef unlikely +#define unlikely(x) (__builtin_expect(!!(x), 0)) +#endif + /* * Helper macro to place programs, maps, license in * different sections in elf_bpf file. Section names @@ -207,6 +215,7 @@ enum libbpf_tristate { #define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) #define __arg_nullable __attribute((btf_decl_tag("arg:nullable"))) #define __arg_trusted __attribute((btf_decl_tag("arg:trusted"))) +#define __arg_untrusted __attribute((btf_decl_tag("arg:untrusted"))) #define __arg_arena __attribute((btf_decl_tag("arg:arena"))) #ifndef ___bpf_concat @@ -306,6 +315,19 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) +#define bpf_stream_printk(stream_id, fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param)); \ +}) + /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args * Otherwise use __bpf_vprintk */ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index a8f6cd4841b0..dbe32a5d02cd 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -311,7 +311,7 @@ struct pt_regs___arm64 { #define __PT_RET_REG regs[31] #define __PT_FP_REG __unsupported__ #define __PT_RC_REG gpr[3] -#define __PT_SP_REG sp +#define __PT_SP_REG gpr[1] #define __PT_IP_REG nip #elif defined(bpf_target_sparc) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 48c66f3a9200..ceb57b46a878 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -12,6 +12,7 @@ #include <sys/utsname.h> #include <sys/param.h> #include <sys/stat.h> +#include <sys/mman.h> #include <linux/kernel.h> #include <linux/err.h> #include <linux/btf.h> @@ -22,13 +23,42 @@ #include "libbpf_internal.h" #include "hashmap.h" #include "strset.h" -#include "str_error.h" #define BTF_MAX_NR_TYPES 0x7fffffffU #define BTF_MAX_STR_OFFSET 0x7fffffffU static struct btf_type btf_void; +/* + * Describe how kinds are laid out; some have a singular element following the "struct btf_type", + * some have BTF_INFO_VLEN(t->info) elements. Specify sizes for both. Flags are currently unused. + * Kind layout can be optionally added to the BTF representation in a dedicated section to + * facilitate parsing. New kinds must be added here. + */ +static struct btf_layout layouts[NR_BTF_KINDS] = { +/* singular element size vlen element(s) size flags */ +[BTF_KIND_UNKN] = { 0, 0, 0 }, +[BTF_KIND_INT] = { sizeof(__u32), 0, 0 }, +[BTF_KIND_PTR] = { 0, 0, 0 }, +[BTF_KIND_ARRAY] = { sizeof(struct btf_array), 0, 0 }, +[BTF_KIND_STRUCT] = { 0, sizeof(struct btf_member), 0 }, +[BTF_KIND_UNION] = { 0, sizeof(struct btf_member), 0 }, +[BTF_KIND_ENUM] = { 0, sizeof(struct btf_enum), 0 }, +[BTF_KIND_FWD] = { 0, 0, 0 }, +[BTF_KIND_TYPEDEF] = { 0, 0, 0 }, +[BTF_KIND_VOLATILE] = { 0, 0, 0 }, +[BTF_KIND_CONST] = { 0, 0, 0 }, +[BTF_KIND_RESTRICT] = { 0, 0, 0 }, +[BTF_KIND_FUNC] = { 0, 0, 0 }, +[BTF_KIND_FUNC_PROTO] = { 0, sizeof(struct btf_param), 0 }, +[BTF_KIND_VAR] = { sizeof(struct btf_var), 0, 0 }, +[BTF_KIND_DATASEC] = { 0, sizeof(struct btf_var_secinfo), 0 }, +[BTF_KIND_FLOAT] = { 0, 0, 0 }, +[BTF_KIND_DECL_TAG] = { sizeof(struct btf_decl_tag), 0, 0 }, +[BTF_KIND_TYPE_TAG] = { 0, 0, 0 }, +[BTF_KIND_ENUM64] = { 0, sizeof(struct btf_enum64), 0 }, +}; + struct btf { /* raw BTF data in native endianness */ void *raw_data; @@ -40,42 +70,53 @@ struct btf { /* * When BTF is loaded from an ELF or raw memory it is stored - * in a contiguous memory block. The hdr, type_data, and, strs_data + * in a contiguous memory block. The type_data, layout and strs_data * point inside that memory region to their respective parts of BTF * representation: * - * +--------------------------------+ - * | Header | Types | Strings | - * +--------------------------------+ - * ^ ^ ^ - * | | | - * hdr | | - * types_data-+ | - * strs_data------------+ + * +----------------------------------------+---------------+ + * | Header | Types | Optional layout | Strings | + * +--------------------------------------------------------+ + * ^ ^ ^ ^ + * | | | | + * raw_data | | | + * types_data-+ | | + * layout---------------+ | + * strs_data--------------------------------+ + * + * A separate struct btf_header is embedded as btf->hdr, + * and header information is copied into it. This allows us + * to handle header data for various header formats; the original, + * the extended header with layout info, etc. * * If BTF data is later modified, e.g., due to types added or * removed, BTF deduplication performed, etc, this contiguous - * representation is broken up into three independently allocated - * memory regions to be able to modify them independently. + * representation is broken up into four independent memory + * regions. + * * raw_data is nulled out at that point, but can be later allocated * and cached again if user calls btf__raw_data(), at which point - * raw_data will contain a contiguous copy of header, types, and - * strings: + * raw_data will contain a contiguous copy of header, types, optional + * layout and strings. layout optionally points to a + * btf_layout array - this allows us to encode information about + * the kinds known at encoding time. If layout is NULL no + * layout information is encoded. * - * +----------+ +---------+ +-----------+ - * | Header | | Types | | Strings | - * +----------+ +---------+ +-----------+ - * ^ ^ ^ - * | | | - * hdr | | - * types_data----+ | - * strset__data(strs_set)-----+ + * +----------+ +---------+ +-----------+ +-----------+ + * | Header | | Types | | Layout | | Strings | + * +----------+ +---------+ +-----------+ +-----------+ + * ^ ^ ^ ^ + * | | | | + * hdr | | | + * types_data----+ | | + * layout---------------------+ | + * strset__data(strs_set)---------------------+ * - * +----------+---------+-----------+ - * | Header | Types | Strings | - * raw_data----->+----------+---------+-----------+ + * +----------+---------+-------------------+-----------+ + * | Header | Types | Optional Layout | Strings | + * raw_data----->+----------+---------+-------------------+-----------+ */ - struct btf_header *hdr; + struct btf_header hdr; void *types_data; size_t types_data_cap; /* used size stored in hdr->type_len */ @@ -92,6 +133,8 @@ struct btf { * - for split BTF counts number of types added on top of base BTF. */ __u32 nr_types; + /* the start IDs of named types in sorted BTF */ + int named_start_id; /* if not NULL, points to the base BTF on top of which the current * split BTF is based */ @@ -120,6 +163,20 @@ struct btf { /* whether base_btf should be freed in btf_free for this instance */ bool owns_base; + /* whether raw_data is a (read-only) mmap */ + bool raw_data_is_mmap; + + /* is BTF modifiable? i.e. is it split into separate sections as described above? */ + bool modifiable; + /* does BTF have header information we do not support? If so, disallow + * modification. + */ + bool has_hdr_extra; + /* Points either at raw kind layout data in parsed BTF (if present), or + * at an allocated kind layout array when BTF is modifiable. + */ + void *layout; + /* BTF object FD, if loaded into kernel */ int fd; @@ -211,7 +268,7 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off) return 0; } -static void btf_bswap_hdr(struct btf_header *h) +static void btf_bswap_hdr(struct btf_header *h, __u32 hdr_len) { h->magic = bswap_16(h->magic); h->hdr_len = bswap_32(h->hdr_len); @@ -219,66 +276,115 @@ static void btf_bswap_hdr(struct btf_header *h) h->type_len = bswap_32(h->type_len); h->str_off = bswap_32(h->str_off); h->str_len = bswap_32(h->str_len); + /* May be operating on raw data with hdr_len that does not include below fields */ + if (hdr_len >= sizeof(struct btf_header)) { + h->layout_off = bswap_32(h->layout_off); + h->layout_len = bswap_32(h->layout_len); + } } static int btf_parse_hdr(struct btf *btf) { - struct btf_header *hdr = btf->hdr; - __u32 meta_left; + struct btf_header *hdr = btf->raw_data; + __u32 hdr_len, meta_left; - if (btf->raw_size < sizeof(struct btf_header)) { + if (btf->raw_size < offsetofend(struct btf_header, str_len)) { pr_debug("BTF header not found\n"); return -EINVAL; } + hdr_len = hdr->hdr_len; + if (hdr->magic == bswap_16(BTF_MAGIC)) { btf->swapped_endian = true; - if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) { + hdr_len = bswap_32(hdr->hdr_len); + if (hdr_len < offsetofend(struct btf_header, str_len)) { pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n", - bswap_32(hdr->hdr_len)); + hdr_len); return -ENOTSUP; } - btf_bswap_hdr(hdr); } else if (hdr->magic != BTF_MAGIC) { pr_debug("Invalid BTF magic: %x\n", hdr->magic); return -EINVAL; } - if (btf->raw_size < hdr->hdr_len) { + if (btf->raw_size < hdr_len) { pr_debug("BTF header len %u larger than data size %u\n", - hdr->hdr_len, btf->raw_size); + hdr_len, btf->raw_size); return -EINVAL; } - meta_left = btf->raw_size - hdr->hdr_len; - if (meta_left < (long long)hdr->str_off + hdr->str_len) { + if (btf->swapped_endian) + btf_bswap_hdr(hdr, hdr_len); + + memcpy(&btf->hdr, hdr, min((size_t)hdr_len, sizeof(struct btf_header))); + + /* If unknown header data is found, modification is prohibited in + * btf_ensure_modifiable(). + */ + if (hdr_len > sizeof(struct btf_header)) { + __u8 *h = (__u8 *)hdr; + __u32 i; + + for (i = sizeof(struct btf_header); i < hdr_len; i++) { + if (!h[i]) + continue; + btf->has_hdr_extra = true; + pr_debug("Unknown BTF header data at offset %u; modification is disallowed\n", + i); + break; + } + } + + meta_left = btf->raw_size - hdr_len; + if (meta_left < (long long)btf->hdr.str_off + btf->hdr.str_len) { pr_debug("Invalid BTF total size: %u\n", btf->raw_size); return -EINVAL; } - if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) { + if ((long long)btf->hdr.type_off + btf->hdr.type_len > btf->hdr.str_off) { pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", - hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); + btf->hdr.type_off, btf->hdr.type_len, btf->hdr.str_off, + btf->hdr.str_len); return -EINVAL; } - if (hdr->type_off % 4) { + if (btf->hdr.type_off % 4) { pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } + if (btf->hdr.layout_len == 0) + return 0; + + /* optional layout section sits between types and strings */ + if (btf->hdr.layout_off % 4) { + pr_debug("BTF layout section is not aligned to 4 bytes\n"); + return -EINVAL; + } + if (btf->hdr.layout_off < (long long)btf->hdr.type_off + btf->hdr.type_len) { + pr_debug("Invalid BTF data sections layout: type data at %u + %u, layout data at %u + %u\n", + btf->hdr.type_off, btf->hdr.type_len, + btf->hdr.layout_off, btf->hdr.layout_len); + return -EINVAL; + } + if ((long long)btf->hdr.layout_off + btf->hdr.layout_len > btf->hdr.str_off || + btf->hdr.layout_off > btf->hdr.str_off) { + pr_debug("Invalid BTF data sections layout: layout data at %u + %u, strings data at %u\n", + btf->hdr.layout_off, btf->hdr.layout_len, btf->hdr.str_off); + return -EINVAL; + } return 0; } static int btf_parse_str_sec(struct btf *btf) { - const struct btf_header *hdr = btf->hdr; const char *start = btf->strs_data; - const char *end = start + btf->hdr->str_len; + const char *end = start + btf->hdr.str_len; - if (btf->base_btf && hdr->str_len == 0) + if (btf->base_btf && btf->hdr.str_len == 0) return 0; - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) { + if (!btf->hdr.str_len || btf->hdr.str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) { pr_debug("Invalid BTF string section\n"); return -EINVAL; } @@ -289,7 +395,63 @@ static int btf_parse_str_sec(struct btf *btf) return 0; } -static int btf_type_size(const struct btf_type *t) +static int btf_parse_layout_sec(struct btf *btf) +{ + if (!btf->hdr.layout_len) + return 0; + + if (btf->hdr.layout_len % sizeof(struct btf_layout) != 0) { + pr_debug("Invalid BTF kind layout section\n"); + return -EINVAL; + } + btf->layout = btf->raw_data + btf->hdr.hdr_len + btf->hdr.layout_off; + + if (btf->swapped_endian) { + struct btf_layout *l, *end = btf->layout + btf->hdr.layout_len; + + for (l = btf->layout; l < end; l++) + l->flags = bswap_16(l->flags); + } + + return 0; +} + +/* for unknown kinds, consult kind layout. */ +static int btf_type_size_unknown(const struct btf *btf, const struct btf_type *t) +{ + __u32 l_cnt = btf->hdr.layout_len / sizeof(struct btf_layout); + struct btf_layout *l = btf->layout; + __u16 vlen = btf_vlen(t); + __u32 kind = btf_kind(t); + + /* Fall back to base BTF if needed as they share layout information */ + if (!l) { + struct btf *base_btf = btf->base_btf; + + if (base_btf) { + l = base_btf->layout; + l_cnt = base_btf->hdr.layout_len / sizeof(struct btf_layout); + } + } + if (!l || kind >= l_cnt) { + pr_debug("Unsupported BTF_KIND: %u\n", btf_kind(t)); + return -EINVAL; + } + if (l[kind].info_sz % 4) { + pr_debug("Unsupported info_sz %u for kind %u\n", + l[kind].info_sz, kind); + return -EINVAL; + } + if (l[kind].elem_sz % 4) { + pr_debug("Unsupported elem_sz %u for kind %u\n", + l[kind].elem_sz, kind); + return -EINVAL; + } + + return sizeof(struct btf_type) + l[kind].info_sz + vlen * l[kind].elem_sz; +} + +static int btf_type_size(const struct btf *btf, const struct btf_type *t) { const int base_size = sizeof(struct btf_type); __u16 vlen = btf_vlen(t); @@ -325,8 +487,7 @@ static int btf_type_size(const struct btf_type *t) case BTF_KIND_DECL_TAG: return base_size + sizeof(struct btf_decl_tag); default: - pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t)); - return -EINVAL; + return btf_type_size_unknown(btf, t); } } @@ -416,16 +577,15 @@ static int btf_bswap_type_rest(struct btf_type *t) static int btf_parse_type_sec(struct btf *btf) { - struct btf_header *hdr = btf->hdr; void *next_type = btf->types_data; - void *end_type = next_type + hdr->type_len; + void *end_type = next_type + btf->hdr.type_len; int err, type_size; while (next_type + sizeof(struct btf_type) <= end_type) { if (btf->swapped_endian) btf_bswap_type_base(next_type); - type_size = btf_type_size(next_type); + type_size = btf_type_size(btf, next_type); if (type_size < 0) return type_size; if (next_type + type_size > end_type) { @@ -586,8 +746,12 @@ static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __ break; } default: - pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind); - return -EINVAL; + /* Kind may be represented in kind layout information. */ + if (btf_type_size_unknown(btf, t) < 0) { + pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind); + return -EINVAL; + } + break; } return 0; } @@ -894,46 +1058,105 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id) return type_id; } -__s32 btf__find_by_name(const struct btf *btf, const char *type_name) +static void btf_check_sorted(struct btf *btf) { - __u32 i, nr_types = btf__type_cnt(btf); + __u32 i, n, named_start_id = 0; - if (!strcmp(type_name, "void")) - return 0; + n = btf__type_cnt(btf); + for (i = btf->start_id + 1; i < n; i++) { + struct btf_type *ta = btf_type_by_id(btf, i - 1); + struct btf_type *tb = btf_type_by_id(btf, i); + const char *na = btf__str_by_offset(btf, ta->name_off); + const char *nb = btf__str_by_offset(btf, tb->name_off); - for (i = 1; i < nr_types; i++) { - const struct btf_type *t = btf__type_by_id(btf, i); - const char *name = btf__name_by_offset(btf, t->name_off); + if (strcmp(na, nb) > 0) + return; - if (name && !strcmp(type_name, name)) - return i; + if (named_start_id == 0 && na[0] != '\0') + named_start_id = i - 1; + if (named_start_id == 0 && nb[0] != '\0') + named_start_id = i; } - return libbpf_err(-ENOENT); + if (named_start_id) + btf->named_start_id = named_start_id; +} + +static __s32 btf_find_type_by_name_bsearch(const struct btf *btf, const char *name, + __s32 start_id) +{ + const struct btf_type *t; + const char *tname; + __s32 l, r, m; + + l = start_id; + r = btf__type_cnt(btf) - 1; + while (l <= r) { + m = l + (r - l) / 2; + t = btf_type_by_id(btf, m); + tname = btf__str_by_offset(btf, t->name_off); + if (strcmp(tname, name) >= 0) { + if (l == r) + return r; + r = m; + } else { + l = m + 1; + } + } + + return btf__type_cnt(btf); } static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id, - const char *type_name, __u32 kind) + const char *type_name, __s32 kind) { - __u32 i, nr_types = btf__type_cnt(btf); + __u32 nr_types = btf__type_cnt(btf); + const struct btf_type *t; + const char *tname; + __s32 id; - if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) - return 0; + if (start_id < btf->start_id) { + id = btf_find_by_name_kind(btf->base_btf, start_id, + type_name, kind); + if (id >= 0) + return id; + start_id = btf->start_id; + } - for (i = start_id; i < nr_types; i++) { - const struct btf_type *t = btf__type_by_id(btf, i); - const char *name; + if (kind == BTF_KIND_UNKN || strcmp(type_name, "void") == 0) + return 0; - if (btf_kind(t) != kind) - continue; - name = btf__name_by_offset(btf, t->name_off); - if (name && !strcmp(type_name, name)) - return i; + if (btf->named_start_id > 0 && type_name[0]) { + start_id = max(start_id, btf->named_start_id); + id = btf_find_type_by_name_bsearch(btf, type_name, start_id); + for (; id < nr_types; id++) { + t = btf__type_by_id(btf, id); + tname = btf__str_by_offset(btf, t->name_off); + if (strcmp(tname, type_name) != 0) + return libbpf_err(-ENOENT); + if (kind < 0 || btf_kind(t) == kind) + return id; + } + } else { + for (id = start_id; id < nr_types; id++) { + t = btf_type_by_id(btf, id); + if (kind > 0 && btf_kind(t) != kind) + continue; + tname = btf__str_by_offset(btf, t->name_off); + if (strcmp(tname, type_name) == 0) + return id; + } } return libbpf_err(-ENOENT); } +/* the kind value of -1 indicates that kind matching should be skipped */ +__s32 btf__find_by_name(const struct btf *btf, const char *type_name) +{ + return btf_find_by_name_kind(btf, 1, type_name, -1); +} + __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name, __u32 kind) { @@ -948,7 +1171,19 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, static bool btf_is_modifiable(const struct btf *btf) { - return (void *)btf->hdr != btf->raw_data; + /* BTF is modifiable if split into multiple sections */ + return btf->modifiable; +} + +static void btf_free_raw_data(struct btf *btf) +{ + if (btf->raw_data_is_mmap) { + munmap(btf->raw_data, btf->raw_size); + btf->raw_data_is_mmap = false; + } else { + free(btf->raw_data); + } + btf->raw_data = NULL; } void btf__free(struct btf *btf) @@ -961,16 +1196,16 @@ void btf__free(struct btf *btf) if (btf_is_modifiable(btf)) { /* if BTF was modified after loading, it will have a split - * in-memory representation for header, types, and strings + * in-memory representation for types, strings and layout * sections, so we need to free all of them individually. It * might still have a cached contiguous raw data present, * which will be unconditionally freed below. */ - free(btf->hdr); free(btf->types_data); strset__free(btf->strs_set); + free(btf->layout); } - free(btf->raw_data); + btf_free_raw_data(btf); free(btf->raw_data_swapped); free(btf->type_offs); if (btf->owns_base) @@ -978,8 +1213,11 @@ void btf__free(struct btf *btf) free(btf); } -static struct btf *btf_new_empty(struct btf *base_btf) +static struct btf *btf_new_empty(struct btf_new_opts *opts) { + bool add_layout = OPTS_GET(opts, add_layout, false); + struct btf *base_btf = OPTS_GET(opts, base_btf, NULL); + struct btf_header *hdr; struct btf *btf; btf = calloc(1, sizeof(*btf)); @@ -992,30 +1230,47 @@ static struct btf *btf_new_empty(struct btf *base_btf) btf->fd = -1; btf->ptr_sz = sizeof(void *); btf->swapped_endian = false; + btf->named_start_id = 0; if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off; btf->swapped_endian = base_btf->swapped_endian; } /* +1 for empty string at offset 0 */ btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1); + if (add_layout) + btf->raw_size += sizeof(layouts); btf->raw_data = calloc(1, btf->raw_size); if (!btf->raw_data) { free(btf); return ERR_PTR(-ENOMEM); } - btf->hdr = btf->raw_data; - btf->hdr->hdr_len = sizeof(struct btf_header); - btf->hdr->magic = BTF_MAGIC; - btf->hdr->version = BTF_VERSION; + hdr = btf->raw_data; + hdr->hdr_len = sizeof(struct btf_header); + hdr->magic = BTF_MAGIC; + hdr->version = BTF_VERSION; + + btf->types_data = btf->raw_data + hdr->hdr_len; + btf->strs_data = btf->raw_data + hdr->hdr_len; + hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */ + + if (add_layout) { + hdr->layout_len = sizeof(layouts); + btf->layout = layouts; + /* + * No need to swap endianness here as btf_get_raw_data() + * will do this for us if btf->swapped_endian is true. + */ + memcpy(btf->raw_data + hdr->hdr_len, layouts, sizeof(layouts)); + btf->strs_data += sizeof(layouts); + hdr->str_off += sizeof(layouts); + } - btf->types_data = btf->raw_data + btf->hdr->hdr_len; - btf->strs_data = btf->raw_data + btf->hdr->hdr_len; - btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */ + memcpy(&btf->hdr, hdr, sizeof(*hdr)); return btf; } @@ -1027,10 +1282,22 @@ struct btf *btf__new_empty(void) struct btf *btf__new_empty_split(struct btf *base_btf) { - return libbpf_ptr(btf_new_empty(base_btf)); + LIBBPF_OPTS(btf_new_opts, opts); + + opts.base_btf = base_btf; + + return libbpf_ptr(btf_new_empty(&opts)); } -static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) +struct btf *btf__new_empty_opts(struct btf_new_opts *opts) +{ + if (!OPTS_VALID(opts, btf_new_opts)) + return libbpf_err_ptr(-EINVAL); + + return libbpf_ptr(btf_new_empty(opts)); +} + +static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap) { struct btf *btf; int err; @@ -1043,34 +1310,42 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) btf->start_id = 1; btf->start_str_off = 0; btf->fd = -1; + btf->named_start_id = 0; if (base_btf) { btf->base_btf = base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off; } - btf->raw_data = malloc(size); - if (!btf->raw_data) { - err = -ENOMEM; - goto done; + if (is_mmap) { + btf->raw_data = (void *)data; + btf->raw_data_is_mmap = true; + } else { + btf->raw_data = malloc(size); + if (!btf->raw_data) { + err = -ENOMEM; + goto done; + } + memcpy(btf->raw_data, data, size); } - memcpy(btf->raw_data, data, size); + btf->raw_size = size; - btf->hdr = btf->raw_data; err = btf_parse_hdr(btf); if (err) goto done; - btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off; - btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off; + btf->strs_data = btf->raw_data + btf->hdr.hdr_len + btf->hdr.str_off; + btf->types_data = btf->raw_data + btf->hdr.hdr_len + btf->hdr.type_off; err = btf_parse_str_sec(btf); + err = err ?: btf_parse_layout_sec(btf); err = err ?: btf_parse_type_sec(btf); err = err ?: btf_sanity_check(btf); if (err) goto done; + btf_check_sorted(btf); done: if (err) { @@ -1083,12 +1358,12 @@ done: struct btf *btf__new(const void *data, __u32 size) { - return libbpf_ptr(btf_new(data, size, NULL)); + return libbpf_ptr(btf_new(data, size, NULL, false)); } struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf) { - return libbpf_ptr(btf_new(data, size, base_btf)); + return libbpf_ptr(btf_new(data, size, base_btf, false)); } struct btf_elf_secs { @@ -1148,6 +1423,12 @@ static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs else continue; + if (sh.sh_type != SHT_PROGBITS) { + pr_warn("unexpected section type (%d) of section(%d, %s) from %s\n", + sh.sh_type, idx, name, path); + goto err; + } + data = elf_getdata(scn, 0); if (!data) { pr_warn("failed to get section(%d, %s) data from %s\n", @@ -1203,7 +1484,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, if (secs.btf_base_data) { dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size, - NULL); + NULL, false); if (IS_ERR(dist_base_btf)) { err = PTR_ERR(dist_base_btf); dist_base_btf = NULL; @@ -1212,7 +1493,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, } btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size, - dist_base_btf ?: base_btf); + dist_base_btf ?: base_btf, false); if (IS_ERR(btf)) { err = PTR_ERR(btf); goto done; @@ -1329,7 +1610,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf) } /* finally parse BTF data */ - btf = btf_new(data, sz, base_btf); + btf = btf_new(data, sz, base_btf, false); err_out: free(data); @@ -1348,6 +1629,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf) return libbpf_ptr(btf_parse_raw(path, base_btf)); } +static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) +{ + struct stat st; + void *data; + struct btf *btf; + int fd, err; + + fd = open(path, O_RDONLY); + if (fd < 0) + return ERR_PTR(-errno); + + if (fstat(fd, &st) < 0) { + err = -errno; + close(fd); + return ERR_PTR(err); + } + + data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + err = -errno; + close(fd); + + if (data == MAP_FAILED) + return ERR_PTR(err); + + btf = btf_new(data, st.st_size, base_btf, true); + if (IS_ERR(btf)) + munmap(data, st.st_size); + + return btf; +} + static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext) { struct btf *btf; @@ -1480,7 +1792,7 @@ static const void *btf_strs_data(const struct btf *btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian) { - struct btf_header *hdr = btf->hdr; + const struct btf_header *hdr = &btf->hdr; struct btf_type *t; void *data, *p; __u32 data_sz; @@ -1493,14 +1805,17 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi } data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len; + if (btf->layout) + data_sz += hdr->layout_len; + data = calloc(1, data_sz); if (!data) return NULL; p = data; - memcpy(p, hdr, hdr->hdr_len); + memcpy(p, hdr, min((__u32)sizeof(struct btf_header), hdr->hdr_len)); if (swap_endian) - btf_bswap_hdr(p); + btf_bswap_hdr(p, hdr->hdr_len); p += hdr->hdr_len; memcpy(p, btf->types_data, hdr->type_len); @@ -1518,8 +1833,18 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi } p += hdr->type_len; + if (btf->layout) { + memcpy(p, btf->layout, hdr->layout_len); + if (swap_endian) { + struct btf_layout *l, *end = p + hdr->layout_len; + + for (l = p; l < end ; l++) + l->flags = bswap_16(l->flags); + } + p += hdr->layout_len; + } + memcpy(p, btf_strs_data(btf), hdr->str_len); - p += hdr->str_len; *size = data_sz; return data; @@ -1554,7 +1879,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->start_str_off) return btf__str_by_offset(btf->base_btf, offset); - else if (offset - btf->start_str_off < btf->hdr->str_len) + else if (offset - btf->start_str_off < btf->hdr.str_len) return btf_strs_data(btf) + (offset - btf->start_str_off); else return errno = EINVAL, NULL; @@ -1612,19 +1937,25 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf) goto exit_free; } - btf = btf_new(ptr, btf_info.btf_size, base_btf); + btf = btf_new(ptr, btf_info.btf_size, base_btf, false); exit_free: free(ptr); return btf; } -struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) +struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd) { struct btf *btf; int btf_fd; + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); - btf_fd = bpf_btf_get_fd_by_id(id); + if (token_fd) { + opts.open_flags |= BPF_F_TOKEN_FD; + opts.token_fd = token_fd; + } + + btf_fd = bpf_btf_get_fd_by_id_opts(id, &opts); if (btf_fd < 0) return libbpf_err_ptr(-errno); @@ -1634,6 +1965,11 @@ struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) return libbpf_ptr(btf); } +struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) +{ + return btf_load_from_kernel(id, base_btf, 0); +} + struct btf *btf__load_from_kernel_by_id(__u32 id) { return btf__load_from_kernel_by_id_split(id, NULL); @@ -1641,23 +1977,22 @@ struct btf *btf__load_from_kernel_by_id(__u32 id) static void btf_invalidate_raw_data(struct btf *btf) { - if (btf->raw_data) { - free(btf->raw_data); - btf->raw_data = NULL; - } + if (btf->raw_data) + btf_free_raw_data(btf); if (btf->raw_data_swapped) { free(btf->raw_data_swapped); btf->raw_data_swapped = NULL; } + btf->named_start_id = 0; } /* Ensure BTF is ready to be modified (by splitting into a three memory - * regions for header, types, and strings). Also invalidate cached + * regions for types, strings and layout. Also invalidate cached * raw_data, if any. */ static int btf_ensure_modifiable(struct btf *btf) { - void *hdr, *types; + void *types, *layout = NULL; struct strset *set = NULL; int err = -ENOMEM; @@ -1667,45 +2002,58 @@ static int btf_ensure_modifiable(struct btf *btf) return 0; } - /* split raw data into three memory regions */ - hdr = malloc(btf->hdr->hdr_len); - types = malloc(btf->hdr->type_len); - if (!hdr || !types) + if (btf->has_hdr_extra) { + /* Additional BTF header data was found; not safe to modify. */ + return -EOPNOTSUPP; + } + + /* split raw data into memory regions; btf->hdr is done already. */ + types = malloc(btf->hdr.type_len); + if (!types) goto err_out; + memcpy(types, btf->types_data, btf->hdr.type_len); - memcpy(hdr, btf->hdr, btf->hdr->hdr_len); - memcpy(types, btf->types_data, btf->hdr->type_len); + if (btf->hdr.layout_len) { + layout = malloc(btf->hdr.layout_len); + if (!layout) + goto err_out; + memcpy(layout, btf->raw_data + btf->hdr.hdr_len + btf->hdr.layout_off, + btf->hdr.layout_len); + } /* build lookup index for all strings */ - set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len); + set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr.str_len); if (IS_ERR(set)) { err = PTR_ERR(set); goto err_out; } /* only when everything was successful, update internal state */ - btf->hdr = hdr; btf->types_data = types; - btf->types_data_cap = btf->hdr->type_len; + btf->types_data_cap = btf->hdr.type_len; btf->strs_data = NULL; btf->strs_set = set; + if (layout) + btf->layout = layout; /* if BTF was created from scratch, all strings are guaranteed to be * unique and deduplicated */ - if (btf->hdr->str_len == 0) + if (btf->hdr.str_len == 0) btf->strs_deduped = true; - if (!btf->base_btf && btf->hdr->str_len == 1) + if (!btf->base_btf && btf->hdr.str_len == 1) btf->strs_deduped = true; /* invalidate raw_data representation */ btf_invalidate_raw_data(btf); + btf->modifiable = true; + return 0; err_out: strset__free(set); - free(hdr); free(types); + free(layout); return err; } @@ -1718,6 +2066,7 @@ err_out: int btf__find_str(struct btf *btf, const char *s) { int off; + int err; if (btf->base_btf) { off = btf__find_str(btf->base_btf, s); @@ -1726,8 +2075,9 @@ int btf__find_str(struct btf *btf, const char *s) } /* BTF needs to be in a modifiable state to build string lookup index */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); off = strset__find_str(btf->strs_set, s); if (off < 0) @@ -1744,6 +2094,7 @@ int btf__find_str(struct btf *btf, const char *s) int btf__add_str(struct btf *btf, const char *s) { int off; + int err; if (btf->base_btf) { off = btf__find_str(btf->base_btf, s); @@ -1751,14 +2102,15 @@ int btf__add_str(struct btf *btf, const char *s) return off; } - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); off = strset__add_str(btf->strs_set, s); if (off < 0) return libbpf_err(off); - btf->hdr->str_len = strset__data_size(btf->strs_set); + btf->hdr.str_len = strset__data_size(btf->strs_set); return btf->start_str_off + off; } @@ -1766,7 +2118,7 @@ int btf__add_str(struct btf *btf, const char *s) static void *btf_add_type_mem(struct btf *btf, size_t add_sz) { return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1, - btf->hdr->type_len, UINT_MAX, add_sz); + btf->hdr.type_len, UINT_MAX, add_sz); } static void btf_type_inc_vlen(struct btf_type *t) @@ -1774,16 +2126,31 @@ static void btf_type_inc_vlen(struct btf_type *t) t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t)); } +static void btf_hdr_update_type_len(struct btf *btf, int new_len) +{ + btf->hdr.type_len = new_len; + if (btf->layout) { + btf->hdr.layout_off = btf->hdr.type_off + new_len; + btf->hdr.str_off = btf->hdr.layout_off + btf->hdr.layout_len; + } else { + btf->hdr.str_off = btf->hdr.type_off + new_len; + } +} + +static void btf_hdr_update_str_len(struct btf *btf, int new_len) +{ + btf->hdr.str_len = new_len; +} + static int btf_commit_type(struct btf *btf, int data_sz) { int err; - err = btf_add_type_idx_entry(btf, btf->hdr->type_len); + err = btf_add_type_idx_entry(btf, btf->hdr.type_len); if (err) return libbpf_err(err); - btf->hdr->type_len += data_sz; - btf->hdr->str_off += data_sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + data_sz); btf->nr_types++; return btf->start_id + btf->nr_types - 1; } @@ -1832,13 +2199,14 @@ static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type) __u32 *str_off; int sz, err; - sz = btf_type_size(src_type); + sz = btf_type_size(p->src, src_type); if (sz < 0) return libbpf_err(sz); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(p->dst)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(p->dst); + if (err) + return libbpf_err(err); t = btf_add_type_mem(p->dst, sz); if (!t) @@ -1873,24 +2241,31 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) { struct btf_pipe p = { .src = src_btf, .dst = btf }; int data_sz, sz, cnt, i, err, old_strs_len; + __u32 src_start_id; __u32 *off; void *t; - /* appending split BTF isn't supported yet */ - if (src_btf->base_btf) - return libbpf_err(-ENOTSUP); + /* + * When appending split BTF, the destination must share the same base + * BTF so that base type ID references remain valid. + */ + if (src_btf->base_btf && src_btf->base_btf != btf->base_btf) + return libbpf_err(-EOPNOTSUPP); + + src_start_id = src_btf->base_btf ? btf__type_cnt(src_btf->base_btf) : 1; /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); /* remember original strings section size if we have to roll back * partial strings section changes */ - old_strs_len = btf->hdr->str_len; + old_strs_len = btf->hdr.str_len; - data_sz = src_btf->hdr->type_len; - cnt = btf__type_cnt(src_btf) - 1; + data_sz = src_btf->hdr.type_len; + cnt = src_btf->nr_types; /* pre-allocate enough memory for new types */ t = btf_add_type_mem(btf, data_sz); @@ -1914,7 +2289,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) struct btf_field_iter it; __u32 *type_id, *str_off; - sz = btf_type_size(t); + sz = btf_type_size(src_btf, t); if (sz < 0) { /* unlikely, has to be corrupted src_btf */ err = sz; @@ -1929,6 +2304,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) if (err) goto err_out; while ((str_off = btf_field_iter_next(&it))) { + /* don't remap strings from shared base BTF */ + if (*str_off < src_btf->start_str_off) + continue; err = btf_rewrite_str(&p, str_off); if (err) goto err_out; @@ -1943,11 +2321,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) if (!*type_id) /* nothing to do for VOID references */ continue; - /* we haven't updated btf's type count yet, so - * btf->start_id + btf->nr_types - 1 is the type ID offset we should - * add to all newly added BTF types - */ - *type_id += btf->start_id + btf->nr_types - 1; + /* don't remap types from shared base BTF */ + if (*type_id < src_start_id) + continue; + + *type_id += btf->start_id + btf->nr_types - src_start_id; } /* go to next type data and type offset index entry */ @@ -1963,8 +2341,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf) * update type count and various internal offsets and sizes to * "commit" the changes and made them visible to the outside world. */ - btf->hdr->type_len += data_sz; - btf->hdr->str_off += data_sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + data_sz); btf->nr_types += cnt; hashmap__free(p.str_off_map); @@ -1975,13 +2352,14 @@ err_out: /* zero out preallocated memory as if it was just allocated with * libbpf_add_mem() */ - memset(btf->types_data + btf->hdr->type_len, 0, data_sz); - memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len); + memset(btf->types_data + btf->hdr.type_len, 0, data_sz); + if (btf->strs_data) + memset(btf->strs_data + old_strs_len, 0, btf->hdr.str_len - old_strs_len); /* and now restore original strings section size; types data size * wasn't modified, so doesn't need restoring, see big comment above */ - btf->hdr->str_len = old_strs_len; + btf_hdr_update_str_len(btf, old_strs_len); hashmap__free(p.str_off_map); @@ -2001,9 +2379,10 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding { struct btf_type *t; int sz, name_off; + int err; /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); /* byte_sz must be power of 2 */ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16) @@ -2012,8 +2391,9 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding return libbpf_err(-EINVAL); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type) + sizeof(int); t = btf_add_type_mem(btf, sz); @@ -2049,9 +2429,10 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz) { struct btf_type *t; int sz, name_off; + int err; /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); /* byte_sz must be one of the explicitly allowed values */ @@ -2059,8 +2440,9 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz) byte_sz != 16) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); @@ -2090,30 +2472,32 @@ static int validate_type_id(int id) } /* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */ -static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id) +static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id, int kflag) { struct btf_type *t; int sz, name_off = 0; + int err; if (validate_type_id(ref_type_id)) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); if (!t) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 0) return name_off; } t->name_off = name_off; - t->info = btf_type_info(kind, 0, 0); + t->info = btf_type_info(kind, 0, kflag); t->type = ref_type_id; return btf_commit_type(btf, sz); @@ -2128,7 +2512,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref */ int btf__add_ptr(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id, 0); } /* @@ -2144,13 +2528,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n { struct btf_type *t; struct btf_array *a; + int err; int sz; if (validate_type_id(index_type_id) || validate_type_id(elem_type_id)) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type) + sizeof(struct btf_array); t = btf_add_type_mem(btf, sz); @@ -2174,16 +2560,18 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32 { struct btf_type *t; int sz, name_off = 0; + int err; - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); if (!t) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 0) return name_off; @@ -2256,6 +2644,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, struct btf_member *m; bool is_bitfield; int sz, name_off = 0; + int err; /* last type should be union/struct */ if (btf->nr_types == 0) @@ -2276,15 +2665,16 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_member); m = btf_add_type_mem(btf, sz); if (!m) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 0) return name_off; @@ -2299,8 +2689,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id, /* update parent type's vlen and kflag */ t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t)); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } @@ -2309,20 +2698,22 @@ static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz, { struct btf_type *t; int sz, name_off = 0; + int err; /* byte_sz must be power of 2 */ if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); if (!t) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 0) return name_off; @@ -2371,6 +2762,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) struct btf_type *t; struct btf_enum *v; int sz, name_off; + int err; /* last type should be BTF_KIND_ENUM */ if (btf->nr_types == 0) @@ -2380,14 +2772,15 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) return libbpf_err(-EINVAL); /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); if (value < INT_MIN || value > UINT_MAX) return libbpf_err(-E2BIG); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_enum); v = btf_add_type_mem(btf, sz); @@ -2409,8 +2802,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value) if (value < 0) t->info = btf_type_info(btf_kind(t), btf_vlen(t), true); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } @@ -2448,6 +2840,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) struct btf_enum64 *v; struct btf_type *t; int sz, name_off; + int err; /* last type should be BTF_KIND_ENUM64 */ if (btf->nr_types == 0) @@ -2457,12 +2850,13 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) return libbpf_err(-EINVAL); /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_enum64); v = btf_add_type_mem(btf, sz); @@ -2481,8 +2875,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) t = btf_last_type(btf); btf_type_inc_vlen(t); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } @@ -2497,7 +2890,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value) */ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) { - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); switch (fwd_kind) { @@ -2506,7 +2899,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) struct btf_type *t; int id; - id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0); + id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0, 0); if (id <= 0) return id; t = btf_type_by_id(btf, id); @@ -2533,10 +2926,10 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind) */ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) { - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); - return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id, 0); } /* @@ -2548,7 +2941,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id) */ int btf__add_volatile(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id, 0); } /* @@ -2560,7 +2953,7 @@ int btf__add_volatile(struct btf *btf, int ref_type_id) */ int btf__add_const(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id, 0); } /* @@ -2572,7 +2965,7 @@ int btf__add_const(struct btf *btf, int ref_type_id) */ int btf__add_restrict(struct btf *btf, int ref_type_id) { - return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id, 0); } /* @@ -2585,10 +2978,27 @@ int btf__add_restrict(struct btf *btf, int ref_type_id) */ int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) { - if (!value || !value[0]) + if (str_is_empty(value)) return libbpf_err(-EINVAL); - return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 0); +} + +/* + * Append new BTF_KIND_TYPE_TAG type with: + * - *value*, non-empty/non-NULL tag value; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * Set info->kflag to 1, indicating this tag is an __attribute__ + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id) +{ + if (str_is_empty(value)) + return libbpf_err(-EINVAL); + + return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 1); } /* @@ -2604,13 +3014,13 @@ int btf__add_func(struct btf *btf, const char *name, { int id; - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL && linkage != BTF_FUNC_EXTERN) return libbpf_err(-EINVAL); - id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id); + id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id, 0); if (id > 0) { struct btf_type *t = btf_type_by_id(btf, id); @@ -2634,13 +3044,15 @@ int btf__add_func(struct btf *btf, const char *name, int btf__add_func_proto(struct btf *btf, int ret_type_id) { struct btf_type *t; + int err; int sz; if (validate_type_id(ret_type_id)) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); @@ -2670,6 +3082,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) struct btf_type *t; struct btf_param *p; int sz, name_off = 0; + int err; if (validate_type_id(type_id)) return libbpf_err(-EINVAL); @@ -2682,15 +3095,16 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_param); p = btf_add_type_mem(btf, sz); if (!p) return libbpf_err(-ENOMEM); - if (name && name[0]) { + if (!str_is_empty(name)) { name_off = btf__add_str(btf, name); if (name_off < 0) return name_off; @@ -2703,8 +3117,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id) t = btf_last_type(btf); btf_type_inc_vlen(t); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } @@ -2723,9 +3136,10 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) struct btf_type *t; struct btf_var *v; int sz, name_off; + int err; /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED && linkage != BTF_VAR_GLOBAL_EXTERN) @@ -2734,8 +3148,9 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id) return libbpf_err(-EINVAL); /* deconstruct BTF, if necessary, and invalidate raw_data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type) + sizeof(struct btf_var); t = btf_add_type_mem(btf, sz); @@ -2772,13 +3187,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz) { struct btf_type *t; int sz, name_off; + int err; /* non-empty name */ - if (!name || !name[0]) + if (str_is_empty(name)) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type); t = btf_add_type_mem(btf, sz); @@ -2811,6 +3228,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ { struct btf_type *t; struct btf_var_secinfo *v; + int err; int sz; /* last type should be BTF_KIND_DATASEC */ @@ -2824,8 +3242,9 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ return libbpf_err(-EINVAL); /* decompose and invalidate raw data */ - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_var_secinfo); v = btf_add_type_mem(btf, sz); @@ -2840,35 +3259,26 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __ t = btf_last_type(btf); btf_type_inc_vlen(t); - btf->hdr->type_len += sz; - btf->hdr->str_off += sz; + btf_hdr_update_type_len(btf, btf->hdr.type_len + sz); return 0; } -/* - * Append new BTF_KIND_DECL_TAG type with: - * - *value* - non-empty/non-NULL string; - * - *ref_type_id* - referenced type ID, it might not exist yet; - * - *component_idx* - -1 for tagging reference type, otherwise struct/union - * member or function argument index; - * Returns: - * - >0, type ID of newly added BTF type; - * - <0, on error. - */ -int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, - int component_idx) +static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx, int kflag) { struct btf_type *t; int sz, value_off; + int err; - if (!value || !value[0] || component_idx < -1) + if (str_is_empty(value) || component_idx < -1) return libbpf_err(-EINVAL); if (validate_type_id(ref_type_id)) return libbpf_err(-EINVAL); - if (btf_ensure_modifiable(btf)) - return libbpf_err(-ENOMEM); + err = btf_ensure_modifiable(btf); + if (err) + return libbpf_err(err); sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag); t = btf_add_type_mem(btf, sz); @@ -2880,13 +3290,46 @@ int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, return value_off; t->name_off = value_off; - t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false); + t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, kflag); t->type = ref_type_id; btf_decl_tag(t)->component_idx = component_idx; return btf_commit_type(btf, sz); } +/* + * Append new BTF_KIND_DECL_TAG type with: + * - *value* - non-empty/non-NULL string; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * - *component_idx* - -1 for tagging reference type, otherwise struct/union + * member or function argument index; + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, + int component_idx) +{ + return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 0); +} + +/* + * Append new BTF_KIND_DECL_TAG type with: + * - *value* - non-empty/non-NULL string; + * - *ref_type_id* - referenced type ID, it might not exist yet; + * - *component_idx* - -1 for tagging reference type, otherwise struct/union + * member or function argument index; + * Set info->kflag to 1, indicating this tag is an __attribute__ + * Returns: + * - >0, type ID of newly added BTF type; + * - <0, on error. + */ +int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id, + int component_idx) +{ + return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 1); +} + struct btf_ext_sec_info_param { __u32 off; __u32 len; @@ -3015,8 +3458,6 @@ static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native) .desc = "line_info", }; struct btf_ext_sec_info_param core_relo = { - .off = btf_ext->hdr->core_relo_off, - .len = btf_ext->hdr->core_relo_len, .min_rec_size = sizeof(struct bpf_core_relo), .ext_info = &btf_ext->core_relo_info, .desc = "core_relo", @@ -3034,6 +3475,8 @@ static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native) if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) return 0; /* skip core relos parsing */ + core_relo.off = btf_ext->hdr->core_relo_off; + core_relo.len = btf_ext->hdr->core_relo_len; err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native); if (err) return err; @@ -3459,10 +3902,9 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) return libbpf_err(-EINVAL); } - if (btf_ensure_modifiable(btf)) { - err = -ENOMEM; + err = btf_ensure_modifiable(btf); + if (err) goto done; - } err = btf_dedup_prep(d); if (err) { @@ -3782,7 +4224,7 @@ static int btf_dedup_strings(struct btf_dedup *d) /* replace BTF string data and hash with deduped ones */ strset__free(d->btf->strs_set); - d->btf->hdr->str_len = strset__data_size(d->strs_set); + btf_hdr_update_str_len(d->btf, strset__data_size(d->strs_set)); d->btf->strs_set = d->strs_set; d->strs_set = NULL; d->btf->strs_deduped = true; @@ -3795,6 +4237,20 @@ err_out: return err; } +/* + * Calculate type signature hash of TYPEDEF, ignoring referenced type IDs, + * as referenced type IDs equivalence is established separately during type + * graph equivalence check algorithm. + */ +static long btf_hash_typedef(struct btf_type *t) +{ + long h; + + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info); + return h; +} + static long btf_hash_common(struct btf_type *t) { long h; @@ -3812,6 +4268,13 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) t1->size == t2->size; } +/* Check structural compatibility of two TYPEDEF. */ +static bool btf_equal_typedef(struct btf_type *t1, struct btf_type *t2) +{ + return t1->name_off == t2->name_off && + t1->info == t2->info; +} + /* Calculate type signature hash of INT or TAG. */ static long btf_hash_int_decl_tag(struct btf_type *t) { @@ -4299,46 +4762,120 @@ static inline __u16 btf_fwd_kind(struct btf_type *t) return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT; } -/* Check if given two types are identical ARRAY definitions */ -static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth) { struct btf_type *t1, *t2; - + int k1, k2; +recur: t1 = btf_type_by_id(d->btf, id1); t2 = btf_type_by_id(d->btf, id2); - if (!btf_is_array(t1) || !btf_is_array(t2)) + if (depth <= 0) { + pr_debug("Reached depth limit for identical type comparison for '%s'/'%s'\n", + btf__name_by_offset(d->btf, t1->name_off), + btf__name_by_offset(d->btf, t2->name_off)); return false; + } - return btf_equal_array(t1, t2); -} + k1 = btf_kind(t1); + k2 = btf_kind(t2); + if (k1 != k2) + return false; -/* Check if given two types are identical STRUCT/UNION definitions */ -static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) -{ - const struct btf_member *m1, *m2; - struct btf_type *t1, *t2; - int n, i; + switch (k1) { + case BTF_KIND_UNKN: /* VOID */ + return true; + case BTF_KIND_INT: + return btf_equal_int_tag(t1, t2); + case BTF_KIND_ENUM: + case BTF_KIND_ENUM64: + return btf_compat_enum(t1, t2); + case BTF_KIND_FWD: + case BTF_KIND_FLOAT: + return btf_equal_common(t1, t2); + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + case BTF_KIND_TYPE_TAG: + if (t1->info != t2->info || t1->name_off != t2->name_off) + return false; + id1 = t1->type; + id2 = t2->type; + goto recur; + case BTF_KIND_ARRAY: { + struct btf_array *a1, *a2; - t1 = btf_type_by_id(d->btf, id1); - t2 = btf_type_by_id(d->btf, id2); + if (!btf_compat_array(t1, t2)) + return false; - if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) - return false; + a1 = btf_array(t1); + a2 = btf_array(t1); - if (!btf_shallow_equal_struct(t1, t2)) - return false; + if (a1->index_type != a2->index_type && + !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1)) + return false; - m1 = btf_members(t1); - m2 = btf_members(t2); - for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { - if (m1->type != m2->type && - !btf_dedup_identical_arrays(d, m1->type, m2->type) && - !btf_dedup_identical_structs(d, m1->type, m2->type)) + if (a1->type != a2->type && + !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1)) return false; + + return true; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m1, *m2; + int i, n; + + if (!btf_shallow_equal_struct(t1, t2)) + return false; + + m1 = btf_members(t1); + m2 = btf_members(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { + if (m1->type == m2->type) + continue; + if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) { + if (t1->name_off) { + pr_debug("%s '%s' size=%d vlen=%d id1[%u] id2[%u] shallow-equal but not identical for field#%d '%s'\n", + k1 == BTF_KIND_STRUCT ? "STRUCT" : "UNION", + btf__name_by_offset(d->btf, t1->name_off), + t1->size, btf_vlen(t1), id1, id2, i, + btf__name_by_offset(d->btf, m1->name_off)); + } + return false; + } + } + return true; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *p1, *p2; + int i, n; + + if (!btf_compat_fnproto(t1, t2)) + return false; + + if (t1->type != t2->type && + !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1)) + return false; + + p1 = btf_params(t1); + p2 = btf_params(t2); + for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) { + if (p1->type == p2->type) + continue; + if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1)) + return false; + } + return true; + } + default: + return false; } - return true; } + /* * Check equivalence of BTF type graph formed by candidate struct/union (we'll * call it "candidate graph" in this description for brevity) to a type graph @@ -4457,19 +4994,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, * different fields within the *same* struct. This breaks type * equivalence check, which makes an assumption that candidate * types sub-graph has a consistent and deduped-by-compiler - * types within a single CU. So work around that by explicitly - * allowing identical array types here. + * types within a single CU. And similar situation can happen + * with struct/union sometimes, and event with pointers. + * So accommodate cases like this doing a structural + * comparison recursively, but avoiding being stuck in endless + * loops by limiting the depth up to which we check. */ - if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) - return 1; - /* It turns out that similar situation can happen with - * struct/union sometimes, sigh... Handle the case where - * structs/unions are exactly the same, down to the referenced - * type IDs. Anything more complicated (e.g., if referenced - * types are different, but equivalent) is *way more* - * complicated and requires a many-to-many equivalence mapping. - */ - if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) + if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16)) return 1; return 0; } @@ -4555,8 +5086,16 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, canon_m = btf_members(canon_type); for (i = 0; i < vlen; i++) { eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); - if (eq <= 0) + if (eq <= 0) { + if (cand_type->name_off) { + pr_debug("%s '%s' size=%d vlen=%d cand_id[%u] canon_id[%u] shallow-equal but not equiv for field#%d '%s': %d\n", + cand_kind == BTF_KIND_STRUCT ? "STRUCT" : "UNION", + btf__name_by_offset(d->btf, cand_type->name_off), + cand_type->size, vlen, cand_id, canon_id, i, + btf__name_by_offset(d->btf, cand_m->name_off), eq); + } return eq; + } cand_m++; canon_m++; } @@ -4681,13 +5220,30 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) } } +static inline long btf_hash_by_kind(struct btf_type *t, __u16 kind) +{ + if (kind == BTF_KIND_TYPEDEF) + return btf_hash_typedef(t); + else + return btf_hash_struct(t); +} + +static inline bool btf_equal_by_kind(struct btf_type *t1, struct btf_type *t2, __u16 kind) +{ + if (kind == BTF_KIND_TYPEDEF) + return btf_equal_typedef(t1, t2); + else + return btf_shallow_equal_struct(t1, t2); +} + /* - * Deduplicate struct/union types. + * Deduplicate struct/union and typedef types. * * For each struct/union type its type signature hash is calculated, taking * into account type's name, size, number, order and names of fields, but * ignoring type ID's referenced from fields, because they might not be deduped - * completely until after reference types deduplication phase. This type hash + * completely until after reference types deduplication phase. For each typedef + * type, the hash is computed based on the type’s name and size. This type hash * is used to iterate over all potential canonical types, sharing same hash. * For each canonical candidate we check whether type graphs that they form * (through referenced types in fields and so on) are equivalent using algorithm @@ -4719,18 +5275,20 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) t = btf_type_by_id(d->btf, type_id); kind = btf_kind(t); - if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + if (kind != BTF_KIND_STRUCT && + kind != BTF_KIND_UNION && + kind != BTF_KIND_TYPEDEF) return 0; - h = btf_hash_struct(t); + h = btf_hash_by_kind(t, kind); for_each_dedup_cand(d, hash_entry, h) { __u32 cand_id = hash_entry->value; int eq; /* * Even though btf_dedup_is_equiv() checks for - * btf_shallow_equal_struct() internally when checking two - * structs (unions) for equivalence, we need to guard here + * btf_equal_by_kind() internally when checking two + * structs (unions) or typedefs for equivalence, we need to guard here * from picking matching FWD type as a dedup candidate. * This can happen due to hash collision. In such case just * relying on btf_dedup_is_equiv() would lead to potentially @@ -4738,7 +5296,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) * FWD and compatible STRUCT/UNION are considered equivalent. */ cand_type = btf_type_by_id(d->btf, cand_id); - if (!btf_shallow_equal_struct(t, cand_type)) + if (!btf_equal_by_kind(t, cand_type, kind)) continue; btf_dedup_clear_hypot_map(d); @@ -4776,18 +5334,18 @@ static int btf_dedup_struct_types(struct btf_dedup *d) /* * Deduplicate reference type. * - * Once all primitive and struct/union types got deduplicated, we can easily + * Once all primitive, struct/union and typedef types got deduplicated, we can easily * deduplicate all other (reference) BTF types. This is done in two steps: * * 1. Resolve all referenced type IDs into their canonical type IDs. This - * resolution can be done either immediately for primitive or struct/union types - * (because they were deduped in previous two phases) or recursively for + * resolution can be done either immediately for primitive, struct/union, and typedef + * types (because they were deduped in previous two phases) or recursively for * reference types. Recursion will always terminate at either primitive or - * struct/union type, at which point we can "unwind" chain of reference types - * one by one. There is no danger of encountering cycles because in C type - * system the only way to form type cycle is through struct/union, so any chain - * of reference types, even those taking part in a type cycle, will inevitably - * reach struct/union at some point. + * struct/union and typedef types, at which point we can "unwind" chain of reference + * types one by one. There is no danger of encountering cycles in C, as the only way to + * form a type cycle is through struct or union types. Go can form such cycles through + * typedef. Thus, any chain of reference types, even those taking part in a type cycle, + * will inevitably reach a struct/union or typedef type at some point. * * 2. Once all referenced type IDs are resolved into canonical ones, BTF type * becomes "stable", in the sense that no further deduplication will cause @@ -4819,7 +5377,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) case BTF_KIND_VOLATILE: case BTF_KIND_RESTRICT: case BTF_KIND_PTR: - case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: case BTF_KIND_TYPE_TAG: ref_type_id = btf_dedup_ref_type(d, t->type); @@ -5100,7 +5657,7 @@ static int btf_dedup_compact_types(struct btf_dedup *d) continue; t = btf__type_by_id(d->btf, id); - len = btf_type_size(t); + len = btf_type_size(d->btf, t); if (len < 0) return len; @@ -5114,14 +5671,17 @@ static int btf_dedup_compact_types(struct btf_dedup *d) /* shrink struct btf's internal types index and update btf_header */ d->btf->nr_types = next_type_id - d->btf->start_id; d->btf->type_offs_cap = d->btf->nr_types; - d->btf->hdr->type_len = p - d->btf->types_data; + d->btf->hdr.type_len = p - d->btf->types_data; new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap, sizeof(*new_offs)); if (d->btf->type_offs_cap && !new_offs) return -ENOMEM; d->btf->type_offs = new_offs; - d->btf->hdr->str_off = d->btf->hdr->type_len; - d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len; + if (d->btf->layout) + d->btf->hdr.layout_off = d->btf->hdr.type_off + d->btf->hdr.type_len; + d->btf->hdr.str_off = d->btf->hdr.type_off + d->btf->hdr.type_len + d->btf->hdr.layout_len; + d->btf->raw_size = d->btf->hdr.hdr_len + d->btf->hdr.type_off + d->btf->hdr.type_len + + d->btf->hdr.layout_len + d->btf->hdr.str_len; return 0; } @@ -5217,7 +5777,10 @@ struct btf *btf__load_vmlinux_btf(void) pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n", sysfs_btf_path); } else { - btf = btf__parse(sysfs_btf_path, NULL); + btf = btf_parse_raw_mmap(sysfs_btf_path, NULL); + if (IS_ERR(btf)) + btf = btf__parse(sysfs_btf_path, NULL); + if (!btf) { err = -errno; pr_warn("failed to read kernel BTF from '%s': %s\n", @@ -5576,7 +6139,7 @@ int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf, goto done; } dist.split_start_id = btf__type_cnt(old_base); - dist.split_start_str = old_base->hdr->str_len; + dist.split_start_str = old_base->hdr.str_len; /* Pass over src split BTF; generate the list of base BTF type ids it * references; these will constitute our distilled BTF set to be @@ -5645,14 +6208,14 @@ done: const struct btf_header *btf_header(const struct btf *btf) { - return btf->hdr; + return &btf->hdr; } void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) { btf->base_btf = (struct btf *)base_btf; btf->start_id = btf__type_cnt(base_btf); - btf->start_str_off = base_btf->hdr->str_len; + btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off; } int btf__relocate(struct btf *btf, const struct btf *base_btf) @@ -5663,3 +6226,135 @@ int btf__relocate(struct btf *btf, const struct btf *base_btf) btf->owns_base = false; return libbpf_err(err); } + +struct btf_permute { + struct btf *btf; + __u32 *id_map; + __u32 start_offs; +}; + +/* Callback function to remap individual type ID references */ +static int btf_permute_remap_type_id(__u32 *type_id, void *ctx) +{ + struct btf_permute *p = ctx; + __u32 new_id = *type_id; + + /* refer to the base BTF or VOID type */ + if (new_id < p->btf->start_id) + return 0; + + if (new_id >= btf__type_cnt(p->btf)) + return -EINVAL; + + *type_id = p->id_map[new_id - p->btf->start_id + p->start_offs]; + return 0; +} + +int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt, + const struct btf_permute_opts *opts) +{ + struct btf_permute p; + struct btf_ext *btf_ext; + void *nt, *new_types = NULL; + __u32 *order_map = NULL; + int err = 0, i; + __u32 n, id, start_offs = 0; + + if (!OPTS_VALID(opts, btf_permute_opts)) + return libbpf_err(-EINVAL); + + if (btf__base_btf(btf)) { + n = btf->nr_types; + } else { + if (id_map[0] != 0) + return libbpf_err(-EINVAL); + n = btf__type_cnt(btf); + start_offs = 1; + } + + if (id_map_cnt != n) + return libbpf_err(-EINVAL); + + /* record the sequence of types */ + order_map = calloc(id_map_cnt, sizeof(*id_map)); + if (!order_map) { + err = -ENOMEM; + goto done; + } + + new_types = calloc(btf->hdr.type_len, 1); + if (!new_types) { + err = -ENOMEM; + goto done; + } + + err = btf_ensure_modifiable(btf); + if (err) + goto done; + + for (i = start_offs; i < id_map_cnt; i++) { + id = id_map[i]; + if (id < btf->start_id || id >= btf__type_cnt(btf)) { + err = -EINVAL; + goto done; + } + id -= btf->start_id - start_offs; + /* cannot be mapped to the same ID */ + if (order_map[id]) { + err = -EINVAL; + goto done; + } + order_map[id] = i + btf->start_id - start_offs; + } + + p.btf = btf; + p.id_map = id_map; + p.start_offs = start_offs; + nt = new_types; + for (i = start_offs; i < id_map_cnt; i++) { + struct btf_field_iter it; + const struct btf_type *t; + __u32 *type_id; + int type_size; + + id = order_map[i]; + t = btf__type_by_id(btf, id); + type_size = btf_type_size(btf, t); + memcpy(nt, t, type_size); + + /* fix up referenced IDs for BTF */ + err = btf_field_iter_init(&it, nt, BTF_FIELD_ITER_IDS); + if (err) + goto done; + while ((type_id = btf_field_iter_next(&it))) { + err = btf_permute_remap_type_id(type_id, &p); + if (err) + goto done; + } + + nt += type_size; + } + + /* fix up referenced IDs for btf_ext */ + btf_ext = OPTS_GET(opts, btf_ext, NULL); + if (btf_ext) { + err = btf_ext_visit_type_ids(btf_ext, btf_permute_remap_type_id, &p); + if (err) + goto done; + } + + for (nt = new_types, i = 0; i < id_map_cnt - start_offs; i++) { + btf->type_offs[i] = nt - new_types; + nt += btf_type_size(btf, nt); + } + + free(order_map); + free(btf->types_data); + btf->types_data = new_types; + return 0; + +done: + free(order_map); + free(new_types); + return libbpf_err(err); +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 47ee8f6ac489..a1f8deca2603 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -94,6 +94,7 @@ LIBBPF_API struct btf *btf__new_empty(void); * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an * ELF BTF section except with a base BTF on top of which split BTF should be * based + * @param base_btf base BTF object * @return new BTF object instance which has to be eventually freed with * **btf__free()** * @@ -108,6 +109,26 @@ LIBBPF_API struct btf *btf__new_empty(void); */ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); +struct btf_new_opts { + size_t sz; + struct btf *base_btf; /* optional base BTF */ + bool add_layout; /* add BTF layout information */ + size_t:0; +}; +#define btf_new_opts__last_field add_layout + +/** + * @brief **btf__new_empty_opts()** creates an unpopulated BTF object with + * optional *base_btf* and BTF kind layout description if *add_layout* + * is set + * @return new BTF object instance which has to be eventually freed with + * **btf__free()** + * + * On error, NULL is returned and the thread-local `errno` variable is + * set to the error code. + */ +LIBBPF_API struct btf *btf__new_empty_opts(struct btf_new_opts *opts); + /** * @brief **btf__distill_base()** creates new versions of the split BTF * *src_btf* and its base BTF. The new base BTF will only contain the types @@ -115,6 +136,10 @@ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf); * When that split BTF is loaded against a (possibly changed) base, this * distilled base BTF will help update references to that (possibly changed) * base BTF. + * @param src_btf source split BTF object + * @param new_base_btf pointer to where the new base BTF object pointer will be stored + * @param new_split_btf pointer to where the new split BTF object pointer will be stored + * @return 0 on success; negative error code, otherwise * * Both the new split and its associated new base BTF must be freed by * the caller. @@ -227,6 +252,7 @@ LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id); +LIBBPF_API int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id); /* func and func_proto construction APIs */ LIBBPF_API int btf__add_func(struct btf *btf, const char *name, @@ -243,6 +269,8 @@ LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id, /* tag construction API */ LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, int component_idx); +LIBBPF_API int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id, + int component_idx); struct btf_dedup_opts { size_t sz; @@ -261,6 +289,9 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); * to base BTF kinds, and verify those references are compatible with * *base_btf*; if they are, *btf* is adjusted such that is re-parented to * *base_btf* and type ids and strings are adjusted to accommodate this. + * @param btf split BTF object to relocate + * @param base_btf base BTF object + * @return 0 on success; negative error code, otherwise * * If successful, 0 is returned and **btf** now has **base_btf** as its * base. @@ -270,6 +301,48 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); */ LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf); +struct btf_permute_opts { + size_t sz; + /* optional .BTF.ext info along the main BTF info */ + struct btf_ext *btf_ext; + size_t :0; +}; +#define btf_permute_opts__last_field btf_ext + +/** + * @brief **btf__permute()** rearranges BTF types in-place according to a specified ID mapping + * @param btf BTF object to permute + * @param id_map Array mapping original type IDs to new IDs + * @param id_map_cnt Number of elements in @id_map + * @param opts Optional parameters, including BTF extension data for reference updates + * @return 0 on success, negative error code on failure + * + * **btf__permute()** reorders BTF types based on the provided @id_map array, + * updating all internal type references to maintain consistency. The function + * operates in-place, modifying the BTF object directly. + * + * For **base BTF**: + * - @id_map must include all types from ID 0 to `btf__type_cnt(btf) - 1` + * - @id_map_cnt must be `btf__type_cnt(btf)` + * - Mapping is defined as `id_map[original_id] = new_id` + * - `id_map[0]` must be 0 (void type cannot be moved) + * + * For **split BTF**: + * - @id_map must include only split types (types added on top of the base BTF) + * - @id_map_cnt must be `btf__type_cnt(btf) - btf__type_cnt(btf__base_btf(btf))` + * - Mapping is defined as `id_map[original_id - start_id] = new_id` + * - `start_id` equals `btf__type_cnt(btf__base_btf(btf))` + * + * After permutation, all type references within the BTF data and optional + * BTF extension (if provided via @opts) are updated automatically. + * + * On error, returns a negative error code and sets errno: + * - `-EINVAL`: Invalid parameters or invalid ID mapping + * - `-ENOMEM`: Memory allocation failure + */ +LIBBPF_API int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt, + const struct btf_permute_opts *opts); + struct btf_dump; struct btf_dump_opts { @@ -323,9 +396,10 @@ struct btf_dump_type_data_opts { bool compact; /* no newlines/indentation */ bool skip_names; /* skip member/type names */ bool emit_zeroes; /* show 0-valued fields */ + bool emit_strings; /* print char arrays as strings */ size_t :0; }; -#define btf_dump_type_data_opts__last_field emit_zeroes +#define btf_dump_type_data_opts__last_field emit_strings LIBBPF_API int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index a3fc6908f6c9..53c6624161d7 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -21,7 +21,6 @@ #include "hashmap.h" #include "libbpf.h" #include "libbpf_internal.h" -#include "str_error.h" static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; @@ -68,6 +67,7 @@ struct btf_dump_data { bool compact; bool skip_names; bool emit_zeroes; + bool emit_strings; __u8 indent_lvl; /* base indent level */ char indent_str[BTF_DATA_INDENT_STR_LEN]; /* below are used during iteration */ @@ -226,6 +226,9 @@ static void btf_dump_free_names(struct hashmap *map) size_t bkt; struct hashmap_entry *cur; + if (!map) + return; + hashmap__for_each_entry(map, cur, bkt) free((void *)cur->pkey); @@ -1494,7 +1497,10 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_TYPE_TAG: btf_dump_emit_mods(d, decls); name = btf_name_of(d, t->name_off); - btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); + if (btf_kflag(t)) + btf_dump_printf(d, " __attribute__((%s))", name); + else + btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); break; case BTF_KIND_ARRAY: { const struct btf_array *a = btf_array(t); @@ -1756,9 +1762,18 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d, __u16 left_shift_bits, right_shift_bits; const __u8 *bytes = data; __u8 nr_copy_bits; + __u8 start_bit, nr_bytes; __u64 num = 0; int i; + /* Calculate how many bytes cover the bitfield */ + start_bit = bits_offset % 8; + nr_bytes = (start_bit + bit_sz + 7) / 8; + + /* Bound check */ + if (data + nr_bytes > d->typed_dump->data_end) + return -E2BIG; + /* Maximum supported bitfield size is 64 bits */ if (t->size > 8) { pr_warn("unexpected bitfield size %d\n", t->size); @@ -2025,6 +2040,52 @@ static int btf_dump_var_data(struct btf_dump *d, return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); } +static int btf_dump_string_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_array *array = btf_array(t); + const char *chars = data; + __u32 i; + + /* Make sure it is a NUL-terminated string. */ + for (i = 0; i < array->nelems; i++) { + if ((void *)(chars + i) >= d->typed_dump->data_end) + return -E2BIG; + if (chars[i] == '\0') + break; + } + if (i == array->nelems) { + /* The caller will print this as a regular array. */ + return -EINVAL; + } + + btf_dump_data_pfx(d); + btf_dump_printf(d, "\""); + + for (i = 0; i < array->nelems; i++) { + char c = chars[i]; + + if (c == '\0') { + /* + * When printing character arrays as strings, NUL bytes + * are always treated as string terminators; they are + * never printed. + */ + break; + } + if (isprint(c)) + btf_dump_printf(d, "%c", c); + else + btf_dump_printf(d, "\\x%02x", (__u8)c); + } + + btf_dump_printf(d, "\""); + + return 0; +} + static int btf_dump_array_data(struct btf_dump *d, const struct btf_type *t, __u32 id, @@ -2052,8 +2113,13 @@ static int btf_dump_array_data(struct btf_dump *d, * char arrays, so if size is 1 and element is * printable as a char, we'll do that. */ - if (elem_size == 1) + if (elem_size == 1) { + if (d->typed_dump->emit_strings && + btf_dump_string_data(d, t, id, data) == 0) { + return 0; + } d->typed_dump->is_array_char = true; + } } /* note that we increment depth before calling btf_dump_print() below; @@ -2541,6 +2607,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, d->typed_dump->compact = OPTS_GET(opts, compact, false); d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); + d->typed_dump->emit_strings = OPTS_GET(opts, emit_strings, false); ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index 823f83ad819c..295dbda24580 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include "libbpf_internal.h" -#include "str_error.h" /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 760657f5224c..4f19a0d79b0c 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -6,7 +6,6 @@ #include "libbpf.h" #include "libbpf_common.h" #include "libbpf_internal.h" -#include "str_error.h" static inline __u64 ptr_to_u64(const void *ptr) { @@ -507,6 +506,115 @@ static int probe_kern_arg_ctx_tag(int token_fd) return probe_fd(prog_fd); } +static int probe_ldimm64_full_range_off(int token_fd) +{ + char log_buf[1024]; + int prog_fd, map_fd; + int ret; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + .log_buf = log_buf, + .log_size = sizeof(log_buf), + ); + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1UL << 30), + BPF_EXIT_INSN(), + }; + int insn_cnt = ARRAY_SIZE(insns); + + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr", sizeof(int), 1, 1, &map_opts); + if (map_fd < 0) { + ret = -errno; + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); + return ret; + } + insns[0].imm = map_fd; + + log_buf[0] = '\0'; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "global_reloc", "GPL", insns, insn_cnt, &prog_opts); + ret = -errno; + + close(map_fd); + + if (prog_fd >= 0) { + pr_warn("Error in %s(): Program loading unexpectedly succeeded.\n", __func__); + close(prog_fd); + return -EINVAL; + } + + /* + * Feature is allowed if we're not failing with the error message + * "direct value offset of %u is not allowed" removed in + * 12a1fe6e12db ("bpf/verifier: Do not limit maximum direct offset into arena map"). + * We should instead fail with "invalid access to map value pointer". + * Ensure we match with one of the two and we're not failing with a + * different, unexpected message. + */ + if (strstr(log_buf, "direct value offset of")) + return 0; + + if (!strstr(log_buf, "invalid access to map value pointer")) { + pr_warn("Error in %s(): Program unexpectedly failed with message: %s.\n", + __func__, log_buf); + return ret; + } + + return 1; +} + +#ifdef __x86_64__ + +#ifndef __NR_uprobe +#define __NR_uprobe 336 +#endif + +static int probe_uprobe_syscall(int token_fd) +{ + /* + * If kernel supports uprobe() syscall, it will return -ENXIO when called + * from the outside of a kernel-generated uprobe trampoline. + */ + return syscall(__NR_uprobe) < 0 && errno == ENXIO; +} +#else +static int probe_uprobe_syscall(int token_fd) +{ + return 0; +} +#endif + +static int probe_kern_btf_layout(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct btf_layout layout[] = { + { 0, 0, 0 }, + { sizeof(__u32), 0, 0 }, + }; + struct btf_header hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = sizeof(types), + .str_off = sizeof(types) + sizeof(layout), + .str_len = sizeof(strs), + .layout_off = sizeof(types), + .layout_len = sizeof(layout), + }; + + return probe_fd(libbpf__load_raw_btf_hdr(&hdr, (char *)types, strs, + (char *)layout, token_fd)); +} + typedef int (*feature_probe_fn)(int /* token_fd */); static struct kern_feature_cache feature_cache; @@ -582,6 +690,15 @@ static struct kern_feature_desc { [FEAT_BTF_QMARK_DATASEC] = { "BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec, }, + [FEAT_LDIMM64_FULL_RANGE_OFF] = { + "full range LDIMM64 support", probe_ldimm64_full_range_off, + }, + [FEAT_UPROBE_SYSCALL] = { + "kernel supports uprobe syscall", probe_uprobe_syscall, + }, + [FEAT_BTF_LAYOUT] = { + "kernel supports BTF layout", probe_kern_btf_layout, + }, }; bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 113ae4abd345..9478b8f78f26 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -4,6 +4,7 @@ #include <stdlib.h> #include <string.h> #include <errno.h> +#include <asm/byteorder.h> #include <linux/filter.h> #include <sys/param.h> #include "btf.h" @@ -13,8 +14,6 @@ #include "hashmap.h" #include "bpf_gen_internal.h" #include "skel_internal.h" -#include <asm/byteorder.h> -#include "str_error.h" #define MAX_USED_MAPS 64 #define MAX_USED_PROGS 32 @@ -110,6 +109,7 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in static int add_data(struct bpf_gen *gen, const void *data, __u32 size); static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off); +static void emit_signature_match(struct bpf_gen *gen); void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps) { @@ -152,6 +152,8 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */ emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); emit(gen, BPF_EXIT_INSN()); + if (OPTS_GET(gen->opts, gen_hash, false)) + emit_signature_match(gen); } static int add_data(struct bpf_gen *gen, const void *data, __u32 size) @@ -368,6 +370,8 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off) __emit_sys_close(gen); } +static void compute_sha_update_offsets(struct bpf_gen *gen); + int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) { int i; @@ -394,6 +398,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); + if (OPTS_GET(gen->opts, gen_hash, false)) + compute_sha_update_offsets(gen); + pr_debug("gen: finish %s\n", errstr(gen->error)); if (!gen->error) { struct gen_loader_opts *opts = gen->opts; @@ -446,6 +453,22 @@ void bpf_gen__free(struct bpf_gen *gen) _val; \ }) +static void compute_sha_update_offsets(struct bpf_gen *gen) +{ + __u64 sha[SHA256_DWORD_SIZE]; + __u64 sha_dw; + int i; + + libbpf_sha256(gen->data_start, gen->data_cur - gen->data_start, (__u8 *)sha); + for (i = 0; i < SHA256_DWORD_SIZE; i++) { + struct bpf_insn *insn = + (struct bpf_insn *)(gen->insn_start + gen->hash_insn_offset[i]); + sha_dw = tgt_endian(sha[i]); + insn[0].imm = (__u32)sha_dw; + insn[1].imm = sha_dw >> 32; + } +} + void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, __u32 btf_raw_size) { @@ -557,6 +580,28 @@ void bpf_gen__map_create(struct bpf_gen *gen, emit_sys_close_stack(gen, stack_off(inner_map_fd)); } +static void emit_signature_match(struct bpf_gen *gen) +{ + __s64 off; + int i; + + for (i = 0; i < SHA256_DWORD_SIZE; i++) { + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX, + 0, 0, 0, 0)); + emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, i * sizeof(__u64))); + gen->hash_insn_offset[i] = gen->insn_cur - gen->insn_start; + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_3, 0, 0, 0, 0, 0)); + + off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 2; + if (is_simm16(off)) { + emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL)); + emit(gen, BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, off)); + } else { + gen->error = -ERANGE; + } + } +} + void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name, enum bpf_attach_type type) { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 194809da5172..3a80a018fc7d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -50,7 +50,6 @@ #include "libbpf.h" #include "bpf.h" #include "btf.h" -#include "str_error.h" #include "libbpf_internal.h" #include "hashmap.h" #include "bpf_gen_internal.h" @@ -60,6 +59,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define MAX_EVENT_NAME_LEN 64 + #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" #define BPF_INSN_SZ (sizeof(struct bpf_insn)) @@ -114,6 +115,7 @@ static const char * const attach_type_name[] = { [BPF_TRACE_FENTRY] = "trace_fentry", [BPF_TRACE_FEXIT] = "trace_fexit", [BPF_MODIFY_RETURN] = "modify_return", + [BPF_TRACE_FSESSION] = "trace_fsession", [BPF_LSM_MAC] = "lsm_mac", [BPF_LSM_CGROUP] = "lsm_cgroup", [BPF_SK_LOOKUP] = "sk_lookup", @@ -189,6 +191,7 @@ static const char * const map_type_name[] = { [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", [BPF_MAP_TYPE_ARENA] = "arena", + [BPF_MAP_TYPE_INSN_ARRAY] = "insn_array", }; static const char * const prog_type_name[] = { @@ -284,7 +287,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) old_errno = errno; va_start(args, format); - __libbpf_pr(level, format, args); + print_fn(level, format, args); va_end(args); errno = old_errno; @@ -316,8 +319,6 @@ static void pr_perm_msg(int err) buf); } -#define STRERR_BUFSIZE 128 - /* Copied from tools/perf/util/util.h */ #ifndef zfree # define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) @@ -370,6 +371,7 @@ enum reloc_type { RELO_EXTERN_CALL, RELO_SUBPROG_ADDR, RELO_CORE, + RELO_INSN_ARRAY, }; struct reloc_desc { @@ -379,8 +381,17 @@ struct reloc_desc { const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ struct { int map_idx; - int sym_off; - int ext_idx; + unsigned int sym_off; + /* + * The following two fields can be unionized, as the + * ext_idx field is used for extern symbols, and the + * sym_size is used for jump tables, which are never + * extern + */ + union { + int ext_idx; + int sym_size; + }; }; }; }; @@ -422,6 +433,11 @@ struct bpf_sec_def { libbpf_prog_attach_fn_t prog_attach_fn; }; +struct bpf_light_subprog { + __u32 sec_insn_off; + __u32 sub_insn_off; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -494,6 +510,10 @@ struct bpf_program { __u32 line_info_rec_size; __u32 line_info_cnt; __u32 prog_flags; + __u8 hash[SHA256_DIGEST_LENGTH]; + + struct bpf_light_subprog *subprogs; + __u32 subprog_cnt; }; struct bpf_struct_ops { @@ -573,6 +593,7 @@ struct bpf_map { bool autocreate; bool autoattach; __u64 map_extra; + struct bpf_program *excl_prog; }; enum extern_type { @@ -595,7 +616,7 @@ struct extern_desc { int sym_idx; int btf_id; int sec_btf_id; - const char *name; + char *name; char *essent_name; bool is_set; bool is_weak; @@ -666,15 +687,23 @@ struct elf_state { int symbols_shndx; bool has_st_ops; int arena_data_shndx; + int jumptables_data_shndx; }; struct usdt_manager; +enum bpf_object_state { + OBJ_OPEN, + OBJ_PREPARED, + OBJ_LOADED, +}; + struct bpf_object { char name[BPF_OBJ_NAME_LEN]; char license[64]; __u32 kern_version; + enum bpf_object_state state; struct bpf_program *programs; size_t nr_programs; struct bpf_map *maps; @@ -686,7 +715,6 @@ struct bpf_object { int nr_extern; int kconfig_map_idx; - bool loaded; bool has_subcalls; bool has_rodata; @@ -727,9 +755,20 @@ struct bpf_object { struct usdt_manager *usdt_man; - struct bpf_map *arena_map; + int arena_map_idx; void *arena_data; size_t arena_data_sz; + size_t arena_data_off; + + void *jumptables_data; + size_t jumptables_data_sz; + + struct { + struct bpf_program *prog; + unsigned int sym_off; + int fd; + } *jumptable_maps; + size_t jumptable_map_cnt; struct kern_feature_cache *feat_cache; char *token_path; @@ -757,6 +796,7 @@ void bpf_program__unload(struct bpf_program *prog) zfree(&prog->func_info); zfree(&prog->line_info); + zfree(&prog->subprogs); } static void bpf_program__exit(struct bpf_program *prog) @@ -890,7 +930,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return -LIBBPF_ERRNO__FORMAT; } - if (sec_off + prog_sz > sec_sz) { + if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) { pr_warn("sec '%s': program at offset %zu crosses section boundary\n", sec_name, sec_off); return -LIBBPF_ERRNO__FORMAT; @@ -1005,35 +1045,33 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw, const struct btf_member *kern_data_member; struct btf *btf = NULL; __s32 kern_vtype_id, kern_type_id; - char tname[256]; + char tname[192], stname[256]; __u32 i; snprintf(tname, sizeof(tname), "%.*s", (int)bpf_core_essential_name_len(tname_raw), tname_raw); - kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT, - &btf, mod_btf); - if (kern_type_id < 0) { - pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", - tname); - return kern_type_id; - } - kern_type = btf__type_by_id(btf, kern_type_id); + snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname); - /* Find the corresponding "map_value" type that will be used - * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example, - * find "struct bpf_struct_ops_tcp_congestion_ops" from the - * btf_vmlinux. + /* Look for the corresponding "map_value" type that will be used + * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first, figure out the btf + * and the mod_btf. + * For example, find "struct bpf_struct_ops_tcp_congestion_ops". */ - kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX, - tname, BTF_KIND_STRUCT); + kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf); if (kern_vtype_id < 0) { - pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n", - STRUCT_OPS_VALUE_PREFIX, tname); + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname); return kern_vtype_id; } kern_vtype = btf__type_by_id(btf, kern_vtype_id); + kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (kern_type_id < 0) { + pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname); + return kern_type_id; + } + kern_type = btf__type_by_id(btf, kern_type_id); + /* Find "struct tcp_congestion_ops" from * struct bpf_struct_ops_tcp_congestion_ops { * [ ... ] @@ -1046,8 +1084,8 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw, break; } if (i == btf_vlen(kern_vtype)) { - pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n", - tname, STRUCT_OPS_VALUE_PREFIX, tname); + pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n", + tname, stname); return -EINVAL; } @@ -1509,9 +1547,10 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.btf_maps_shndx = -1; obj->kconfig_map_idx = -1; + obj->arena_map_idx = -1; obj->kern_version = get_kernel_version(); - obj->loaded = false; + obj->state = OBJ_OPEN; return obj; } @@ -1719,15 +1758,6 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam return ERR_PTR(-ENOENT); } -/* Some versions of Android don't provide memfd_create() in their libc - * implementation, so avoid complications and just go straight to Linux - * syscall. - */ -static int sys_memfd_create(const char *name, unsigned flags) -{ - return syscall(__NR_memfd_create, name, flags); -} - #ifndef MFD_CLOEXEC #define MFD_CLOEXEC 0x0001U #endif @@ -2106,7 +2136,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, } len = strlen(value); - if (value[len - 1] != '"') { + if (len < 2 || value[len - 1] != '"') { pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", ext->name, value); return -EINVAL; @@ -2875,7 +2905,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, var_extra = btf_var(var); map_name = btf__name_by_offset(obj->btf, var->name_off); - if (map_name == NULL || map_name[0] == '\0') { + if (str_is_empty(map_name)) { pr_warn("map #%d: empty name.\n", var_idx); return -EINVAL; } @@ -2963,10 +2993,11 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, void *data, size_t data_sz) { const long page_sz = sysconf(_SC_PAGE_SIZE); + const size_t data_alloc_sz = roundup(data_sz, page_sz); size_t mmap_sz; - mmap_sz = bpf_map_mmap_sz(obj->arena_map); - if (roundup(data_sz, page_sz) > mmap_sz) { + mmap_sz = bpf_map_mmap_sz(map); + if (data_alloc_sz > mmap_sz) { pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", sec_name, mmap_sz, data_sz); return -E2BIG; @@ -2999,7 +3030,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx); data = elf_sec_data(obj, scn); - if (!scn || !data) { + if (!data) { pr_warn("elf: failed to get %s map definitions for %s\n", MAPS_ELF_SEC, obj->path); return -EINVAL; @@ -3039,12 +3070,12 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, if (map->def.type != BPF_MAP_TYPE_ARENA) continue; - if (obj->arena_map) { + if (obj->arena_map_idx >= 0) { pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", - map->name, obj->arena_map->name); + map->name, obj->maps[obj->arena_map_idx].name); return -EINVAL; } - obj->arena_map = map; + obj->arena_map_idx = i; if (obj->efile.arena_data) { err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, @@ -3054,7 +3085,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return err; } } - if (obj->efile.arena_data && !obj->arena_map) { + if (obj->efile.arena_data && obj->arena_map_idx < 0) { pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", ARENA_SEC); return -ENOENT; @@ -3107,12 +3138,14 @@ static bool btf_needs_sanitization(struct bpf_object *obj) bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); + bool has_layout = kernel_supports(obj, FEAT_BTF_LAYOUT); return !has_func || !has_datasec || !has_func_global || !has_float || - !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec; + !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec || + !has_layout; } -static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) +struct btf *bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *orig_btf) { bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC); @@ -3122,9 +3155,64 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64); bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC); + bool has_layout = kernel_supports(obj, FEAT_BTF_LAYOUT); int enum64_placeholder_id = 0; + const struct btf_header *hdr; + struct btf *btf = NULL; + const void *raw_data; struct btf_type *t; int i, j, vlen; + __u32 sz; + int err; + + /* clone BTF to sanitize a copy and leave the original intact */ + raw_data = btf__raw_data(orig_btf, &sz); + if (!raw_data) + return ERR_PTR(-ENOMEM); + /* btf_header() gives us endian-safe header info */ + hdr = btf_header(orig_btf); + + if (!has_layout && hdr->hdr_len >= sizeof(struct btf_header) && + (hdr->layout_len != 0 || hdr->layout_off != 0)) { + const struct btf_header *old_hdr = raw_data; + struct btf_header *new_hdr; + void *new_raw_data; + __u32 new_str_off; + + /* + * Need to rewrite BTF to exclude layout information and + * move string section to immediately after types. + */ + new_raw_data = malloc(sz); + if (!new_raw_data) + return ERR_PTR(-ENOMEM); + + memcpy(new_raw_data, raw_data, sz); + new_hdr = new_raw_data; + new_hdr->layout_off = 0; + new_hdr->layout_len = 0; + new_str_off = hdr->type_off + hdr->type_len; + /* Handle swapped endian case */ + if (old_hdr->magic != hdr->magic) + new_hdr->str_off = bswap_32(new_str_off); + else + new_hdr->str_off = new_str_off; + + memmove(new_raw_data + hdr->hdr_len + new_str_off, + new_raw_data + hdr->hdr_len + hdr->str_off, + hdr->str_len); + sz = hdr->hdr_len + hdr->type_off + hdr->type_len + hdr->str_len; + btf = btf__new(new_raw_data, sz); + free(new_raw_data); + } else { + btf = btf__new(raw_data, sz); + } + err = libbpf_get_error(btf); + if (err) + return ERR_PTR(err); + + /* enforce 8-byte pointers for BPF-targeted BTFs */ + btf__set_pointer_size(btf, 8); for (i = 1; i < btf__type_cnt(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); @@ -3202,9 +3290,10 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) if (enum64_placeholder_id == 0) { enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0); - if (enum64_placeholder_id < 0) - return enum64_placeholder_id; - + if (enum64_placeholder_id < 0) { + btf__free(btf); + return ERR_PTR(enum64_placeholder_id); + } t = (struct btf_type *)btf__type_by_id(btf, i); } @@ -3218,7 +3307,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) } } - return 0; + return btf; } static bool libbpf_needs_btf(const struct bpf_object *obj) @@ -3569,21 +3658,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) sanitize = btf_needs_sanitization(obj); if (sanitize) { - const void *raw_data; - __u32 sz; - - /* clone BTF to sanitize a copy and leave the original intact */ - raw_data = btf__raw_data(obj->btf, &sz); - kern_btf = btf__new(raw_data, sz); - err = libbpf_get_error(kern_btf); - if (err) - return err; - - /* enforce 8-byte pointers for BPF-targeted BTFs */ - btf__set_pointer_size(obj->btf, 8); - err = bpf_object__sanitize_btf(obj, kern_btf); - if (err) - return err; + kern_btf = bpf_object__sanitize_btf(obj, obj->btf); + if (IS_ERR(kern_btf)) + return PTR_ERR(kern_btf); } if (obj->gen_loader) { @@ -3945,6 +4022,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj) } else if (strcmp(name, ARENA_SEC) == 0) { obj->efile.arena_data = data; obj->efile.arena_data_shndx = idx; + } else if (strcmp(name, JUMPTABLES_SEC) == 0) { + obj->jumptables_data = malloc(data->d_size); + if (!obj->jumptables_data) + return -ENOMEM; + memcpy(obj->jumptables_data, data->d_buf, data->d_size); + obj->jumptables_data_sz = data->d_size; + obj->efile.jumptables_data_shndx = idx; } else { pr_info("elf: skipping unrecognized data section(%d) %s\n", idx, name); @@ -4241,7 +4325,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) if (!sym_is_extern(sym)) continue; ext_name = elf_sym_str(obj, sym->st_name); - if (!ext_name || !ext_name[0]) + if (str_is_empty(ext_name)) continue; ext = obj->externs; @@ -4260,7 +4344,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return ext->btf_id; } t = btf__type_by_id(obj->btf, ext->btf_id); - ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); + if (!ext->name) + return -ENOMEM; ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; @@ -4483,6 +4569,44 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) } } +static int bpf_prog_compute_hash(struct bpf_program *prog) +{ + struct bpf_insn *purged; + int i, err = 0; + + purged = calloc(prog->insns_cnt, BPF_INSN_SZ); + if (!purged) + return -ENOMEM; + + /* If relocations have been done, the map_fd needs to be + * discarded for the digest calculation. + */ + for (i = 0; i < prog->insns_cnt; i++) { + purged[i] = prog->insns[i]; + if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) && + (purged[i].src_reg == BPF_PSEUDO_MAP_FD || + purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) { + purged[i].imm = 0; + i++; + if (i >= prog->insns_cnt || + prog->insns[i].code != 0 || + prog->insns[i].dst_reg != 0 || + prog->insns[i].src_reg != 0 || + prog->insns[i].off != 0) { + err = -EINVAL; + goto out; + } + purged[i] = prog->insns[i]; + purged[i].imm = 0; + } + } + libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn), + prog->hash); +out: + free(purged); + return err; +} + static int bpf_program__record_reloc(struct bpf_program *prog, struct reloc_desc *reloc_desc, __u32 insn_idx, const char *sym_name, @@ -4580,10 +4704,30 @@ static int bpf_program__record_reloc(struct bpf_program *prog, /* arena data relocation */ if (shdr_idx == obj->efile.arena_data_shndx) { + if (obj->arena_map_idx < 0) { + pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n", + prog->name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } reloc_desc->type = RELO_DATA; reloc_desc->insn_idx = insn_idx; - reloc_desc->map_idx = obj->arena_map - obj->maps; + reloc_desc->map_idx = obj->arena_map_idx; + reloc_desc->sym_off = sym->st_value; + + map = &obj->maps[obj->arena_map_idx]; + pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n", + prog->name, obj->arena_map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); + return 0; + } + + /* jump table data relocation */ + if (shdr_idx == obj->efile.jumptables_data_shndx) { + reloc_desc->type = RELO_INSN_ARRAY; + reloc_desc->insn_idx = insn_idx; + reloc_desc->map_idx = -1; reloc_desc->sym_off = sym->st_value; + reloc_desc->sym_size = sym->st_size; return 0; } @@ -4845,6 +4989,11 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) return 0; } +static bool map_is_created(const struct bpf_map *map) +{ + return map->obj->state >= OBJ_PREPARED || map->reused; +} + bool bpf_map__autocreate(const struct bpf_map *map) { return map->autocreate; @@ -4852,7 +5001,7 @@ bool bpf_map__autocreate(const struct bpf_map *map) int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) { - if (map->obj->loaded) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->autocreate = autocreate; @@ -4946,7 +5095,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map) int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { - if (map->obj->loaded) + if (map_is_created(map)) return libbpf_err(-EBUSY); map->def.max_entries = max_entries; @@ -5054,12 +5203,20 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) */ return true; - if (obj->token_fd) + if (obj->feat_cache) return feat_supported(obj->feat_cache, feat_id); return feat_supported(NULL, feat_id); } +/* Used in testing to simulate missing features. */ +void bpf_object_set_feat_cache(struct bpf_object *obj, struct kern_feature_cache *cache) +{ + if (obj->feat_cache) + free(obj->feat_cache); + obj->feat_cache = cache; +} + static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { struct bpf_map_info map_info; @@ -5076,6 +5233,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) return false; } + /* + * bpf_get_map_info_by_fd() for DEVMAP will always return flags with + * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time. + * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from + * bpf_get_map_info_by_fd() when checking for compatibility with an + * existing DEVMAP. + */ + if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH) + map_info.map_flags &= ~BPF_F_RDONLY_PROG; + return (map_info.type == map->def.type && map_info.key_size == map->def.key_size && map_info.value_size == map->def.value_size && @@ -5191,11 +5358,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); -static bool map_is_created(const struct bpf_map *map) -{ - return map->obj->loaded || map->reused; -} - static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -5212,6 +5374,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.token_fd = obj->token_fd; if (obj->token_fd) create_attr.map_flags |= BPF_F_TOKEN_FD; + if (map->excl_prog) { + err = bpf_prog_compute_hash(map->excl_prog); + if (err) + return err; + + create_attr.excl_prog_hash = map->excl_prog->hash; + create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH; + } if (bpf_map__is_struct_ops(map)) { create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -5511,7 +5681,8 @@ retry: return err; } if (obj->arena_data) { - memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz); + memcpy(map->mmaped + obj->arena_data_off, obj->arena_data, + obj->arena_data_sz); zfree(&obj->arena_data); } } @@ -5681,11 +5852,12 @@ static int load_module_btfs(struct bpf_object *obj) info.name = ptr_to_u64(name); info.name_len = sizeof(name); + btf = NULL; err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { err = -errno; pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); - goto err_out; + break; } /* ignore non-module BTFs */ @@ -5699,15 +5871,15 @@ static int load_module_btfs(struct bpf_object *obj) if (err) { pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", name, id, errstr(err)); - goto err_out; + break; } err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap, sizeof(*obj->btf_modules), obj->btf_module_cnt + 1); if (err) - goto err_out; + break; - mod_btf = &obj->btf_modules[obj->btf_module_cnt++]; + mod_btf = &obj->btf_modules[obj->btf_module_cnt]; mod_btf->btf = btf; mod_btf->id = id; @@ -5715,16 +5887,16 @@ static int load_module_btfs(struct bpf_object *obj) mod_btf->name = strdup(name); if (!mod_btf->name) { err = -ENOMEM; - goto err_out; + break; } - continue; + obj->btf_module_cnt++; + } -err_out: + if (err) { + btf__free(btf); close(fd); - return err; } - - return 0; + return err; } static struct bpf_core_cand_list * @@ -6079,6 +6251,157 @@ static void poison_kfunc_call(struct bpf_program *prog, int relo_idx, insn->imm = POISON_CALL_KFUNC_BASE + ext_idx; } +static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, unsigned int sym_off) +{ + size_t i; + + for (i = 0; i < obj->jumptable_map_cnt; i++) { + /* + * This might happen that same offset is used for two different + * programs (as jump tables can be the same). However, for + * different programs different maps should be created. + */ + if (obj->jumptable_maps[i].sym_off == sym_off && + obj->jumptable_maps[i].prog == prog) + return obj->jumptable_maps[i].fd; + } + + return -ENOENT; +} + +static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, unsigned int sym_off, int map_fd) +{ + size_t cnt = obj->jumptable_map_cnt; + size_t size = sizeof(obj->jumptable_maps[0]); + void *tmp; + + tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size); + if (!tmp) + return -ENOMEM; + + obj->jumptable_maps = tmp; + obj->jumptable_maps[cnt].prog = prog; + obj->jumptable_maps[cnt].sym_off = sym_off; + obj->jumptable_maps[cnt].fd = map_fd; + obj->jumptable_map_cnt++; + + return 0; +} + +static int find_subprog_idx(struct bpf_program *prog, int insn_idx) +{ + int i; + + for (i = prog->subprog_cnt - 1; i >= 0; i--) { + if (insn_idx >= prog->subprogs[i].sub_insn_off) + return i; + } + + return -1; +} + +static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo) +{ + const __u32 jt_entry_size = 8; + unsigned int sym_off = relo->sym_off; + int jt_size = relo->sym_size; + __u32 max_entries = jt_size / jt_entry_size; + __u32 value_size = sizeof(struct bpf_insn_array_value); + struct bpf_insn_array_value val = {}; + int subprog_idx; + int map_fd, err; + __u64 insn_off; + __u64 *jt; + __u32 i; + + map_fd = find_jt_map(obj, prog, sym_off); + if (map_fd >= 0) + return map_fd; + + if (sym_off % jt_entry_size) { + pr_warn("map '.jumptables': jumptable start %u should be multiple of %u\n", + sym_off, jt_entry_size); + return -EINVAL; + } + + if (jt_size % jt_entry_size) { + pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n", + jt_size, jt_entry_size); + return -EINVAL; + } + + map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables", + 4, value_size, max_entries, NULL); + if (map_fd < 0) + return map_fd; + + if (!obj->jumptables_data) { + pr_warn("map '.jumptables': ELF file is missing jump table data\n"); + err = -EINVAL; + goto err_close; + } + if (sym_off + jt_size > obj->jumptables_data_sz) { + pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n", + obj->jumptables_data_sz, sym_off + jt_size); + err = -EINVAL; + goto err_close; + } + + subprog_idx = -1; /* main program */ + if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) { + pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx); + err = -EINVAL; + goto err_close; + } + if (prog->subprogs) + subprog_idx = find_subprog_idx(prog, relo->insn_idx); + + jt = (__u64 *)(obj->jumptables_data + sym_off); + for (i = 0; i < max_entries; i++) { + /* + * The offset should be made to be relative to the beginning of + * the main function, not the subfunction. + */ + insn_off = jt[i]/sizeof(struct bpf_insn); + if (subprog_idx >= 0) { + insn_off -= prog->subprogs[subprog_idx].sec_insn_off; + insn_off += prog->subprogs[subprog_idx].sub_insn_off; + } else { + insn_off -= prog->sec_insn_off; + } + + /* + * LLVM-generated jump tables contain u64 records, however + * should contain values that fit in u32. + */ + if (insn_off > UINT32_MAX) { + pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %u\n", + (long long)jt[i], sym_off + i * jt_entry_size); + err = -EINVAL; + goto err_close; + } + + val.orig_off = insn_off; + err = bpf_map_update_elem(map_fd, &i, &val, 0); + if (err) + goto err_close; + } + + err = bpf_map_freeze(map_fd); + if (err) + goto err_close; + + err = add_jt_map(obj, prog, sym_off, map_fd); + if (err) + goto err_close; + + return map_fd; + +err_close: + close(map_fd); + return err; +} + /* Relocate data references within program code: * - map references; * - global variable references; @@ -6112,6 +6435,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_DATA: map = &obj->maps[relo->map_idx]; insn[1].imm = insn[0].imm + relo->sym_off; + + if (relo->map_idx == obj->arena_map_idx) + insn[1].imm += obj->arena_data_off; + if (obj->gen_loader) { insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE; insn[0].imm = relo->map_idx; @@ -6170,6 +6497,20 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog) case RELO_CORE: /* will be handled by bpf_program_record_relos() */ break; + case RELO_INSN_ARRAY: { + int map_fd; + + map_fd = create_jt_map(obj, prog, relo); + if (map_fd < 0) { + pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n", + prog->name, i, relo->sym_off); + return map_fd; + } + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn->imm = map_fd; + insn->off = 0; + } + break; default: pr_warn("prog '%s': relo #%d: bad relo type %d\n", prog->name, i, relo->type); @@ -6367,36 +6708,62 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra return 0; } +static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog) +{ + size_t size = sizeof(main_prog->subprogs[0]); + int cnt = main_prog->subprog_cnt; + void *tmp; + + tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size); + if (!tmp) + return -ENOMEM; + + main_prog->subprogs = tmp; + main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off; + main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off; + main_prog->subprog_cnt++; + + return 0; +} + static int bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *subprog) { - struct bpf_insn *insns; - size_t new_cnt; - int err; + struct bpf_insn *insns; + size_t new_cnt; + int err; - subprog->sub_insn_off = main_prog->insns_cnt; + subprog->sub_insn_off = main_prog->insns_cnt; - new_cnt = main_prog->insns_cnt + subprog->insns_cnt; - insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); - if (!insns) { - pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); - return -ENOMEM; - } - main_prog->insns = insns; - main_prog->insns_cnt = new_cnt; + new_cnt = main_prog->insns_cnt + subprog->insns_cnt; + insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); + return -ENOMEM; + } + main_prog->insns = insns; + main_prog->insns_cnt = new_cnt; + + memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, + subprog->insns_cnt * sizeof(*insns)); - memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, - subprog->insns_cnt * sizeof(*insns)); + pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", + main_prog->name, subprog->insns_cnt, subprog->name); + + /* The subprog insns are now appended. Append its relos too. */ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; - pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", - main_prog->name, subprog->insns_cnt, subprog->name); + err = save_subprog_offsets(main_prog, subprog); + if (err) { + pr_warn("prog '%s': failed to add subprog offsets: %s\n", + main_prog->name, errstr(err)); + return err; + } - /* The subprog insns are now appended. Append its relos too. */ - err = append_subprog_relos(main_prog, subprog); - if (err) - return err; - return 0; + return 0; } static int @@ -7073,6 +7440,14 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat bpf_object__sort_relos(obj); } + /* place globals at the end of the arena (if supported) */ + if (obj->arena_map_idx >= 0 && kernel_supports(obj, FEAT_LDIMM64_FULL_RANGE_OFF)) { + struct bpf_map *arena_map = &obj->maps[obj->arena_map_idx]; + + obj->arena_data_off = bpf_map_mmap_sz(arena_map) - + roundup(obj->arena_data_sz, sysconf(_SC_PAGE_SIZE)); + } + /* Before relocating calls pre-process relocations and mark * few ld_imm64 instructions that points to subprogs. * Otherwise bpf_object__reloc_code() later would have to consider @@ -7897,13 +8272,6 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i]; - err = bpf_object__sanitize_prog(obj, prog); - if (err) - return err; - } - - for (i = 0; i < obj->nr_programs; i++) { - prog = &obj->programs[i]; if (prog_is_subprog(obj, prog)) continue; if (!prog->autoload) { @@ -7927,6 +8295,21 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } +static int bpf_object_prepare_progs(struct bpf_object *obj) +{ + struct bpf_program *prog; + size_t i; + int err; + + for (i = 0; i < obj->nr_programs; i++) { + prog = &obj->programs[i]; + err = bpf_object__sanitize_prog(obj, prog); + if (err) + return err; + } + return 0; +} + static const struct bpf_sec_def *find_sec_def(const char *sec_name); static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts) @@ -8172,7 +8555,7 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type, struct bpf_object *obj = ctx; const struct btf_type *t; struct extern_desc *ext; - char *res; + const char *res; res = strstr(sym_name, ".llvm."); if (sym_type == 'd' && res) @@ -8543,14 +8926,77 @@ static int bpf_object_prepare_struct_ops(struct bpf_object *obj) return 0; } +static void bpf_object_unpin(struct bpf_object *obj) +{ + int i; + + /* unpin any maps that were auto-pinned during load */ + for (i = 0; i < obj->nr_maps; i++) + if (obj->maps[i].pinned && !obj->maps[i].reused) + bpf_map__unpin(&obj->maps[i], NULL); +} + +static void bpf_object_post_load_cleanup(struct bpf_object *obj) +{ + int i; + + /* clean up fd_array */ + zfree(&obj->fd_array); + + /* clean up module BTFs */ + for (i = 0; i < obj->btf_module_cnt; i++) { + close(obj->btf_modules[i].fd); + btf__free(obj->btf_modules[i].btf); + free(obj->btf_modules[i].name); + } + obj->btf_module_cnt = 0; + zfree(&obj->btf_modules); + + /* clean up vmlinux BTF */ + btf__free(obj->btf_vmlinux); + obj->btf_vmlinux = NULL; +} + +static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path) +{ + int err; + + if (obj->state >= OBJ_PREPARED) { + pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name); + return -EINVAL; + } + + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); + err = err ? : bpf_object__load_vmlinux_btf(obj, false); + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); + err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__init_kern_struct_ops_maps(obj); + err = err ? : bpf_object_adjust_struct_ops_autoload(obj); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); + err = err ? : bpf_object__sanitize_and_load_btf(obj); + err = err ? : bpf_object__create_maps(obj); + err = err ? : bpf_object_prepare_progs(obj); + + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + obj->state = OBJ_LOADED; + return err; + } + + obj->state = OBJ_PREPARED; + return 0; +} + static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path) { - int err, i; + int err; if (!obj) return libbpf_err(-EINVAL); - if (obj->loaded) { + if (obj->state >= OBJ_LOADED) { pr_warn("object '%s': load can't be attempted twice\n", obj->name); return libbpf_err(-EINVAL); } @@ -8565,17 +9011,12 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch return libbpf_err(-LIBBPF_ERRNO__ENDIAN); } - err = bpf_object_prepare_token(obj); - err = err ? : bpf_object__probe_loading(obj); - err = err ? : bpf_object__load_vmlinux_btf(obj, false); - err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); - err = err ? : bpf_object__sanitize_maps(obj); - err = err ? : bpf_object__init_kern_struct_ops_maps(obj); - err = err ? : bpf_object_adjust_struct_ops_autoload(obj); - err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); - err = err ? : bpf_object__sanitize_and_load_btf(obj); - err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__load_progs(obj, extra_log_level); + if (obj->state < OBJ_PREPARED) { + err = bpf_object_prepare(obj, target_btf_path); + if (err) + return libbpf_err(err); + } + err = bpf_object__load_progs(obj, extra_log_level); err = err ? : bpf_object_init_prog_arrays(obj); err = err ? : bpf_object_prepare_struct_ops(obj); @@ -8587,36 +9028,22 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps); } - /* clean up fd_array */ - zfree(&obj->fd_array); + bpf_object_post_load_cleanup(obj); + obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */ - /* clean up module BTFs */ - for (i = 0; i < obj->btf_module_cnt; i++) { - close(obj->btf_modules[i].fd); - btf__free(obj->btf_modules[i].btf); - free(obj->btf_modules[i].name); + if (err) { + bpf_object_unpin(obj); + bpf_object_unload(obj); + pr_warn("failed to load object '%s'\n", obj->path); + return libbpf_err(err); } - free(obj->btf_modules); - - /* clean up vmlinux BTF */ - btf__free(obj->btf_vmlinux); - obj->btf_vmlinux = NULL; - - obj->loaded = true; /* doesn't matter if successfully or not */ - - if (err) - goto out; return 0; -out: - /* unpin any maps that were auto-pinned during load */ - for (i = 0; i < obj->nr_maps; i++) - if (obj->maps[i].pinned && !obj->maps[i].reused) - bpf_map__unpin(&obj->maps[i], NULL); +} - bpf_object_unload(obj); - pr_warn("failed to load object '%s'\n", obj->path); - return libbpf_err(err); +int bpf_object__prepare(struct bpf_object *obj) +{ + return libbpf_err(bpf_object_prepare(obj, NULL)); } int bpf_object__load(struct bpf_object *obj) @@ -8866,7 +9293,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_PREPARED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } @@ -8945,7 +9372,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) if (!obj) return libbpf_err(-ENOENT); - if (!obj->loaded) { + if (obj->state < OBJ_LOADED) { pr_warn("object not yet loaded; load it first\n"); return libbpf_err(-ENOENT); } @@ -9064,6 +9491,13 @@ void bpf_object__close(struct bpf_object *obj) if (IS_ERR_OR_NULL(obj)) return; + /* + * if user called bpf_object__prepare() without ever getting to + * bpf_object__load(), we need to clean up stuff that is normally + * cleaned up at the end of loading step + */ + bpf_object_post_load_cleanup(obj); + usdt_manager_free(obj->usdt_man); obj->usdt_man = NULL; @@ -9080,8 +9514,10 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); - for (i = 0; i < obj->nr_extern; i++) + for (i = 0; i < obj->nr_extern; i++) { + zfree(&obj->externs[i].name); zfree(&obj->externs[i].essent_name); + } zfree(&obj->externs); obj->nr_extern = 0; @@ -9102,6 +9538,13 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->arena_data); + zfree(&obj->jumptables_data); + obj->jumptables_data_sz = 0; + + for (i = 0; i < obj->jumptable_map_cnt; i++) + close(obj->jumptable_maps[i].fd); + zfree(&obj->jumptable_maps); + free(obj); } @@ -9132,7 +9575,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj) int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version) { - if (obj->loaded) + if (obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); obj->kern_version = kern_version; @@ -9145,12 +9588,12 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) struct bpf_gen *gen; if (!opts) - return -EFAULT; + return libbpf_err(-EFAULT); if (!OPTS_VALID(opts, gen_loader_opts)) - return -EINVAL; - gen = calloc(sizeof(*gen), 1); + return libbpf_err(-EINVAL); + gen = calloc(1, sizeof(*gen)); if (!gen) - return -ENOMEM; + return libbpf_err(-ENOMEM); gen->opts = opts; gen->swapped_endian = !is_native_endianness(obj); obj->gen_loader = gen; @@ -9229,7 +9672,7 @@ bool bpf_program__autoload(const struct bpf_program *prog) int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); prog->autoload = autoload; @@ -9261,14 +9704,14 @@ int bpf_program__set_insns(struct bpf_program *prog, { struct bpf_insn *insns; - if (prog->obj->loaded) - return -EBUSY; + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns)); /* NULL is a valid return from reallocarray if the new count is zero */ if (!insns && new_insn_cnt) { pr_warn("prog '%s': failed to realloc prog code\n", prog->name); - return -ENOMEM; + return libbpf_err(-ENOMEM); } memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns)); @@ -9304,7 +9747,7 @@ static int last_custom_sec_def_handler_id; int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); /* if type is not changed, do nothing */ @@ -9335,7 +9778,7 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program int bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->expected_attach_type = type; @@ -9349,7 +9792,7 @@ __u32 bpf_program__flags(const struct bpf_program *prog) int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->prog_flags = flags; @@ -9363,7 +9806,7 @@ __u32 bpf_program__log_level(const struct bpf_program *prog) int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) { - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EBUSY); prog->log_level = log_level; @@ -9379,17 +9822,146 @@ const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_siz int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) { if (log_size && !log_buf) - return -EINVAL; + return libbpf_err(-EINVAL); if (prog->log_size > UINT_MAX) - return -EINVAL; - if (prog->obj->loaded) - return -EBUSY; + return libbpf_err(-EINVAL); + if (prog->obj->state >= OBJ_LOADED) + return libbpf_err(-EBUSY); prog->log_buf = log_buf; prog->log_size = log_size; return 0; } +struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog) +{ + if (prog->func_info_rec_size != sizeof(struct bpf_func_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->func_info; +} + +__u32 bpf_program__func_info_cnt(const struct bpf_program *prog) +{ + return prog->func_info_cnt; +} + +struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog) +{ + if (prog->line_info_rec_size != sizeof(struct bpf_line_info)) + return libbpf_err_ptr(-EOPNOTSUPP); + return prog->line_info; +} + +__u32 bpf_program__line_info_cnt(const struct bpf_program *prog) +{ + return prog->line_info_cnt; +} + +int bpf_program__clone(struct bpf_program *prog, const struct bpf_prog_load_opts *opts) +{ + LIBBPF_OPTS(bpf_prog_load_opts, attr); + struct bpf_object *obj; + const void *info; + __u32 info_cnt, info_rec_size; + int err, fd, prog_btf_fd; + + if (!prog) + return libbpf_err(-EINVAL); + + if (!OPTS_VALID(opts, bpf_prog_load_opts)) + return libbpf_err(-EINVAL); + + obj = prog->obj; + if (obj->state < OBJ_PREPARED) + return libbpf_err(-EINVAL); + + /* + * Caller-provided opts take priority; fall back to + * prog/object defaults when the caller leaves them zero. + */ + attr.attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0) ?: prog->attach_prog_fd; + attr.prog_flags = OPTS_GET(opts, prog_flags, 0) ?: prog->prog_flags; + attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0) ?: prog->prog_ifindex; + attr.kern_version = OPTS_GET(opts, kern_version, 0) ?: obj->kern_version; + attr.fd_array = OPTS_GET(opts, fd_array, NULL) ?: obj->fd_array; + attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0) ?: obj->fd_array_cnt; + attr.token_fd = OPTS_GET(opts, token_fd, 0) ?: obj->token_fd; + if (attr.token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; + + prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0); + if (!prog_btf_fd && obj->btf) + prog_btf_fd = btf__fd(obj->btf); + + /* BTF func/line info: only pass if kernel supports it */ + if (kernel_supports(obj, FEAT_BTF_FUNC) && prog_btf_fd > 0) { + attr.prog_btf_fd = prog_btf_fd; + + /* func_info/line_info triples: all-or-nothing from caller */ + info = OPTS_GET(opts, func_info, NULL); + info_cnt = OPTS_GET(opts, func_info_cnt, 0); + info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); + if (!!info != !!info_cnt || !!info != !!info_rec_size) { + pr_warn("prog '%s': func_info, func_info_cnt, and func_info_rec_size must all be specified or all omitted\n", + prog->name); + return libbpf_err(-EINVAL); + } + attr.func_info = info ?: prog->func_info; + attr.func_info_cnt = info ? info_cnt : prog->func_info_cnt; + attr.func_info_rec_size = info ? info_rec_size : prog->func_info_rec_size; + + info = OPTS_GET(opts, line_info, NULL); + info_cnt = OPTS_GET(opts, line_info_cnt, 0); + info_rec_size = OPTS_GET(opts, line_info_rec_size, 0); + if (!!info != !!info_cnt || !!info != !!info_rec_size) { + pr_warn("prog '%s': line_info, line_info_cnt, and line_info_rec_size must all be specified or all omitted\n", + prog->name); + return libbpf_err(-EINVAL); + } + attr.line_info = info ?: prog->line_info; + attr.line_info_cnt = info ? info_cnt : prog->line_info_cnt; + attr.line_info_rec_size = info ? info_rec_size : prog->line_info_rec_size; + } + + /* Logging is caller-controlled; no fallback to prog/obj log settings */ + attr.log_buf = OPTS_GET(opts, log_buf, NULL); + attr.log_size = OPTS_GET(opts, log_size, 0); + attr.log_level = OPTS_GET(opts, log_level, 0); + + /* + * Fields below may be mutated by prog_prepare_load_fn: + * Seed them from prog/obj defaults here; + * Later override with caller-provided opts. + */ + attr.expected_attach_type = prog->expected_attach_type; + attr.attach_btf_id = prog->attach_btf_id; + attr.attach_btf_obj_fd = prog->attach_btf_obj_fd; + + if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { + err = prog->sec_def->prog_prepare_load_fn(prog, &attr, prog->sec_def->cookie); + if (err) + return libbpf_err(err); + } + + /* Re-apply caller overrides for output fields */ + if (OPTS_GET(opts, expected_attach_type, 0)) + attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); + if (OPTS_GET(opts, attach_btf_id, 0)) + attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0); + if (OPTS_GET(opts, attach_btf_obj_fd, 0)) + attr.attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0); + + /* + * Unlike bpf_object_load_prog(), we intentionally do not call bpf_prog_bind_map() + * for RODATA maps here to avoid mutating the object's state. Callers can bind the + * required maps themselves using bpf_prog_bind_map(). + */ + fd = bpf_prog_load(prog->type, prog->name, obj->license, prog->insns, prog->insns_cnt, + &attr); + + return libbpf_err(fd); +} + #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \ .sec = (char *)sec_pfx, \ .prog_type = BPF_PROG_TYPE_##ptype, \ @@ -9457,6 +10029,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), + SEC_DEF("fsession+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF, attach_trace), + SEC_DEF("fsession.s+", TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace), SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace), SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm), SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), @@ -9959,7 +10533,7 @@ int libbpf_find_vmlinux_btf_id(const char *name, return libbpf_err(err); } -static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) +static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd) { struct bpf_prog_info info; __u32 info_len = sizeof(info); @@ -9979,7 +10553,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) pr_warn("The target program doesn't have BTF\n"); goto out; } - btf = btf__load_from_kernel_by_id(info.btf_id); + btf = btf_load_from_kernel(info.btf_id, NULL, token_fd); err = libbpf_get_error(btf); if (err) { pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err)); @@ -9999,7 +10573,7 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name, enum bpf_attach_type attach_type, int *btf_obj_fd, int *btf_type_id) { - int ret, i, mod_len; + int ret, i, mod_len = 0; const char *fn_name, *mod_name = NULL; fn_name = strchr(attach_name, ':'); @@ -10062,7 +10636,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac pr_warn("prog '%s': attach program FD is not set\n", prog->name); return -EINVAL; } - err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); + err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd); if (err < 0) { pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n", prog->name, attach_prog_fd, attach_name, errstr(err)); @@ -10299,7 +10873,7 @@ static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) int bpf_map__set_value_size(struct bpf_map *map, __u32 size) { - if (map->obj->loaded || map->reused) + if (map_is_created(map)) return libbpf_err(-EBUSY); if (map->mmaped) { @@ -10307,7 +10881,7 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size) int err; if (map->def.type != BPF_MAP_TYPE_ARRAY) - return -EOPNOTSUPP; + return libbpf_err(-EOPNOTSUPP); mmap_old_sz = bpf_map_mmap_sz(map); mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); @@ -10315,7 +10889,7 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size) if (err) { pr_warn("map '%s': failed to resize memory-mapped region: %s\n", bpf_map__name(map), errstr(err)); - return err; + return libbpf_err(err); } err = map_btf_datasec_resize(map, size); if (err && err != -ENOENT) { @@ -10345,7 +10919,7 @@ int bpf_map__set_initial_value(struct bpf_map *map, { size_t actual_sz; - if (map->obj->loaded || map->reused) + if (map_is_created(map)) return libbpf_err(-EBUSY); if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG) @@ -10417,6 +10991,27 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) return 0; } +int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog) +{ + if (map_is_created(map)) { + pr_warn("exclusive programs must be set before map creation\n"); + return libbpf_err(-EINVAL); + } + + if (map->obj != prog->obj) { + pr_warn("excl_prog and map must be from the same bpf object\n"); + return libbpf_err(-EINVAL); + } + + map->excl_prog = prog; + return 0; +} + +struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map) +{ + return map->excl_prog; +} + static struct bpf_map * __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) { @@ -10496,7 +11091,7 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name) } static int validate_map_op(const struct bpf_map *map, size_t key_sz, - size_t value_sz, bool check_value_sz) + size_t value_sz, bool check_value_sz, __u64 flags) { if (!map_is_created(map)) /* map is not yet created */ return -ENOENT; @@ -10523,6 +11118,20 @@ static int validate_map_op(const struct bpf_map *map, size_t key_sz, int num_cpu = libbpf_num_possible_cpus(); size_t elem_sz = roundup(map->def.value_size, 8); + if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) { + if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS)) { + pr_warn("map '%s': BPF_F_CPU and BPF_F_ALL_CPUS are mutually exclusive\n", + map->name); + return -EINVAL; + } + if (map->def.value_size != value_sz) { + pr_warn("map '%s': unexpected value size %zu provided for either BPF_F_CPU or BPF_F_ALL_CPUS, expected %u\n", + map->name, value_sz, map->def.value_size); + return -EINVAL; + } + break; + } + if (value_sz != num_cpu * elem_sz) { pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n", map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz); @@ -10547,7 +11156,7 @@ int bpf_map__lookup_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, value_sz, true); + err = validate_map_op(map, key_sz, value_sz, true, flags); if (err) return libbpf_err(err); @@ -10560,7 +11169,7 @@ int bpf_map__update_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, value_sz, true); + err = validate_map_op(map, key_sz, value_sz, true, flags); if (err) return libbpf_err(err); @@ -10572,7 +11181,7 @@ int bpf_map__delete_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, flags); if (err) return libbpf_err(err); @@ -10585,7 +11194,7 @@ int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, value_sz, true); + err = validate_map_op(map, key_sz, value_sz, true, flags); if (err) return libbpf_err(err); @@ -10597,7 +11206,7 @@ int bpf_map__get_next_key(const struct bpf_map *map, { int err; - err = validate_map_op(map, key_sz, 0, false /* check_value_sz */); + err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, 0); if (err) return libbpf_err(err); @@ -10868,11 +11477,14 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p } link->link.fd = pfd; } - if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { - err = -errno; - pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", - prog->name, pfd, errstr(err)); - goto err_out; + + if (!OPTS_GET(opts, dont_enable, false)) { + if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { + err = -errno; + pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); + goto err_out; + } } return &link->link; @@ -11056,16 +11668,16 @@ static const char *tracefs_available_filter_functions_addrs(void) : TRACEFS"/available_filter_functions_addrs"; } -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *kfunc_name, size_t offset) +static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, + const char *name, size_t offset) { static int index = 0; int i; - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, - __sync_fetch_and_add(&index, 1)); + snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), + __sync_fetch_and_add(&index, 1), name, offset); - /* sanitize binary_path in the probe name */ + /* sanitize name in the probe name */ for (i = 0; buf[i]; i++) { if (!isalnum(buf[i])) buf[i] = '_'; @@ -11151,8 +11763,6 @@ static const char *arch_specific_syscall_pfx(void) return "ia32"; #elif defined(__s390x__) return "s390x"; -#elif defined(__s390__) - return "s390"; #elif defined(__arm__) return "arm"; #elif defined(__aarch64__) @@ -11190,9 +11800,9 @@ int probe_kern_syscall_wrapper(int token_fd) return pfd >= 0 ? 1 : 0; } else { /* legacy mode */ - char probe_name[128]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0) return 0; @@ -11242,16 +11852,18 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, default: return libbpf_err_ptr(-EINVAL); } + if (!func_name && legacy) + return libbpf_err_ptr(-EOPNOTSUPP); if (!legacy) { pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, offset, -1 /* pid */, 0 /* ref_ctr_off */); } else { - char probe_name[256]; + char probe_name[MAX_EVENT_NAME_LEN]; - gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), - func_name, offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + func_name, offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -11261,21 +11873,21 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, offset, -1 /* pid */); } if (pfd < 0) { - err = -errno; - pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", + err = pfd; + pr_warn("prog '%s': failed to create %s '%s%s0x%zx' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", - func_name, offset, - errstr(err)); + func_name ?: "", func_name ? "+" : "", + offset, errstr(err)); goto err_out; } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); err = libbpf_get_error(link); if (err) { close(pfd); - pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", + pr_warn("prog '%s': failed to attach to %s '%s%s0x%zx': %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", - func_name, offset, - errstr(err)); + func_name ?: "", func_name ? "+" : "", + offset, errstr(err)); goto err_clean_legacy; } if (legacy) { @@ -11400,7 +12012,8 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type, * * [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG") */ - char sym_trim[256], *psym_trim = sym_trim, *sym_sfx; + char sym_trim[256], *psym_trim = sym_trim; + const char *sym_sfx; if (!(sym_sfx = strstr(sym_name, ".llvm."))) return 0; @@ -11590,7 +12203,16 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, if (addrs && syms) return libbpf_err_ptr(-EINVAL); - if (pattern) { + /* + * Exact function name (no wildcards) without unique_match: + * bypass kallsyms parsing and pass the symbol directly to the + * kernel via syms[] array. When unique_match is set, fall + * through to the slow path which detects duplicate symbols. + */ + if (pattern && !strpbrk(pattern, "*?") && !unique_match) { + syms = &pattern; + cnt = 1; + } else if (pattern) { if (has_available_filter_functions_addrs()) err = libbpf_available_kprobes_parse(&res); else @@ -11633,6 +12255,14 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); if (link_fd < 0) { err = -errno; + /* + * Normalize error code: when exact name bypasses kallsyms + * parsing, kernel returns ESRCH from ftrace_lookup_symbols(). + * Convert to ENOENT for API consistency with the pattern + * matching path which returns ENOENT from userspace. + */ + if (err == -ESRCH) + err = -ENOENT; pr_warn("prog '%s': failed to attach: %s\n", prog->name, errstr(err)); goto error; @@ -11795,20 +12425,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru return ret; } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, - const char *binary_path, uint64_t offset) -{ - int i; - - snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); - - /* sanitize binary_path in the probe name */ - for (i = 0; buf[i]; i++) { - if (!isalnum(buf[i])) - buf[i] = '_'; - } -} - static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset) { @@ -11953,8 +12569,6 @@ static const char *arch_specific_lib_paths(void) return "/lib/i386-linux-gnu"; #elif defined(__s390x__) return "/lib/s390x-linux-gnu"; -#elif defined(__s390__) - return "/lib/s390-linux-gnu"; #elif defined(__arm__) && defined(__SOFTFP__) return "/lib/arm-linux-gnueabi"; #elif defined(__arm__) && !defined(__SOFTFP__) @@ -11999,7 +12613,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz) if (!search_paths[i]) continue; for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) { - char *next_path; + const char *next_path; int seg_len; if (s[0] == ':') @@ -12232,13 +12846,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid, ref_ctr_off); } else { - char probe_name[PATH_MAX + 64]; + char probe_name[MAX_EVENT_NAME_LEN]; if (ref_ctr_off) return libbpf_err_ptr(-EINVAL); - gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), - binary_path, func_offset); + gen_probe_legacy_event_name(probe_name, sizeof(probe_name), + strrchr(binary_path, '/') ? : binary_path, + func_offset); legacy_probe = strdup(probe_name); if (!legacy_probe) @@ -12248,7 +12863,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, binary_path, func_offset, pid); } if (pfd < 0) { - err = -errno; + err = pfd; pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, @@ -12769,6 +13384,34 @@ struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifi } struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, + const struct bpf_cgroup_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_cgroup_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.cgroup.relative_fd = relative_fd; + link_create_opts.cgroup.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts); +} + +struct bpf_link * bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, const struct bpf_tcx_opts *opts) { @@ -12858,7 +13501,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, if (target_fd) { LIBBPF_OPTS(bpf_link_create_opts, target_opts); - btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd); + btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd); if (btf_id < 0) return libbpf_err_ptr(btf_id); @@ -13070,17 +13713,17 @@ int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map) int err; if (!bpf_map__is_struct_ops(map)) - return -EINVAL; + return libbpf_err(-EINVAL); if (map->fd < 0) { pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name); - return -EINVAL; + return libbpf_err(-EINVAL); } st_ops_link = container_of(link, struct bpf_link_struct_ops, link); /* Ensure the type of a link is correct */ if (st_ops_link->map_fd < 0) - return -EINVAL; + return libbpf_err(-EINVAL); err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0); /* It can be EBUSY if the map has been used to create or @@ -13306,7 +13949,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; - attr.sample_period = sample_period; attr.wakeup_events = sample_period; p.attr = &attr; @@ -13666,12 +14308,12 @@ int bpf_program__set_attach_target(struct bpf_program *prog, if (!prog || attach_prog_fd < 0) return libbpf_err(-EINVAL); - if (prog->obj->loaded) + if (prog->obj->state >= OBJ_LOADED) return libbpf_err(-EINVAL); if (attach_prog_fd && !attach_func_name) { - /* remember attach_prog_fd and let bpf_program__load() find - * BTF ID during the program load + /* Store attach_prog_fd. The BTF ID will be resolved later during + * the normal object/program load phase. */ prog->attach_prog_fd = attach_prog_fd; return 0; @@ -13679,7 +14321,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog, if (attach_prog_fd) { btf_id = libbpf_find_prog_btf_id(attach_func_name, - attach_prog_fd); + attach_prog_fd, prog->obj->token_fd); if (btf_id < 0) return libbpf_err(btf_id); } else { @@ -13703,6 +14345,37 @@ int bpf_program__set_attach_target(struct bpf_program *prog, return 0; } +int bpf_program__assoc_struct_ops(struct bpf_program *prog, struct bpf_map *map, + struct bpf_prog_assoc_struct_ops_opts *opts) +{ + int prog_fd, map_fd; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + pr_warn("prog '%s': can't associate BPF program without FD (was it loaded?)\n", + prog->name); + return libbpf_err(-EINVAL); + } + + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) { + pr_warn("prog '%s': can't associate struct_ops program\n", prog->name); + return libbpf_err(-EINVAL); + } + + map_fd = bpf_map__fd(map); + if (map_fd < 0) { + pr_warn("map '%s': can't associate BPF map without FD (was it created?)\n", map->name); + return libbpf_err(-EINVAL); + } + + if (!bpf_map__is_struct_ops(map)) { + pr_warn("map '%s': can't associate non-struct_ops map\n", map->name); + return libbpf_err(-EINVAL); + } + + return bpf_prog_assoc_struct_ops(prog_fd, map_fd, opts); +} + int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) { int err = 0, n, len, start, end = -1; @@ -13968,7 +14641,10 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) if (!map_skel->mmaped) continue; - *map_skel->mmaped = map->mmaped; + if (map->def.type == BPF_MAP_TYPE_ARENA) + *map_skel->mmaped = map->mmaped + map->obj->arena_data_off; + else + *map_skel->mmaped = map->mmaped; } return 0; @@ -14034,6 +14710,12 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) } link = map_skel->link; + if (!link) { + pr_warn("map '%s': BPF map skeleton link is uninitialized\n", + bpf_map__name(map)); + continue; + } + if (*link) continue; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3020ee45303a..bba4e8464396 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -24,8 +24,25 @@ extern "C" { #endif +/** + * @brief **libbpf_major_version()** provides the major version of libbpf. + * @return An integer, the major version number + */ LIBBPF_API __u32 libbpf_major_version(void); + +/** + * @brief **libbpf_minor_version()** provides the minor version of libbpf. + * @return An integer, the minor version number + */ LIBBPF_API __u32 libbpf_minor_version(void); + +/** + * @brief **libbpf_version_string()** provides the version of libbpf in a + * human-readable form, e.g., "v1.7". + * @return Pointer to a static string containing the version + * + * The format is *not* a part of a stable API and may change in the future. + */ LIBBPF_API const char *libbpf_version_string(void); enum libbpf_errno { @@ -49,6 +66,14 @@ enum libbpf_errno { __LIBBPF_ERRNO__END, }; +/** + * @brief **libbpf_strerror()** converts the provided error code into a + * human-readable string. + * @param err The error code to convert + * @param buf Pointer to a buffer where the error message will be stored + * @param size The number of bytes in the buffer + * @return 0, on success; negative error code, otherwise + */ LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); /** @@ -242,6 +267,19 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts); /** + * @brief **bpf_object__prepare()** prepares BPF object for loading: + * performs ELF processing, relocations, prepares final state of BPF program + * instructions (accessible with bpf_program__insns()), creates and + * (potentially) pins maps. Leaves BPF object in the state ready for program + * loading. + * @param obj Pointer to a valid BPF object instance returned by + * **bpf_object__open*()** API + * @return 0, on success; negative error code, otherwise, error code is + * stored in errno + */ +LIBBPF_API int bpf_object__prepare(struct bpf_object *obj); + +/** * @brief **bpf_object__load()** loads BPF object into kernel. * @param obj Pointer to a valid BPF object instance returned by * **bpf_object__open*()** APIs @@ -410,7 +448,7 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); /** * @brief **bpf_program__unpin()** unpins the BPF program from a file - * in the BPFFS specified by a path. This decrements the programs + * in the BPFFS specified by a path. This decrements program's in-kernel * reference count. * * The file pinning the BPF program can also be unlinked by a different @@ -443,14 +481,12 @@ LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); /** * @brief **bpf_link__unpin()** unpins the BPF link from a file - * in the BPFFS specified by a path. This decrements the links - * reference count. + * in the BPFFS. This decrements link's in-kernel reference count. * * The file pinning the BPF link can also be unlinked by a different * process in which case this function will return an error. * - * @param prog BPF program to unpin - * @param path file path to the pin in a BPF file system + * @param link BPF link to unpin * @return 0, on success; negative error code, otherwise */ LIBBPF_API int bpf_link__unpin(struct bpf_link *link); @@ -486,9 +522,11 @@ struct bpf_perf_event_opts { __u64 bpf_cookie; /* don't use BPF link when attach BPF program */ bool force_ioctl_attach; + /* don't automatically enable the event */ + bool dont_enable; size_t :0; }; -#define bpf_perf_event_opts__last_field force_ioctl_attach +#define bpf_perf_event_opts__last_field dont_enable LIBBPF_API struct bpf_link * bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); @@ -519,7 +557,7 @@ struct bpf_kprobe_opts { size_t sz; /* custom user-provided value fetchable through bpf_get_attach_cookie() */ __u64 bpf_cookie; - /* function's offset to install kprobe to */ + /* function offset, or raw address if func_name == NULL */ size_t offset; /* kprobe is return probe */ bool retprobe; @@ -527,11 +565,36 @@ struct bpf_kprobe_opts { enum probe_attach_mode attach_mode; size_t :0; }; + #define bpf_kprobe_opts__last_field attach_mode +/** + * @brief **bpf_program__attach_kprobe()** attaches a BPF program to a + * kernel function entry or return. + * + * @param prog BPF program to attach + * @param retprobe Attach to function return + * @param func_name Name of the kernel function to attach to + * @return Reference to the newly created BPF link; or NULL is returned on + * error, error code is stored in errno + */ LIBBPF_API struct bpf_link * bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe, const char *func_name); + +/** + * @brief **bpf_program__attach_kprobe_opts()** is just like + * bpf_program__attach_kprobe() except with an options struct + * for various configurations. + * + * @param prog BPF program to attach + * @param func_name Name of the kernel function to attach to. If NULL, + * opts->offset is treated as a raw kernel address. Raw-address attach + * is supported with PROBE_ATTACH_MODE_PERF and PROBE_ATTACH_MODE_LINK. + * @param opts Options for altering program attachment + * @return Reference to the newly created BPF link; or NULL is returned on + * error, error code is stored in errno + */ LIBBPF_API struct bpf_link * bpf_program__attach_kprobe_opts(const struct bpf_program *prog, const char *func_name, @@ -864,6 +927,21 @@ LIBBPF_API struct bpf_link * bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, const struct bpf_netkit_opts *opts); +struct bpf_cgroup_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + __u32 flags; + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_cgroup_opts__last_field expected_revision + +LIBBPF_API struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, + const struct bpf_cgroup_opts *opts); + struct bpf_map; LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); @@ -927,6 +1005,12 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); +LIBBPF_API struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog); +LIBBPF_API __u32 bpf_program__func_info_cnt(const struct bpf_program *prog); + +LIBBPF_API struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog); +LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog); + /** * @brief **bpf_program__set_attach_target()** sets BTF-based attach target * for supported BPF program types: @@ -934,14 +1018,35 @@ LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, * - fentry/fexit/fmod_ret; * - lsm; * - freplace. - * @param prog BPF program to set the attach type for - * @param type attach type to set the BPF map to have + * @param prog BPF program to configure; must be not yet loaded. + * @param attach_prog_fd FD of target BPF program (for freplace/extension). + * If >0 and func name omitted, defers BTF ID resolution. + * @param attach_func_name Target function name. Used either with + * attach_prog_fd to find destination BTF type ID in that BPF program, or + * alone (no attach_prog_fd) to resolve kernel (vmlinux/module) BTF ID. + * Must be provided if attach_prog_fd is 0. * @return error code; or 0 if no error occurred. */ LIBBPF_API int bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd, const char *attach_func_name); +struct bpf_prog_assoc_struct_ops_opts; /* defined in bpf.h */ + +/** + * @brief **bpf_program__assoc_struct_ops()** associates a BPF program with a + * struct_ops map. + * + * @param prog BPF program + * @param map struct_ops map to be associated with the BPF program + * @param opts optional options, can be NULL + * + * @return 0, on success; negative error code, otherwise + */ +LIBBPF_API int +bpf_program__assoc_struct_ops(struct bpf_program *prog, struct bpf_map *map, + struct bpf_prog_assoc_struct_ops_opts *opts); + /** * @brief **bpf_object__find_map_by_name()** returns BPF map of * the given name, if it exists within the passed BPF object @@ -1037,6 +1142,7 @@ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); /** * @brief **bpf_map__set_value_size()** sets map value size. * @param map the BPF map instance + * @param size the new value size * @return 0, on success; negative error, otherwise * * There is a special case for maps with associated memory-mapped regions, like @@ -1135,13 +1241,14 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map); * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** * @param value pointer to memory in which looked up value will be stored * @param value_sz size in byte of value data memory; it has to match BPF map - * definition's **value_size**. For per-CPU BPF maps value size has to be - * a product of BPF map value size and number of possible CPUs in the system - * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for - * per-CPU values value size has to be aligned up to closest 8 bytes for - * alignment reasons, so expected size is: `round_up(value_size, 8) - * * libbpf_num_possible_cpus()`. - * @flags extra flags passed to kernel for this operation + * definition's **value_size**. For per-CPU BPF maps, value size can be + * `value_size` if either **BPF_F_CPU** or **BPF_F_ALL_CPUS** is specified + * in **flags**, otherwise a product of BPF map value size and number of + * possible CPUs in the system (could be fetched with + * **libbpf_num_possible_cpus()**). Note also that for per-CPU values value + * size has to be aligned up to closest 8 bytes, so expected size is: + * `round_up(value_size, 8) * libbpf_num_possible_cpus()`. + * @param flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * * **bpf_map__lookup_elem()** is high-level equivalent of @@ -1158,14 +1265,8 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map, * @param key pointer to memory containing bytes of the key * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** * @param value pointer to memory containing bytes of the value - * @param value_sz size in byte of value data memory; it has to match BPF map - * definition's **value_size**. For per-CPU BPF maps value size has to be - * a product of BPF map value size and number of possible CPUs in the system - * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for - * per-CPU values value size has to be aligned up to closest 8 bytes for - * alignment reasons, so expected size is: `round_up(value_size, 8) - * * libbpf_num_possible_cpus()`. - * @flags extra flags passed to kernel for this operation + * @param value_sz refer to **bpf_map__lookup_elem**'s description.' + * @param flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * * **bpf_map__update_elem()** is high-level equivalent of @@ -1181,7 +1282,7 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map, * @param map BPF map to delete element from * @param key pointer to memory containing bytes of the key * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size** - * @flags extra flags passed to kernel for this operation + * @param flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * * **bpf_map__delete_elem()** is high-level equivalent of @@ -1204,7 +1305,7 @@ LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map, * per-CPU values value size has to be aligned up to closest 8 bytes for * alignment reasons, so expected size is: `round_up(value_size, 8) * * libbpf_num_possible_cpus()`. - * @flags extra flags passed to kernel for this operation + * @param flags extra flags passed to kernel for this operation * @return 0, on success; negative error, otherwise * * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of @@ -1230,6 +1331,28 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map, */ LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map, const void *cur_key, void *next_key, size_t key_sz); +/** + * @brief **bpf_map__set_exclusive_program()** sets a map to be exclusive to the + * specified program. This must be called *before* the map is created. + * + * @param map BPF map to make exclusive. + * @param prog BPF program to be the exclusive user of the map. Must belong + * to the same bpf_object as the map. + * @return 0 on success; a negative error code otherwise. + * + * This function must be called after the BPF object is opened but before + * it is loaded. Once the object is loaded, only the specified program + * will be able to access the map's contents. + */ +LIBBPF_API int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog); + +/** + * @brief **bpf_map__exclusive_program()** returns the exclusive program + * that is registered with the map (if any). + * @param map BPF map to which the exclusive program is registered. + * @return the registered exclusive program. + */ +LIBBPF_API struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map); struct bpf_xdp_set_link_opts { size_t sz; @@ -1270,6 +1393,7 @@ enum bpf_tc_attach_point { BPF_TC_INGRESS = 1 << 0, BPF_TC_EGRESS = 1 << 1, BPF_TC_CUSTOM = 1 << 2, + BPF_TC_QDISC = 1 << 3, }; #define BPF_TC_PARENT(a, b) \ @@ -1284,9 +1408,11 @@ struct bpf_tc_hook { int ifindex; enum bpf_tc_attach_point attach_point; __u32 parent; + __u32 handle; + const char *qdisc; size_t :0; }; -#define bpf_tc_hook__last_field parent +#define bpf_tc_hook__last_field qdisc struct bpf_tc_opts { size_t sz; @@ -1551,6 +1677,7 @@ struct perf_buffer_opts { * @param sample_cb function called on each received data record * @param lost_cb function called when record loss has occurred * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb* + * @param opts optional parameters for the perf buffer, can be null * @return a new instance of struct perf_buffer on success, NULL on error with * *errno* containing an error code */ @@ -1771,9 +1898,10 @@ struct gen_loader_opts { const char *insns; __u32 data_sz; __u32 insns_sz; + bool gen_hash; }; -#define gen_loader_opts__last_field insns_sz +#define gen_loader_opts__last_field gen_hash LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts); @@ -1918,6 +2046,23 @@ LIBBPF_API int libbpf_register_prog_handler(const char *sec, */ LIBBPF_API int libbpf_unregister_prog_handler(int handler_id); +/** + * @brief **bpf_program__clone()** loads a single BPF program from a prepared + * BPF object into the kernel, returning its file descriptor. + * + * The BPF object must have been previously prepared with + * **bpf_object__prepare()**. If @opts is provided, any non-zero field + * overrides the defaults derived from the program/object internals. + * If @opts is NULL, all fields are populated automatically. + * + * The returned FD is owned by the caller and must be closed with close(). + * + * @param prog BPF program from a prepared object + * @param opts Optional load options; non-zero fields override defaults + * @return program FD (>= 0) on success; negative error code on failure + */ +LIBBPF_API int bpf_program__clone(struct bpf_program *prog, const struct bpf_prog_load_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index a8b2936a1646..dfed8d60af05 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -436,4 +436,28 @@ LIBBPF_1.6.0 { bpf_linker__add_buf; bpf_linker__add_fd; bpf_linker__new_fd; + bpf_object__prepare; + bpf_prog_stream_read; + bpf_program__attach_cgroup_opts; + bpf_program__func_info; + bpf_program__func_info_cnt; + bpf_program__line_info; + bpf_program__line_info_cnt; + btf__add_decl_attr; + btf__add_type_attr; } LIBBPF_1.5.0; + +LIBBPF_1.7.0 { + global: + bpf_map__set_exclusive_program; + bpf_map__exclusive_program; + bpf_prog_assoc_struct_ops; + bpf_program__assoc_struct_ops; + btf__permute; +} LIBBPF_1.6.0; + +LIBBPF_1.8.0 { + global: + bpf_program__clone; + btf__new_empty_opts; +} LIBBPF_1.7.0; diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c deleted file mode 100644 index 6b180172ec6b..000000000000 --- a/tools/lib/bpf/libbpf_errno.c +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -/* - * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> - * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> - * Copyright (C) 2015 Huawei Inc. - * Copyright (C) 2017 Nicira, Inc. - */ - -#undef _GNU_SOURCE -#include <stdio.h> -#include <string.h> - -#include "libbpf.h" -#include "libbpf_internal.h" - -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - -#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) -#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) -#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) - -static const char *libbpf_strerror_table[NR_ERRNO] = { - [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", - [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", - [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", - [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", - [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", - [ERRCODE_OFFSET(RELOC)] = "Relocation failed", - [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", - [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", - [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", - [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", - [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", - [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", - [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", -}; - -int libbpf_strerror(int err, char *buf, size_t size) -{ - int ret; - - if (!buf || !size) - return libbpf_err(-EINVAL); - - err = err > 0 ? err : -err; - - if (err < __LIBBPF_ERRNO__START) { - ret = strerror_r(err, buf, size); - buf[size - 1] = '\0'; - return libbpf_err_errno(ret); - } - - if (err < __LIBBPF_ERRNO__END) { - const char *msg; - - msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; - ret = snprintf(buf, size, "%s", msg); - buf[size - 1] = '\0'; - /* The length of the buf and msg is positive. - * A negative number may be returned only when the - * size exceeds INT_MAX. Not likely to appear. - */ - if (ret >= size) - return libbpf_err(-ERANGE); - return 0; - } - - ret = snprintf(buf, size, "Unknown libbpf error %d", err); - buf[size - 1] = '\0'; - if (ret >= size) - return libbpf_err(-ERANGE); - return libbpf_err(-ENOENT); -} diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index de498e2dd6b0..3781c45b46d3 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -74,6 +74,8 @@ #define ELF64_ST_VISIBILITY(o) ((o) & 0x03) #endif +#define JUMPTABLES_SEC ".jumptables" + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) @@ -172,6 +174,16 @@ do { \ #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +/** + * @brief **libbpf_errstr()** returns string corresponding to numeric errno + * @param err negative numeric errno + * @return pointer to string representation of the errno, that is invalidated + * upon the next call. + */ +const char *libbpf_errstr(int err); + +#define errstr(err) libbpf_errstr(err) + #ifndef __has_builtin #define __has_builtin(x) 0 #endif @@ -380,6 +392,12 @@ enum kern_feature_id { FEAT_ARG_CTX_TAG, /* Kernel supports '?' at the front of datasec names */ FEAT_BTF_QMARK_DATASEC, + /* Kernel supports LDIMM64 imm offsets past 512 MiB. */ + FEAT_LDIMM64_FULL_RANGE_OFF, + /* Kernel supports uprobe syscall */ + FEAT_UPROBE_SYSCALL, + /* Kernel supports BTF layout information */ + FEAT_BTF_LAYOUT, __FEAT_CNT, }; @@ -396,6 +414,7 @@ struct kern_feature_cache { bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +void bpf_object_set_feat_cache(struct bpf_object *obj, struct kern_feature_cache *cache); int probe_kern_syscall_wrapper(int token_fd); int probe_memcg_account(int token_fd); @@ -406,9 +425,14 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len, int token_fd); +int libbpf__load_raw_btf_hdr(const struct btf_header *hdr, + const char *raw_types, const char *str_sec, + const char *layout_sec, int token_fd); +struct btf *bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *orig_btf); int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level, int token_fd); +struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -666,6 +690,15 @@ static inline int sys_dup3(int oldfd, int newfd, int flags) return syscall(__NR_dup3, oldfd, newfd, flags); } +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static inline int sys_memfd_create(const char *name, unsigned flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + /* Point *fixed_fd* to the same file that *tmp_fd* points to. * Regardless of success, *tmp_fd* is closed. * Whatever *fixed_fd* pointed to is closed silently. @@ -702,6 +735,11 @@ static inline bool is_pow_of_2(size_t x) return x && (x & (x - 1)) == 0; } +static inline __u32 ror32(__u32 v, int bits) +{ + return (v >> bits) | (v << (32 - bits)); +} + #define PROG_LOAD_ATTEMPTS 5 int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts); @@ -726,4 +764,8 @@ int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, int probe_fd(int fd); +#define SHA256_DIGEST_LENGTH 32 +#define SHA256_DWORD_SIZE SHA256_DIGEST_LENGTH / sizeof(__u64) + +void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]); #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9dfbe7750f56..b70d9637ecf5 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -218,18 +218,10 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) return libbpf_err(ret); } -int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len, - int token_fd) +int libbpf__load_raw_btf_hdr(const struct btf_header *hdr, const char *raw_types, + const char *str_sec, const char *layout_sec, + int token_fd) { - struct btf_header hdr = { - .magic = BTF_MAGIC, - .version = BTF_VERSION, - .hdr_len = sizeof(struct btf_header), - .type_len = types_len, - .str_off = types_len, - .str_len = str_len, - }; LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd, .btf_flags = token_fd ? BPF_F_TOKEN_FD : 0, @@ -237,14 +229,16 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, int btf_fd, btf_len; __u8 *raw_btf; - btf_len = hdr.hdr_len + hdr.type_len + hdr.str_len; + btf_len = hdr->hdr_len + hdr->type_off + hdr->type_len + hdr->str_len + hdr->layout_len; raw_btf = malloc(btf_len); if (!raw_btf) return -ENOMEM; - memcpy(raw_btf, &hdr, sizeof(hdr)); - memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); - memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); + memcpy(raw_btf, hdr, sizeof(*hdr)); + memcpy(raw_btf + hdr->hdr_len + hdr->type_off, raw_types, hdr->type_len); + memcpy(raw_btf + hdr->hdr_len + hdr->str_off, str_sec, hdr->str_len); + if (layout_sec) + memcpy(raw_btf + hdr->hdr_len + hdr->layout_off, layout_sec, hdr->layout_len); btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); @@ -252,6 +246,22 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, return btf_fd; } +int libbpf__load_raw_btf(const char *raw_types, size_t types_len, + const char *str_sec, size_t str_len, + int token_fd) +{ + struct btf_header hdr = { + .magic = BTF_MAGIC, + .version = BTF_VERSION, + .hdr_len = sizeof(struct btf_header), + .type_len = types_len, + .str_off = types_len, + .str_len = str_len, + }; + + return libbpf__load_raw_btf_hdr(&hdr, raw_types, str_sec, NULL, token_fd); +} + static int load_local_storage_btf(void) { const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l"; @@ -364,6 +374,10 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_SOCKHASH: case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: break; + case BPF_MAP_TYPE_INSN_ARRAY: + key_size = sizeof(__u32); + value_size = sizeof(struct bpf_insn_array_value); + break; case BPF_MAP_TYPE_UNSPEC: default: return -EOPNOTSUPP; diff --git a/tools/lib/bpf/libbpf_utils.c b/tools/lib/bpf/libbpf_utils.c new file mode 100644 index 000000000000..ac3beae54cf6 --- /dev/null +++ b/tools/lib/bpf/libbpf_utils.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + * Copyright (C) 2017 Nicira, Inc. + */ + +#undef _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <inttypes.h> +#include <linux/kernel.h> + +#include "libbpf.h" +#include "libbpf_internal.h" + +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + +/* make sure libbpf doesn't use kernel-only integer typedefs */ +#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 + +#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START) +#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c) +#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START) + +static const char *libbpf_strerror_table[NR_ERRNO] = { + [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf", + [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid", + [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost", + [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch", + [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf", + [ERRCODE_OFFSET(RELOC)] = "Relocation failed", + [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading", + [ERRCODE_OFFSET(PROG2BIG)] = "Program too big", + [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version", + [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type", + [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message", + [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence", + [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing", +}; + +int libbpf_strerror(int err, char *buf, size_t size) +{ + int ret; + + if (!buf || !size) + return libbpf_err(-EINVAL); + + err = err > 0 ? err : -err; + + if (err < __LIBBPF_ERRNO__START) { + ret = strerror_r(err, buf, size); + buf[size - 1] = '\0'; + return libbpf_err_errno(ret); + } + + if (err < __LIBBPF_ERRNO__END) { + const char *msg; + + msg = libbpf_strerror_table[ERRNO_OFFSET(err)]; + ret = snprintf(buf, size, "%s", msg); + buf[size - 1] = '\0'; + /* The length of the buf and msg is positive. + * A negative number may be returned only when the + * size exceeds INT_MAX. Not likely to appear. + */ + if (ret >= size) + return libbpf_err(-ERANGE); + return 0; + } + + ret = snprintf(buf, size, "Unknown libbpf error %d", err); + buf[size - 1] = '\0'; + if (ret >= size) + return libbpf_err(-ERANGE); + return libbpf_err(-ENOENT); +} + +const char *libbpf_errstr(int err) +{ + static __thread char buf[12]; + + if (err > 0) + err = -err; + + switch (err) { + case -E2BIG: return "-E2BIG"; + case -EACCES: return "-EACCES"; + case -EADDRINUSE: return "-EADDRINUSE"; + case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL"; + case -EAGAIN: return "-EAGAIN"; + case -EALREADY: return "-EALREADY"; + case -EBADF: return "-EBADF"; + case -EBADFD: return "-EBADFD"; + case -EBUSY: return "-EBUSY"; + case -ECANCELED: return "-ECANCELED"; + case -ECHILD: return "-ECHILD"; + case -EDEADLK: return "-EDEADLK"; + case -EDOM: return "-EDOM"; + case -EEXIST: return "-EEXIST"; + case -EFAULT: return "-EFAULT"; + case -EFBIG: return "-EFBIG"; + case -EILSEQ: return "-EILSEQ"; + case -EINPROGRESS: return "-EINPROGRESS"; + case -EINTR: return "-EINTR"; + case -EINVAL: return "-EINVAL"; + case -EIO: return "-EIO"; + case -EISDIR: return "-EISDIR"; + case -ELOOP: return "-ELOOP"; + case -EMFILE: return "-EMFILE"; + case -EMLINK: return "-EMLINK"; + case -EMSGSIZE: return "-EMSGSIZE"; + case -ENAMETOOLONG: return "-ENAMETOOLONG"; + case -ENFILE: return "-ENFILE"; + case -ENODATA: return "-ENODATA"; + case -ENODEV: return "-ENODEV"; + case -ENOENT: return "-ENOENT"; + case -ENOEXEC: return "-ENOEXEC"; + case -ENOLINK: return "-ENOLINK"; + case -ENOMEM: return "-ENOMEM"; + case -ENOSPC: return "-ENOSPC"; + case -ENOTBLK: return "-ENOTBLK"; + case -ENOTDIR: return "-ENOTDIR"; + case -ENOTSUPP: return "-ENOTSUPP"; + case -ENOTTY: return "-ENOTTY"; + case -ENXIO: return "-ENXIO"; + case -EOPNOTSUPP: return "-EOPNOTSUPP"; + case -EOVERFLOW: return "-EOVERFLOW"; + case -EPERM: return "-EPERM"; + case -EPIPE: return "-EPIPE"; + case -EPROTO: return "-EPROTO"; + case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT"; + case -ERANGE: return "-ERANGE"; + case -EROFS: return "-EROFS"; + case -ESPIPE: return "-ESPIPE"; + case -ESRCH: return "-ESRCH"; + case -ETXTBSY: return "-ETXTBSY"; + case -EUCLEAN: return "-EUCLEAN"; + case -EXDEV: return "-EXDEV"; + default: + snprintf(buf, sizeof(buf), "%d", err); + return buf; + } +} + +static inline __u32 get_unaligned_be32(const void *p) +{ + __be32 val; + + memcpy(&val, p, sizeof(val)); + return be32_to_cpu(val); +} + +static inline void put_unaligned_be32(__u32 val, void *p) +{ + __be32 be_val = cpu_to_be32(val); + + memcpy(p, &be_val, sizeof(be_val)); +} + +#define SHA256_BLOCK_LENGTH 64 +#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z))) +#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define Sigma_0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22)) +#define Sigma_1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25)) +#define sigma_0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3)) +#define sigma_1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10)) + +static const __u32 sha256_K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, + 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +}; + +#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) \ + { \ + __u32 tmp = h + Sigma_1(e) + Ch(e, f, g) + sha256_K[i] + w[i]; \ + d += tmp; \ + h = tmp + Sigma_0(a) + Maj(a, b, c); \ + } + +static void sha256_blocks(__u32 state[8], const __u8 *data, size_t nblocks) +{ + while (nblocks--) { + __u32 a = state[0]; + __u32 b = state[1]; + __u32 c = state[2]; + __u32 d = state[3]; + __u32 e = state[4]; + __u32 f = state[5]; + __u32 g = state[6]; + __u32 h = state[7]; + __u32 w[64]; + int i; + + for (i = 0; i < 16; i++) + w[i] = get_unaligned_be32(&data[4 * i]); + for (; i < ARRAY_SIZE(w); i++) + w[i] = sigma_1(w[i - 2]) + w[i - 7] + + sigma_0(w[i - 15]) + w[i - 16]; + for (i = 0; i < ARRAY_SIZE(w); i += 8) { + SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h); + SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g); + SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f); + SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e); + SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d); + SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c); + SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b); + SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a); + } + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; + data += SHA256_BLOCK_LENGTH; + } +} + +void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]) +{ + __u32 state[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 }; + const __be64 bitcount = cpu_to_be64((__u64)len * 8); + __u8 final_data[2 * SHA256_BLOCK_LENGTH] = { 0 }; + size_t final_len = len % SHA256_BLOCK_LENGTH; + int i; + + sha256_blocks(state, data, len / SHA256_BLOCK_LENGTH); + + memcpy(final_data, data + len - final_len, final_len); + final_data[final_len] = 0x80; + final_len = roundup(final_len + 9, SHA256_BLOCK_LENGTH); + memcpy(&final_data[final_len - 8], &bitcount, 8); + + sha256_blocks(state, final_data, final_len / SHA256_BLOCK_LENGTH); + + for (i = 0; i < ARRAY_SIZE(state); i++) + put_unaligned_be32(state[i], &out[4 * i]); +} diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 28c58fb17250..c446c0cd8cf9 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 6 +#define LIBBPF_MINOR_VERSION 8 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index b52f71c59616..78f92c39290a 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -25,7 +25,6 @@ #include "btf.h" #include "libbpf_internal.h" #include "strset.h" -#include "str_error.h" #define BTF_EXTERN_SEC ".extern" @@ -573,7 +572,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz); - fd = memfd_create(filename, 0); + fd = sys_memfd_create(filename, 0); if (fd < 0) { ret = -errno; pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret)); @@ -582,7 +581,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz, written = 0; while (written < buf_sz) { - ret = write(fd, buf, buf_sz); + ret = write(fd, buf + written, buf_sz - written); if (ret < 0) { ret = -errno; pr_warn("failed to write '%s': %s\n", filename, errstr(ret)); @@ -1376,7 +1375,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj } else { if (!secs_match(dst_sec, src_sec)) { pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* "license" and "version" sections are deduped */ @@ -2026,6 +2025,9 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj, obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx; return 0; } + + if (strcmp(src_sec->sec_name, JUMPTABLES_SEC) == 0) + goto add_sym; } if (sym_bind == STB_LOCAL) @@ -2163,7 +2165,7 @@ add_sym: obj->sym_map[src_sym_idx] = dst_sym_idx; - if (sym_type == STT_SECTION && dst_sym) { + if (sym_type == STT_SECTION && dst_sec) { dst_sec->sec_sym_idx = dst_sym_idx; dst_sym->st_value = 0; } @@ -2223,7 +2225,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob } } else if (!secs_match(dst_sec, src_sec)) { pr_warn("sections %s are not compatible\n", src_sec->sec_name); - return -1; + return -EINVAL; } /* shdr->sh_link points to SYMTAB */ diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 68a2def17175..c9a78fb16f11 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -143,7 +143,7 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, struct nlmsghdr *nh; int len, ret; - ret = alloc_iov(&iov, 4096); + ret = alloc_iov(&iov, 8192); if (ret) goto done; @@ -212,6 +212,8 @@ start: } } } + if (len) + pr_warn("Invalid message or trailing data in Netlink response: %d bytes left\n", len); } ret = 0; done: @@ -529,9 +531,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) } -typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); +typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook); -static int clsact_config(struct libbpf_nla_req *req) +static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) { req->tc.tcm_parent = TC_H_CLSACT; req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); @@ -539,6 +541,16 @@ static int clsact_config(struct libbpf_nla_req *req) return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact")); } +static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) +{ + const char *qdisc = OPTS_GET(hook, qdisc, NULL); + + req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT); + req->tc.tcm_handle = OPTS_GET(hook, handle, 0); + + return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1); +} + static int attach_point_to_config(struct bpf_tc_hook *hook, qdisc_config_t *config) { @@ -552,6 +564,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook, return 0; case BPF_TC_CUSTOM: return -EOPNOTSUPP; + case BPF_TC_QDISC: + *config = &qdisc_config; + return 0; default: return -EINVAL; } @@ -596,7 +611,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags) req.tc.tcm_family = AF_UNSPEC; req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0); - ret = config(&req); + ret = config(&req, hook); if (ret < 0) return ret; @@ -639,6 +654,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook) case BPF_TC_INGRESS: case BPF_TC_EGRESS: return libbpf_err(__bpf_tc_detach(hook, NULL, true)); + case BPF_TC_QDISC: case BPF_TC_INGRESS | BPF_TC_EGRESS: return libbpf_err(tc_qdisc_delete(hook)); case BPF_TC_CUSTOM: diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 975e265eab3b..06663f9ea581 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype, minlen = nla_attr_minlen[pt->type]; if (libbpf_nla_len(nla) < minlen) - return -1; + return -EINVAL; if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) - return -1; + return -EINVAL; if (pt->type == LIBBPF_NLA_STRING) { char *data = libbpf_nla_data(nla); if (data[libbpf_nla_len(nla) - 1] != '\0') - return -1; + return -EINVAL; } return 0; @@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) - goto errout; + return err; } - if (tb[type]) + if (tb[type]) { pr_warn("Attribute of type %#x found multiple times in message, " "previous attribute is being ignored.\n", type); + } tb[type] = nla; } - err = 0; -errout: - return err; + return 0; } /** diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 7632e9d41827..0ccc8f548cba 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -64,7 +64,6 @@ enum libbpf_print_level { #include "libbpf.h" #include "bpf.h" #include "btf.h" -#include "str_error.h" #include "libbpf_internal.h" #endif @@ -293,6 +292,8 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf, ++spec_str; if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) return -EINVAL; + if (access_idx < 0) + return -EINVAL; if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) return -E2BIG; spec_str += parsed_len; @@ -683,7 +684,7 @@ static int bpf_core_calc_field_relo(const char *prog_name, { const struct bpf_core_accessor *acc; const struct btf_type *t; - __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; + __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id; const struct btf_member *m; const struct btf_type *mt; bool bitfield; @@ -706,8 +707,14 @@ static int bpf_core_calc_field_relo(const char *prog_name, if (!acc->name) { if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) { *val = spec->bit_offset / 8; - /* remember field size for load/store mem size */ - sz = btf__resolve_size(spec->btf, acc->type_id); + /* remember field size for load/store mem size; + * note, for arrays we care about individual element + * sizes, not the overall array size + */ + t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id); + while (btf_is_array(t)) + t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id); + sz = btf__resolve_size(spec->btf, elem_id); if (sz < 0) return -EINVAL; *field_sz = sz; @@ -767,7 +774,17 @@ static int bpf_core_calc_field_relo(const char *prog_name, case BPF_CORE_FIELD_BYTE_OFFSET: *val = byte_off; if (!bitfield) { - *field_sz = byte_sz; + /* remember field size for load/store mem size; + * note, for arrays we care about individual element + * sizes, not the overall array size + */ + t = skip_mods_and_typedefs(spec->btf, field_type_id, &elem_id); + while (btf_is_array(t)) + t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id); + sz = btf__resolve_size(spec->btf, elem_id); + if (sz < 0) + return -EINVAL; + *field_sz = sz; *type_id = field_type_id; } break; diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 9702b70da444..00ec4837a06d 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -21,7 +21,6 @@ #include "libbpf.h" #include "libbpf_internal.h" #include "bpf.h" -#include "str_error.h" struct ring { ring_buffer_sample_fn sample_cb; diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 4d5fa079b5d6..6a8f5c7a02eb 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -13,10 +13,15 @@ #include <unistd.h> #include <sys/syscall.h> #include <sys/mman.h> +#include <linux/keyctl.h> #include <stdlib.h> #include "bpf.h" #endif +#ifndef SHA256_DIGEST_LENGTH +#define SHA256_DIGEST_LENGTH 32 +#endif + #ifndef __NR_bpf # if defined(__mips__) && defined(_ABIO32) # define __NR_bpf 4355 @@ -64,6 +69,11 @@ struct bpf_load_and_run_opts { __u32 data_sz; __u32 insns_sz; const char *errstr; + void *signature; + __u32 signature_sz; + __s32 keyring_id; + void *excl_prog_hash; + __u32 excl_prog_hash_sz; }; long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size); @@ -220,14 +230,19 @@ static inline int skel_map_create(enum bpf_map_type map_type, const char *map_name, __u32 key_size, __u32 value_size, - __u32 max_entries) + __u32 max_entries, + const void *excl_prog_hash, + __u32 excl_prog_hash_sz) { - const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size); union bpf_attr attr; memset(&attr, 0, attr_sz); attr.map_type = map_type; + attr.excl_prog_hash = (unsigned long) excl_prog_hash; + attr.excl_prog_hash_size = excl_prog_hash_sz; + strncpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -300,6 +315,35 @@ static inline int skel_link_create(int prog_fd, int target_fd, return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz); } +static inline int skel_obj_get_info_by_fd(int fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, info); + __u8 sha[SHA256_DIGEST_LENGTH]; + struct bpf_map_info info; + __u32 info_len = sizeof(info); + union bpf_attr attr; + + memset(&info, 0, sizeof(info)); + info.hash = (long) &sha; + info.hash_size = SHA256_DIGEST_LENGTH; + + memset(&attr, 0, attr_sz); + attr.info.bpf_fd = fd; + attr.info.info = (long) &info; + attr.info.info_len = info_len; + return skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz); +} + +static inline int skel_map_freeze(int fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, map_fd); + union bpf_attr attr; + + memset(&attr, 0, attr_sz); + attr.map_fd = fd; + + return skel_sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz); +} #ifdef __KERNEL__ #define set_err #else @@ -308,12 +352,13 @@ static inline int skel_link_create(int prog_fd, int target_fd, static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) { - const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array); + const size_t prog_load_attr_sz = offsetofend(union bpf_attr, keyring_id); const size_t test_run_attr_sz = offsetofend(union bpf_attr, test); int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; - err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1); + err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, + opts->excl_prog_hash, opts->excl_prog_hash_sz); if (map_fd < 0) { opts->errstr = "failed to create loader map"; set_err; @@ -327,11 +372,34 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) goto out; } +#ifndef __KERNEL__ + err = skel_map_freeze(map_fd); + if (err < 0) { + opts->errstr = "failed to freeze map"; + set_err; + goto out; + } + err = skel_obj_get_info_by_fd(map_fd); + if (err < 0) { + opts->errstr = "failed to fetch obj info"; + set_err; + goto out; + } +#endif + memset(&attr, 0, prog_load_attr_sz); attr.prog_type = BPF_PROG_TYPE_SYSCALL; attr.insns = (long) opts->insns; attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); attr.license = (long) "Dual BSD/GPL"; +#ifndef __KERNEL__ + attr.signature = (long) opts->signature; + attr.signature_size = opts->signature_sz; +#else + if (opts->signature || opts->signature_sz) + pr_warn("signatures are not supported from bpf_preload\n"); +#endif + attr.keyring_id = opts->keyring_id; memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog")); attr.fd_array = (long) &map_fd; attr.log_level = opts->ctx->log_level; diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c deleted file mode 100644 index 8743049e32b7..000000000000 --- a/tools/lib/bpf/str_error.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -#undef _GNU_SOURCE -#include <string.h> -#include <stdio.h> -#include <errno.h> -#include "str_error.h" - -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - -/* make sure libbpf doesn't use kernel-only integer typedefs */ -#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 - -/* - * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl - * libc, while checking strerror_r() return to avoid having to check this in - * all places calling it. - */ -char *libbpf_strerror_r(int err, char *dst, int len) -{ - int ret = strerror_r(err < 0 ? -err : err, dst, len); - /* on glibc <2.13, ret == -1 and errno is set, if strerror_r() can't - * handle the error, on glibc >=2.13 *positive* (errno-like) error - * code is returned directly - */ - if (ret == -1) - ret = errno; - if (ret) { - if (ret == EINVAL) - /* strerror_r() doesn't recognize this specific error */ - snprintf(dst, len, "unknown error (%d)", err < 0 ? err : -err); - else - snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret); - } - return dst; -} - -const char *errstr(int err) -{ - static __thread char buf[12]; - - if (err > 0) - err = -err; - - switch (err) { - case -E2BIG: return "-E2BIG"; - case -EACCES: return "-EACCES"; - case -EADDRINUSE: return "-EADDRINUSE"; - case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL"; - case -EAGAIN: return "-EAGAIN"; - case -EALREADY: return "-EALREADY"; - case -EBADF: return "-EBADF"; - case -EBADFD: return "-EBADFD"; - case -EBUSY: return "-EBUSY"; - case -ECANCELED: return "-ECANCELED"; - case -ECHILD: return "-ECHILD"; - case -EDEADLK: return "-EDEADLK"; - case -EDOM: return "-EDOM"; - case -EEXIST: return "-EEXIST"; - case -EFAULT: return "-EFAULT"; - case -EFBIG: return "-EFBIG"; - case -EILSEQ: return "-EILSEQ"; - case -EINPROGRESS: return "-EINPROGRESS"; - case -EINTR: return "-EINTR"; - case -EINVAL: return "-EINVAL"; - case -EIO: return "-EIO"; - case -EISDIR: return "-EISDIR"; - case -ELOOP: return "-ELOOP"; - case -EMFILE: return "-EMFILE"; - case -EMLINK: return "-EMLINK"; - case -EMSGSIZE: return "-EMSGSIZE"; - case -ENAMETOOLONG: return "-ENAMETOOLONG"; - case -ENFILE: return "-ENFILE"; - case -ENODATA: return "-ENODATA"; - case -ENODEV: return "-ENODEV"; - case -ENOENT: return "-ENOENT"; - case -ENOEXEC: return "-ENOEXEC"; - case -ENOLINK: return "-ENOLINK"; - case -ENOMEM: return "-ENOMEM"; - case -ENOSPC: return "-ENOSPC"; - case -ENOTBLK: return "-ENOTBLK"; - case -ENOTDIR: return "-ENOTDIR"; - case -ENOTSUPP: return "-ENOTSUPP"; - case -ENOTTY: return "-ENOTTY"; - case -ENXIO: return "-ENXIO"; - case -EOPNOTSUPP: return "-EOPNOTSUPP"; - case -EOVERFLOW: return "-EOVERFLOW"; - case -EPERM: return "-EPERM"; - case -EPIPE: return "-EPIPE"; - case -EPROTO: return "-EPROTO"; - case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT"; - case -ERANGE: return "-ERANGE"; - case -EROFS: return "-EROFS"; - case -ESPIPE: return "-ESPIPE"; - case -ESRCH: return "-ESRCH"; - case -ETXTBSY: return "-ETXTBSY"; - case -EUCLEAN: return "-EUCLEAN"; - case -EXDEV: return "-EXDEV"; - default: - snprintf(buf, sizeof(buf), "%d", err); - return buf; - } -} diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h deleted file mode 100644 index 66ffebde0684..000000000000 --- a/tools/lib/bpf/str_error.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __LIBBPF_STR_ERROR_H -#define __LIBBPF_STR_ERROR_H - -#define STRERR_BUFSIZE 128 - -char *libbpf_strerror_r(int err, char *dst, int len); - -/** - * @brief **errstr()** returns string corresponding to numeric errno - * @param err negative numeric errno - * @return pointer to string representation of the errno, that is invalidated - * upon the next call. - */ -const char *errstr(int err); -#endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index b811f754939f..43deb05a5197 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type { BPF_USDT_ARG_CONST, BPF_USDT_ARG_REG, BPF_USDT_ARG_REG_DEREF, + BPF_USDT_ARG_SIB, }; +/* + * This struct layout is designed specifically to be backwards/forward + * compatible between libbpf versions for ARG_CONST, ARG_REG, and + * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+. + */ struct __bpf_usdt_arg_spec { /* u64 scalar interpreted depending on arg_type, see below */ __u64 val_off; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ /* arg location case, see bpf_usdt_arg() for details */ - enum __bpf_usdt_arg_type arg_type; + enum __bpf_usdt_arg_type arg_type: 8; + /* index register offset within struct pt_regs */ + __u16 idx_reg_off: 12; + /* scale factor for index register (1, 2, 4, or 8) */ + __u16 scale_bitshift: 4; + /* reserved for future use, keeps reg_off offset stable */ + __u8 __reserved: 8; +#else + __u8 __reserved: 8; + __u16 idx_reg_off: 12; + __u16 scale_bitshift: 4; + enum __bpf_usdt_arg_type arg_type: 8; +#endif /* offset of referenced register within struct pt_regs */ short reg_off; /* whether arg should be interpreted as signed value */ @@ -108,6 +127,38 @@ int bpf_usdt_arg_cnt(struct pt_regs *ctx) return spec->arg_cnt; } +/* Returns the size in bytes of the #*arg_num* (zero-indexed) USDT argument. + * Returns negative error if argument is not found or arg_num is invalid. + */ +static __always_inline +int bpf_usdt_arg_size(struct pt_regs *ctx, __u64 arg_num) +{ + struct __bpf_usdt_arg_spec *arg_spec; + struct __bpf_usdt_spec *spec; + int spec_id; + + spec_id = __bpf_usdt_spec_id(ctx); + if (spec_id < 0) + return -ESRCH; + + spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id); + if (!spec) + return -ESRCH; + + if (arg_num >= BPF_USDT_MAX_ARG_CNT) + return -ENOENT; + barrier_var(arg_num); + if (arg_num >= spec->arg_cnt) + return -ENOENT; + + arg_spec = &spec->args[arg_num]; + + /* arg_spec->arg_bitshift = 64 - arg_sz * 8 + * so: arg_sz = (64 - arg_spec->arg_bitshift) / 8 + */ + return (unsigned int)(64 - arg_spec->arg_bitshift) / 8; +} + /* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res. * Returns 0 on success; negative error, otherwise. * On error *res is guaranteed to be set to zero. @@ -117,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) { struct __bpf_usdt_spec *spec; struct __bpf_usdt_arg_spec *arg_spec; - unsigned long val; + unsigned long val, idx; int err, spec_id; *res = 0; @@ -172,6 +223,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res) val >>= arg_spec->arg_bitshift; #endif break; + case BPF_USDT_ARG_SIB: + /* Arg is in memory addressed by SIB (Scale-Index-Base) mode + * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first + * fetch the base register contents and the index register + * contents from pt_regs. Then we calculate the final address + * as base + (index * scale) + offset, and do a user-space + * probe read to fetch the argument value. + */ + err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off); + if (err) + return err; + err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off); + if (err) + return err; + err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off)); + if (err) + return err; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + val >>= arg_spec->arg_bitshift; +#endif + break; default: return -EINVAL; } diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 4e4a52742b01..e3710933fd52 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -20,7 +20,6 @@ #include "libbpf_common.h" #include "libbpf_internal.h" #include "hashmap.h" -#include "str_error.h" /* libbpf's USDT support consists of BPF-side state/code and user-space * state/code working together in concert. BPF-side parts are defined in @@ -59,7 +58,7 @@ * * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y); * - * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each + * USDT is identified by its <provider-name>:<probe-name> pair of names. Each * individual USDT has a fixed number of arguments (3 in the above example) * and specifies values of each argument as if it was a function call. * @@ -81,7 +80,7 @@ * NOP instruction that kernel can replace with an interrupt instruction to * trigger instrumentation code (BPF program for all that we care about). * - * Semaphore above is and optional feature. It records an address of a 2-byte + * Semaphore above is an optional feature. It records an address of a 2-byte * refcount variable (normally in '.probes' ELF section) used for signaling if * there is anything that is attached to USDT. This is useful for user * applications if, for example, they need to prepare some arguments that are @@ -121,7 +120,7 @@ * a uprobe BPF program (which for kernel, at least currently, is just a kprobe * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference * that uprobe is usually attached at the function entry, while USDT will - * normally will be somewhere inside the function. But it should always be + * normally be somewhere inside the function. But it should always be * pointing to NOP instruction, which makes such uprobes the fastest uprobe * kind. * @@ -151,7 +150,7 @@ * libbpf sets to spec ID during attach time, or, if kernel is too old to * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such * case. The latter means that some modes of operation can't be supported - * without BPF cookie. Such mode is attaching to shared library "generically", + * without BPF cookie. Such a mode is attaching to shared library "generically", * without specifying target process. In such case, it's impossible to * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode * is not supported without BPF cookie support. @@ -185,7 +184,7 @@ * as even if USDT spec string is the same, USDT cookie value can be * different. It was deemed excessive to try to deduplicate across independent * USDT attachments by taking into account USDT spec string *and* USDT cookie - * value, which would complicated spec ID accounting significantly for little + * value, which would complicate spec ID accounting significantly for little * gain. */ @@ -200,12 +199,23 @@ enum usdt_arg_type { USDT_ARG_CONST, USDT_ARG_REG, USDT_ARG_REG_DEREF, + USDT_ARG_SIB, }; /* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */ struct usdt_arg_spec { __u64 val_off; - enum usdt_arg_type arg_type; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + enum usdt_arg_type arg_type: 8; + __u16 idx_reg_off: 12; + __u16 scale_bitshift: 4; + __u8 __reserved: 8; /* keep reg_off offset stable */ +#else + __u8 __reserved: 8; /* keep reg_off offset stable */ + __u16 idx_reg_off: 12; + __u16 scale_bitshift: 4; + enum usdt_arg_type arg_type: 8; +#endif short reg_off; bool arg_signed; char arg_bitshift; @@ -252,6 +262,7 @@ struct usdt_manager { bool has_bpf_cookie; bool has_sema_refcnt; bool has_uprobe_multi; + bool has_uprobe_syscall; }; struct usdt_manager *usdt_manager_new(struct bpf_object *obj) @@ -291,6 +302,13 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj) * usdt probes. */ man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK); + + /* + * Detect kernel support for uprobe() syscall, it's presence means we can + * take advantage of faster nop5 uprobe handling. + * Added in: 56101b69c919 ("uprobes/x86: Add uprobe syscall to speed up uprobe") + */ + man->has_uprobe_syscall = kernel_supports(obj, FEAT_UPROBE_SYSCALL); return man; } @@ -570,19 +588,39 @@ static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long o return NULL; } -static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, - const char *data, size_t name_off, size_t desc_off, - struct usdt_note *usdt_note); +static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, + size_t desc_off, struct usdt_note *usdt_note); static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); -static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid, - const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, - struct usdt_target **out_targets, size_t *out_target_cnt) +#if defined(__x86_64__) +static bool has_nop_combo(int fd, long off) +{ + unsigned char nop_combo[6] = { + 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */ + }; + unsigned char buf[6]; + + if (pread(fd, buf, 6, off) != 6) + return false; + return memcmp(buf, nop_combo, 6) == 0; +} +#else +static bool has_nop_combo(int fd, long off) +{ + return false; +} +#endif + +static int collect_usdt_targets(struct usdt_manager *man, struct elf_fd *elf_fd, const char *path, + pid_t pid, const char *usdt_provider, const char *usdt_name, + __u64 usdt_cookie, struct usdt_target **out_targets, + size_t *out_target_cnt) { size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0; struct elf_seg *segs = NULL, *vma_segs = NULL; struct usdt_target *targets = NULL, *target; + Elf *elf = elf_fd->elf; long base_addr = 0; Elf_Scn *notes_scn, *base_scn; GElf_Shdr base_shdr, notes_shdr; @@ -626,7 +664,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * struct elf_seg *seg = NULL; void *tmp; - err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, ¬e); + err = parse_usdt_note(&nhdr, data->d_buf, name_off, desc_off, ¬e); if (err) goto err_out; @@ -775,6 +813,16 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * target = &targets[target_cnt]; memset(target, 0, sizeof(*target)); + /* + * We have uprobe syscall and usdt with nop,nop5 instructions combo, + * so we can place the uprobe directly on nop5 (+1) and get this probe + * optimized. + */ + if (man->has_uprobe_syscall && has_nop_combo(elf_fd->fd, usdt_rel_ip)) { + usdt_abs_ip++; + usdt_rel_ip++; + } + target->abs_ip = usdt_abs_ip; target->rel_ip = usdt_rel_ip; target->sema_off = usdt_sema_off; @@ -989,7 +1037,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct /* discover USDT in given binary, optionally limiting * activations to a given PID, if pid > 0 */ - err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name, + err = collect_usdt_targets(man, &elf_fd, path, pid, usdt_provider, usdt_name, usdt_cookie, &targets, &target_cnt); if (err <= 0) { err = (err == 0) ? -ENOENT : err; @@ -1132,8 +1180,7 @@ err_out: /* Parse out USDT ELF note from '.note.stapsdt' section. * Logic inspired by perf's code. */ -static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, - const char *data, size_t name_off, size_t desc_off, +static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, struct usdt_note *note) { const char *provider, *name, *args; @@ -1283,11 +1330,51 @@ static int calc_pt_regs_off(const char *reg_name) static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { - char reg_name[16]; - int len, reg_off; - long off; + char reg_name[16] = {0}, idx_reg_name[16] = {0}; + int len, reg_off, idx_reg_off, scale = 1; + long off = 0; + + if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n", + arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 || + sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n", + arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 || + sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n", + arg_sz, &off, reg_name, idx_reg_name, &len) == 4 || + sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n", + arg_sz, reg_name, idx_reg_name, &len) == 3 + ) { + /* + * Scale Index Base case: + * 1@-96(%rbp,%rax,8) + * 1@(%rbp,%rax,8) + * 1@-96(%rbp,%rax) + * 1@(%rbp,%rax) + */ + arg->arg_type = USDT_ARG_SIB; + arg->val_off = off; + + reg_off = calc_pt_regs_off(reg_name); + if (reg_off < 0) + return reg_off; + arg->reg_off = reg_off; - if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) { + idx_reg_off = calc_pt_regs_off(idx_reg_name); + if (idx_reg_off < 0) + return idx_reg_off; + arg->idx_reg_off = idx_reg_off; + + /* validate scale factor and set fields directly */ + switch (scale) { + case 1: arg->scale_bitshift = 0; break; + case 2: arg->scale_bitshift = 1; break; + case 4: arg->scale_bitshift = 2; break; + case 8: arg->scale_bitshift = 3; break; + default: + pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale); + return -EINVAL; + } + } else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", + arg_sz, &off, reg_name, &len) == 3) { /* Memory dereference case, e.g., -4@-20(%rbp) */ arg->arg_type = USDT_ARG_REG_DEREF; arg->val_off = off; @@ -1306,6 +1393,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec } else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) { /* Register read case, e.g., -4@%eax */ arg->arg_type = USDT_ARG_REG; + /* register read has no memory offset */ arg->val_off = 0; reg_off = calc_pt_regs_off(reg_name); @@ -1327,8 +1415,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec #elif defined(__s390x__) -/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */ - static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz) { unsigned int reg; |
