78 files changed, 12058 insertions, 988 deletions
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 7f6396087b46..8665c799e0fa 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -95,7 +95,7 @@ install_lib: $(LIBFILE)
 		$(call do_install_mkdir,$(libdir_SQ)); \
 		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
 
-HDRS := cpu.h debug.h io.h
+HDRS := cpu.h debug.h io.h io_dir.h
 FD_HDRS := fd/array.h
 FS_HDRS := fs/fs.h fs/tracing_path.h
 INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api
diff --git a/tools/lib/api/io_dir.h b/tools/lib/api/io_dir.h
new file mode 100644
index 000000000000..ef83e967e48c
--- /dev/null
+++ b/tools/lib/api/io_dir.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/*
+ * Lightweight directory reading library.
+ */
+#ifndef __API_IO_DIR__
+#define __API_IO_DIR__
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <linux/limits.h>
+
+#if !defined(SYS_getdents64)
+#if defined(__x86_64__) || defined(__arm__)
+  #define SYS_getdents64 217
+#elif defined(__i386__) || defined(__s390x__) || defined(__sh__)
+  #define SYS_getdents64 220
+#elif defined(__alpha__)
+  #define SYS_getdents64 377
+#elif defined(__mips__)
+  #define SYS_getdents64 308
+#elif defined(__powerpc64__) || defined(__powerpc__)
+  #define SYS_getdents64 202
+#elif defined(__sparc64__) || defined(__sparc__)
+  #define SYS_getdents64 154
+#elif defined(__xtensa__)
+  #define SYS_getdents64 60
+#else
+  #define SYS_getdents64 61
+#endif
+#endif /* !defined(SYS_getdents64) */
+
+static inline ssize_t perf_getdents64(int fd, void *dirp, size_t count)
+{	/* Thin wrapper: issue the getdents64 syscall on fd with the caller's buffer. */
+#ifdef MEMORY_SANITIZER
+	memset(dirp, 0, count);	/* MEMORY_SANITIZER builds zero the whole buffer first */
+#endif
+	return syscall(SYS_getdents64, fd, dirp, count);	/* syscall() result returned as-is */
+}
+
+struct io_dirent64 {	/* one record in the buffer filled by perf_getdents64() */
+	ino64_t        d_ino;    /* 64-bit inode number */
+	off64_t        d_off;    /* 64-bit offset to next structure */
+	unsigned short d_reclen; /* Size of this dirent; io_dir__readdir() steps by it */
+	unsigned char  d_type;   /* File type; compared with DT_* in io_dir__is_dir() */
+	char           d_name[NAME_MAX + 1]; /* Filename (null-terminated) */
+};
+
+struct io_dir {	/* iteration state used by the io_dir__*() helpers */
+	int dirfd;	/* directory fd passed to perf_getdents64(), lseek() and fstatat() */
+	ssize_t available_bytes;	/* unread bytes left in buff; <= 0 makes io_dir__readdir() refill */
+	struct io_dirent64 *next;	/* next record to hand out; assigned only after a refill */
+	struct io_dirent64 buff[4];	/* storage handed to perf_getdents64() as the record buffer */
+};
+
+static inline void io_dir__init(struct io_dir *iod, int dirfd)
+{	/* Prepare iod to iterate dirfd; the fd is only stored here, not opened or duplicated. */
+	iod->dirfd = dirfd;
+	iod->available_bytes = 0;	/* empty buffer: the first io_dir__readdir() call refills */
+}
+
+static inline void io_dir__rewinddir(struct io_dir *iod)
+{	/* Restart iteration: seek the directory fd to offset 0 and discard buffered records. */
+	lseek(iod->dirfd, 0, SEEK_SET);	/* NOTE(review): the lseek() result is not checked */
+	iod->available_bytes = 0;	/* forces the next io_dir__readdir() to refill */
+}
+
+static inline struct io_dirent64 *io_dir__readdir(struct io_dir *iod)
+{	/* Return the next buffered record, or NULL once perf_getdents64() returns <= 0. */
+	struct io_dirent64 *entry;
+
+	if (iod->available_bytes <= 0) {	/* buffer drained: fetch another batch */
+		ssize_t rc = perf_getdents64(iod->dirfd, iod->buff, sizeof(iod->buff));
+
+		if (rc <= 0)	/* a 0 result and a negative result both end up as NULL */
+			return NULL;
+		iod->available_bytes = rc;
+		iod->next = iod->buff;
+	}
+	entry = iod->next;
+	/* advance by the record's own d_reclen bytes, not by sizeof(*entry) */
+	iod->next = (struct io_dirent64 *)((char *)entry + entry->d_reclen);
+	iod->available_bytes -= entry->d_reclen;
+	return entry;	/* points into iod->buff, which a later refill overwrites */
+}
+
+static inline bool io_dir__is_dir(const struct io_dir *iod, struct io_dirent64 *dent)
+{	/* True if dent is a directory; a DT_UNKNOWN type is resolved via fstatat() on iod->dirfd. */
+	if (dent->d_type == DT_UNKNOWN) {
+		struct stat st;
+
+		if (fstatat(iod->dirfd, dent->d_name, &st, /*flags=*/0))
+			return false;	/* a failed stat is reported as "not a directory" */
+
+		if (S_ISDIR(st.st_mode)) {
+			dent->d_type = DT_DIR;	/* record the answer in the caller's dent */
+			return true;
+		}
+	}
+	return dent->d_type == DT_DIR;
+}
+
+#endif  /* __API_IO_DIR__ */
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c
index 2178862bb114..fedc9070f0e4 100644
--- a/tools/lib/bitmap.c
+++ b/tools/lib/bitmap.c
@@ -101,6 +101,26 @@ bool __bitmap_intersects(const unsigned long *bitmap1,
 	return false;
 }
 
+void __bitmap_set(unsigned long *map, unsigned int start, int len)
+{	/* OR masks into map for len bits beginning at bit start; bits are only ever set, never cleared. */
+	unsigned long *p = map + BIT_WORD(start);	/* word holding bit start (per BIT_WORD) */
+	const unsigned int size = start + len;	/* bit index one past the requested range */
+	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);	/* bits from start to the end of its word */
+	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_set >= 0) {	/* range reaches the end of the current word */
+		*p |= mask_to_set;
+		len -= bits_to_set;
+		bits_to_set = BITS_PER_LONG;	/* later words are covered from their first bit */
+		mask_to_set = ~0UL;
+		p++;
+	}
+	if (len) {	/* leftover bits end inside the word at p */
+		mask_to_set &= BITMAP_LAST_WORD_MASK(size);	/* presumably drops bits at/after size - confirm macro */
+		*p |= mask_to_set;
+	}
+}
+
 void __bitmap_clear(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);
@@ -120,3 +140,42 @@ void __bitmap_clear(unsigned long *map, unsigned int start, int len)
 		*p &= ~mask_to_clear;
 	}
 }
+
+bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+				const unsigned long *bitmap2, unsigned int bits)
+{	/* Store bitmap1 & ~bitmap2 into dst; returns true if any stored word is non-zero. */
+	unsigned int k;
+	unsigned int lim = bits/BITS_PER_LONG;	/* count of whole words */
+	unsigned long result = 0;	/* OR of every word written to dst */
+
+	for (k = 0; k < lim; k++)
+		result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+	if (bits % BITS_PER_LONG)	/* trailing partial word; k == lim at this point */
+		result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
+			   BITMAP_LAST_WORD_MASK(bits));
+	return result != 0;
+}
+
+bool __bitmap_subset(const unsigned long *bitmap1,
+		     const unsigned long *bitmap2, unsigned int bits)
+{	/* False as soon as a checked bit is set in bitmap1 but clear in bitmap2; true otherwise. */
+	unsigned int k, lim = bits/BITS_PER_LONG;	/* lim: count of whole words */
+	for (k = 0; k < lim; ++k)
+		if (bitmap1[k] & ~bitmap2[k])
+			return false;
+
+	if (bits % BITS_PER_LONG)	/* trailing partial word, limited by BITMAP_LAST_WORD_MASK */
+		if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+			return false;
+	return true;
+}
+
+void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+				const unsigned long *bitmap2, unsigned int bits)
+{	/* Store bitmap1 ^ bitmap2 into dst, one unsigned long at a time. */
+	unsigned int k;
+	unsigned int nr = BITS_TO_LONGS(bits);	/* presumably bits rounded up to whole longs - confirm macro */
+
+	for (k = 0; k < nr; k++)
+		dst[k] = bitmap1[k] ^ bitmap2[k];	/* whole words are written; no last-word mask is applied */
+}
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index e2cd558ca0b4..c80204bb72a2 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_utils.o \
 	    netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
 	    btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
 	    usdt.o zip.o elf.o features.o btf_iter.o btf_relocate.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 359f73ead613..5846de364209 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -154,7 +154,7 @@ int bump_rlimit_memlock(void)
 
 	memlock_bumped = true;
 
-	/* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */
+	/* zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */
 	if (memlock_rlim == 0)
 		return 0;
 
@@ -172,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type,
 		   __u32 max_entries,
 		   const struct bpf_map_create_opts *opts)
 {
-	const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
+	const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
 	union bpf_attr attr;
 	int fd;
 
@@ -203,6 +203,8 @@ int bpf_map_create(enum bpf_map_type map_type,
 	attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
 
 	attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+	attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL));
+	attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0);
 
 	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
 	return libbpf_err_errno(fd);
@@ -238,7 +240,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 		  const struct bpf_insn *insns, size_t insn_cnt,
 		  struct bpf_prog_load_opts *opts)
 {
-	const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt);
+	const size_t attr_sz = offsetofend(union bpf_attr, keyring_id);
 	void *finfo = NULL, *linfo = NULL;
 	const char *func_info, *line_info;
 	__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
@@ -792,6 +794,7 @@ int bpf_link_create(int prog_fd, int target_fd,
 	case BPF_TRACE_FENTRY:
 	case BPF_TRACE_FEXIT:
 	case BPF_MODIFY_RETURN:
+	case BPF_TRACE_FSESSION:
 	case BPF_LSM_MAC:
 		attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0);
 		if (!OPTS_ZEROED(opts, tracing))
@@ -837,6 +840,50 @@ int bpf_link_create(int prog_fd, int target_fd,
 		if (!OPTS_ZEROED(opts, netkit))
 			return libbpf_err(-EINVAL);
 		break;
+	case BPF_CGROUP_INET_INGRESS:
+	case BPF_CGROUP_INET_EGRESS:
+	case BPF_CGROUP_INET_SOCK_CREATE:
+	case BPF_CGROUP_INET_SOCK_RELEASE:
+	case BPF_CGROUP_INET4_BIND:
+	case BPF_CGROUP_INET6_BIND:
+	case BPF_CGROUP_INET4_POST_BIND:
+	case BPF_CGROUP_INET6_POST_BIND:
+	case BPF_CGROUP_INET4_CONNECT:
+	case BPF_CGROUP_INET6_CONNECT:
+	case BPF_CGROUP_UNIX_CONNECT:
+	case BPF_CGROUP_INET4_GETPEERNAME:
+	case BPF_CGROUP_INET6_GETPEERNAME:
+	case BPF_CGROUP_UNIX_GETPEERNAME:
+	case BPF_CGROUP_INET4_GETSOCKNAME:
+	case BPF_CGROUP_INET6_GETSOCKNAME:
+	case BPF_CGROUP_UNIX_GETSOCKNAME:
+	case BPF_CGROUP_UDP4_SENDMSG:
+	case BPF_CGROUP_UDP6_SENDMSG:
+	case BPF_CGROUP_UNIX_SENDMSG:
+	case BPF_CGROUP_UDP4_RECVMSG:
+	case BPF_CGROUP_UDP6_RECVMSG:
+	case BPF_CGROUP_UNIX_RECVMSG:
+	case BPF_CGROUP_SOCK_OPS:
+	case BPF_CGROUP_DEVICE:
+	case BPF_CGROUP_SYSCTL:
+	case BPF_CGROUP_GETSOCKOPT:
+	case BPF_CGROUP_SETSOCKOPT:
+	case BPF_LSM_CGROUP:
+		relative_fd = OPTS_GET(opts, cgroup.relative_fd, 0);
+		relative_id = OPTS_GET(opts, cgroup.relative_id, 0);
+		if (relative_fd && relative_id)
+			return libbpf_err(-EINVAL);
+		if (relative_id) {
+			attr.link_create.cgroup.relative_id = relative_id;
+			attr.link_create.flags |= BPF_F_ID;
+		} else {
+			attr.link_create.cgroup.relative_fd = relative_fd;
+		}
+		attr.link_create.cgroup.expected_revision =
+			OPTS_GET(opts, cgroup.expected_revision, 0);
+		if (!OPTS_ZEROED(opts, cgroup))
+			return libbpf_err(-EINVAL);
+		break;
 	default:
 		if (!OPTS_ZEROED(opts, flags))
 			return libbpf_err(-EINVAL);
@@ -1097,7 +1144,7 @@ int bpf_map_get_fd_by_id(__u32 id)
 int bpf_btf_get_fd_by_id_opts(__u32 id,
 			      const struct bpf_get_fd_by_id_opts *opts)
 {
-	const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
+	const size_t attr_sz = offsetofend(union bpf_attr, fd_by_id_token_fd);
 	union bpf_attr attr;
 	int fd;
 
@@ -1107,6 +1154,7 @@ int bpf_btf_get_fd_by_id_opts(__u32 id,
 	memset(&attr, 0, attr_sz);
 	attr.btf_id = id;
 	attr.open_flags = OPTS_GET(opts, open_flags, 0);
+	attr.fd_by_id_token_fd = OPTS_GET(opts, token_fd, 0);
 
 	fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz);
 	return libbpf_err_errno(fd);
@@ -1330,3 +1378,42 @@ int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts)
 	fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }
+
+int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len,
+			 struct bpf_prog_stream_read_opts *opts)
+{	/* Issue BPF_PROG_STREAM_READ_BY_FD for prog_fd/stream_id with buf as the destination. */
+	const size_t attr_sz = offsetofend(union bpf_attr, prog_stream_read);
+	union bpf_attr attr;
+	int err;
+
+	if (!OPTS_VALID(opts, bpf_prog_stream_read_opts))	/* opts is validated only; no field is read below */
+		return libbpf_err(-EINVAL);
+
+	memset(&attr, 0, attr_sz);	/* only the first attr_sz bytes are cleared and passed on */
+	attr.prog_stream_read.stream_buf = ptr_to_u64(buf);
+	attr.prog_stream_read.stream_buf_len = buf_len;
+	attr.prog_stream_read.stream_id = stream_id;
+	attr.prog_stream_read.prog_fd = prog_fd;
+
+	err = sys_bpf(BPF_PROG_STREAM_READ_BY_FD, &attr, attr_sz);
+	return libbpf_err_errno(err);	/* the sys_bpf() result goes through libbpf_err_errno() */
+}
+
+int bpf_prog_assoc_struct_ops(int prog_fd, int map_fd,
+			      struct bpf_prog_assoc_struct_ops_opts *opts)
+{	/* Issue BPF_PROG_ASSOC_STRUCT_OPS for prog_fd and map_fd; flags come from opts. */
+	const size_t attr_sz = offsetofend(union bpf_attr, prog_assoc_struct_ops);
+	union bpf_attr attr;
+	int err;
+
+	if (!OPTS_VALID(opts, bpf_prog_assoc_struct_ops_opts))
+		return libbpf_err(-EINVAL);
+
+	memset(&attr, 0, attr_sz);	/* only the first attr_sz bytes are cleared and passed on */
+	attr.prog_assoc_struct_ops.map_fd = map_fd;
+	attr.prog_assoc_struct_ops.prog_fd = prog_fd;
+	attr.prog_assoc_struct_ops.flags = OPTS_GET(opts, flags, 0);	/* 0 is the fallback given to OPTS_GET */
+
+	err = sys_bpf(BPF_PROG_ASSOC_STRUCT_OPS, &attr, attr_sz);
+	return libbpf_err_errno(err);	/* the sys_bpf() result goes through libbpf_err_errno() */
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 435da95d2058..2c8e88ddb674 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -54,9 +54,12 @@ struct bpf_map_create_opts {
 	__s32 value_type_btf_obj_fd;
 
 	__u32 token_fd;
+
+	const void *excl_prog_hash;
+	__u32 excl_prog_hash_size;
 	size_t :0;
 };
-#define bpf_map_create_opts__last_field token_fd
+#define bpf_map_create_opts__last_field excl_prog_hash_size
 
 LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
 			      const char *map_name,
@@ -286,6 +289,14 @@ LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
  *    Update spin_lock-ed map elements. This must be
  *    specified if the map value contains a spinlock.
  *
+ * **BPF_F_CPU**
+ *    For percpu maps, update the value on the specified CPU. The target
+ *    CPU is encoded in the high 32 bits of **opts->elem_flags**.
+ *
+ * **BPF_F_ALL_CPUS**
+ *    For percpu maps, update the value across all CPUs. This flag cannot
+ *    be combined with BPF_F_CPU.
+ *
  * @param fd BPF map file descriptor
  * @param keys pointer to an array of *count* keys
  * @param values pointer to an array of *count* values
@@ -438,6 +449,11 @@ struct bpf_link_create_opts {
 			__u32 relative_id;
 			__u64 expected_revision;
 		} netkit;
+		struct {
+			__u32 relative_fd;
+			__u32 relative_id;
+			__u64 expected_revision;
+		} cgroup;
 	};
 	size_t :0;
 };
@@ -487,9 +503,10 @@ LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
 struct bpf_get_fd_by_id_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
 	__u32 open_flags; /* permissions requested for the operation on fd */
+	__u32 token_fd;
 	size_t :0;
 };
-#define bpf_get_fd_by_id_opts__last_field open_flags
+#define bpf_get_fd_by_id_opts__last_field token_fd
 
 LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
 LIBBPF_API int bpf_prog_get_fd_by_id_opts(__u32 id,
@@ -703,6 +720,48 @@ struct bpf_token_create_opts {
 LIBBPF_API int bpf_token_create(int bpffs_fd,
 				struct bpf_token_create_opts *opts);
 
+struct bpf_prog_stream_read_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	size_t :0;
+};
+#define bpf_prog_stream_read_opts__last_field sz
+/**
+ * @brief **bpf_prog_stream_read** reads data from the BPF stream of a given BPF
+ * program.
+ *
+ * @param prog_fd FD for the BPF program whose BPF stream is to be read.
+ * @param stream_id ID of the BPF stream to be read.
+ * @param buf Buffer to read data into from the BPF stream.
+ * @param buf_len Maximum number of bytes to read from the BPF stream.
+ * @param opts optional options, can be NULL
+ *
+ * @return The number of bytes read, on success; negative error code, otherwise
+ * (errno is also set to the error code)
+ */
+LIBBPF_API int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len,
+				    struct bpf_prog_stream_read_opts *opts);
+
+struct bpf_prog_assoc_struct_ops_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+	__u32 flags; /* forwarded by bpf_prog_assoc_struct_ops() as the command's flags */
+	size_t :0;
+};
+#define bpf_prog_assoc_struct_ops_opts__last_field flags
+
+/**
+ * @brief **bpf_prog_assoc_struct_ops** associates a BPF program with a
+ * struct_ops map.
+ *
+ * @param prog_fd FD for the BPF program
+ * @param map_fd FD for the struct_ops map to be associated with the BPF program
+ * @param opts optional options, can be NULL
+ *
+ * @return 0 on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_prog_assoc_struct_ops(int prog_fd, int map_fd,
+					 struct bpf_prog_assoc_struct_ops_opts *opts);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index c0e13cdf9660..b997c68bd945 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
 #define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j
 #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__)
 
+#if defined(__clang__) && (__clang_major__ >= 19)	/* clang 19 or newer */
+#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__))
+#elif defined(__GNUC__) && (__GNUC__ >= 14)	/* GCC 14 or newer */
+#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__))
+#else	/* every other compiler keeps the plain typeof form */
 #define ___type(...) typeof(___arrow(__VA_ARGS__))
+#endif
 
 #define ___read(read_fn, dst, src_type, src, accessor)			    \
 	read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor)
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index 6ff963a491d9..49af4260b8e6 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -4,6 +4,7 @@
 #define __BPF_GEN_INTERNAL_H
 
 #include "bpf.h"
+#include "libbpf_internal.h"
 
 struct ksym_relo_desc {
 	const char *name;
@@ -50,6 +51,7 @@ struct bpf_gen {
 	__u32 nr_ksyms;
 	int fd_array;
 	int nr_fd_array;
+	int hash_insn_offset[SHA256_DWORD_SIZE];
 };
 
 void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 686824b8b413..9d160b5b9c0e 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -15,6 +15,14 @@
 #define __array(name, val) typeof(val) *name[]
 #define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name
 
+#ifndef likely
+#define likely(x)      (__builtin_expect(!!(x), 1))	/* branch hint: x expected to be non-zero */
+#endif
+
+#ifndef unlikely
+#define unlikely(x)    (__builtin_expect(!!(x), 0))	/* branch hint: x expected to be zero */
+#endif
+
 /*
  * Helper macro to place programs, maps, license in
  * different sections in elf_bpf file. Section names
@@ -207,6 +215,7 @@ enum libbpf_tristate {
 #define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull")))
 #define __arg_nullable __attribute((btf_decl_tag("arg:nullable")))
 #define __arg_trusted __attribute((btf_decl_tag("arg:trusted")))
+#define __arg_untrusted __attribute((btf_decl_tag("arg:untrusted")))
 #define __arg_arena __attribute((btf_decl_tag("arg:arena")))
 
 #ifndef ___bpf_concat
@@ -306,6 +315,19 @@ enum libbpf_tristate {
 			  ___param, sizeof(___param));		\
 })
 
+#define bpf_stream_printk(stream_id, fmt, args...)					\
+({	/* statement expression ending in the bpf_stream_vprintk() call */		\
+	static const char ___fmt[] = fmt;						\
+	unsigned long long ___param[___bpf_narg(args)];					\
+	/* -Wint-conversion is silenced only around ___bpf_fill() */			\
+	_Pragma("GCC diagnostic push")							\
+	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")				\
+	___bpf_fill(___param, args);							\
+	_Pragma("GCC diagnostic pop")							\
+	/* hand over the ___param array together with its size in bytes */		\
+	bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param));		\
+})
+
 /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args
  * Otherwise use __bpf_vprintk
  */
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index a8f6cd4841b0..dbe32a5d02cd 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -311,7 +311,7 @@ struct pt_regs___arm64 {
 #define __PT_RET_REG regs[31]
 #define __PT_FP_REG __unsupported__
 #define __PT_RC_REG gpr[3]
-#define __PT_SP_REG sp
+#define __PT_SP_REG gpr[1]
 #define __PT_IP_REG nip
 
 #elif defined(bpf_target_sparc)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 48c66f3a9200..ceb57b46a878 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -12,6 +12,7 @@
 #include <sys/utsname.h>
 #include <sys/param.h>
 #include <sys/stat.h>
+#include <sys/mman.h>
 #include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/btf.h>
@@ -22,13 +23,42 @@
 #include "libbpf_internal.h"
 #include "hashmap.h"
 #include "strset.h"
-#include "str_error.h"
 
 #define BTF_MAX_NR_TYPES 0x7fffffffU
 #define BTF_MAX_STR_OFFSET 0x7fffffffU
 
 static struct btf_type btf_void;
 
+/*
+ * Describe how kinds are laid out; some have a singular element following the "struct btf_type",
+ * some have BTF_INFO_VLEN(t->info) elements.  Specify sizes for both.  Flags are currently unused.
+ * Kind layout can be optionally added to the BTF representation in a dedicated section to
+ * facilitate parsing.  New kinds must be added here.
+ */
+static struct btf_layout layouts[NR_BTF_KINDS] = {	/* one entry per BTF_KIND_* value, used as the index */
+/*				singular element size		vlen element(s) size		flags */
+[BTF_KIND_UNKN] =	{	0,				0,				0 },
+[BTF_KIND_INT] =	{	sizeof(__u32),			0,				0 },
+[BTF_KIND_PTR] =	{	0,				0,				0 },
+[BTF_KIND_ARRAY] =	{	sizeof(struct btf_array),	0,				0 },
+[BTF_KIND_STRUCT] =	{	0,				sizeof(struct btf_member),	0 },
+[BTF_KIND_UNION] =	{	0,				sizeof(struct btf_member),	0 },
+[BTF_KIND_ENUM] =	{	0,				sizeof(struct btf_enum),	0 },
+[BTF_KIND_FWD] =	{	0,				0,				0 },
+[BTF_KIND_TYPEDEF] =	{	0,				0,				0 },
+[BTF_KIND_VOLATILE] =	{	0,				0,				0 },
+[BTF_KIND_CONST] =	{	0,				0,				0 },
+[BTF_KIND_RESTRICT] =	{	0,				0,				0 },
+[BTF_KIND_FUNC] =	{	0,				0,				0 },
+[BTF_KIND_FUNC_PROTO] =	{	0,				sizeof(struct btf_param),	0 },
+[BTF_KIND_VAR] =	{	sizeof(struct btf_var),		0,				0 },
+[BTF_KIND_DATASEC] =	{	0,				sizeof(struct btf_var_secinfo),	0 },
+[BTF_KIND_FLOAT] =	{	0,				0,				0 },
+[BTF_KIND_DECL_TAG] =	{	sizeof(struct btf_decl_tag),	0,				0 },
+[BTF_KIND_TYPE_TAG] =	{	0,				0,				0 },
+[BTF_KIND_ENUM64] =	{	0,				sizeof(struct btf_enum64),	0 },
+};
+
 struct btf {
 	/* raw BTF data in native endianness */
 	void *raw_data;
@@ -40,42 +70,53 @@ struct btf {
 
 	/*
 	 * When BTF is loaded from an ELF or raw memory it is stored
-	 * in a contiguous memory block. The hdr, type_data, and, strs_data
+	 * in a contiguous memory block. The type_data, layout and strs_data
 	 * point inside that memory region to their respective parts of BTF
 	 * representation:
 	 *
-	 * +--------------------------------+
-	 * |  Header  |  Types  |  Strings  |
-	 * +--------------------------------+
-	 * ^          ^         ^
-	 * |          |         |
-	 * hdr        |         |
-	 * types_data-+         |
-	 * strs_data------------+
+	 * +----------------------------------------+---------------+
+	 * |  Header  |  Types  |  Optional layout  |  Strings      |
+	 * +--------------------------------------------------------+
+	 * ^          ^         ^                   ^
+	 * |          |         |                   |
+	 * raw_data   |         |                   |
+	 * types_data-+         |                   |
+	 * layout---------------+                   |
+	 * strs_data--------------------------------+
+	 *
+	 * A separate struct btf_header is embedded as btf->hdr,
+	 * and header information is copied into it.  This allows us
+	 * to handle header data for various header formats; the original,
+	 * the extended header with layout info, etc.
 	 *
 	 * If BTF data is later modified, e.g., due to types added or
 	 * removed, BTF deduplication performed, etc, this contiguous
-	 * representation is broken up into three independently allocated
-	 * memory regions to be able to modify them independently.
+	 * representation is broken up into four independent memory
+	 * regions.
+	 *
 	 * raw_data is nulled out at that point, but can be later allocated
 	 * and cached again if user calls btf__raw_data(), at which point
-	 * raw_data will contain a contiguous copy of header, types, and
-	 * strings:
+	 * raw_data will contain a contiguous copy of header, types, optional
+	 * layout and strings.  layout optionally points to a
+	 * btf_layout array - this allows us to encode information about
+	 * the kinds known at encoding time.  If layout is NULL no
+	 * layout information is encoded.
 	 *
-	 * +----------+  +---------+  +-----------+
-	 * |  Header  |  |  Types  |  |  Strings  |
-	 * +----------+  +---------+  +-----------+
-	 * ^             ^            ^
-	 * |             |            |
-	 * hdr           |            |
-	 * types_data----+            |
-	 * strset__data(strs_set)-----+
+	 * +----------+  +---------+  +-----------+   +-----------+
+	 * |  Header  |  |  Types  |  |  Layout   |   |  Strings  |
+	 * +----------+  +---------+  +-----------+   +-----------+
+	 * ^             ^            ^               ^
+	 * |             |            |               |
+	 * hdr           |            |               |
+	 * types_data----+            |               |
+	 * layout---------------------+               |
+	 * strset__data(strs_set)---------------------+
 	 *
-	 *               +----------+---------+-----------+
-	 *               |  Header  |  Types  |  Strings  |
-	 * raw_data----->+----------+---------+-----------+
+	 *               +----------+---------+-------------------+-----------+
+	 *               |  Header  |  Types  |  Optional Layout  |  Strings  |
+	 * raw_data----->+----------+---------+-------------------+-----------+
 	 */
-	struct btf_header *hdr;
+	struct btf_header hdr;
 
 	void *types_data;
 	size_t types_data_cap; /* used size stored in hdr->type_len */
@@ -92,6 +133,8 @@ struct btf {
 	 *   - for split BTF counts number of types added on top of base BTF.
 	 */
 	__u32 nr_types;
+	/* the start ID of named types in sorted BTF */
+	int named_start_id;
 	/* if not NULL, points to the base BTF on top of which the current
 	 * split BTF is based
 	 */
@@ -120,6 +163,20 @@ struct btf {
 	/* whether base_btf should be freed in btf_free for this instance */
 	bool owns_base;
 
+	/* whether raw_data is a (read-only) mmap */
+	bool raw_data_is_mmap;
+
+	/* is BTF modifiable? i.e. is it split into separate sections as described above? */
+	bool modifiable;
+	/* does BTF have header information we do not support?  If so, disallow
+	 * modification.
+	 */
+	bool has_hdr_extra;
+	/* Points either at raw kind layout data in parsed BTF (if present), or
+	 * at an allocated kind layout array when BTF is modifiable.
+	 */
+	void *layout;
+
 	/* BTF object FD, if loaded into kernel */
 	int fd;
 
@@ -211,7 +268,7 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
 	return 0;
 }
 
-static void btf_bswap_hdr(struct btf_header *h)
+static void btf_bswap_hdr(struct btf_header *h, __u32 hdr_len)
 {
 	h->magic = bswap_16(h->magic);
 	h->hdr_len = bswap_32(h->hdr_len);
@@ -219,66 +276,115 @@ static void btf_bswap_hdr(struct btf_header *h)
 	h->type_len = bswap_32(h->type_len);
 	h->str_off = bswap_32(h->str_off);
 	h->str_len = bswap_32(h->str_len);
+	/* May be operating on raw data with hdr_len that does not include below fields */
+	if (hdr_len >= sizeof(struct btf_header)) {
+		h->layout_off = bswap_32(h->layout_off);
+		h->layout_len = bswap_32(h->layout_len);
+	}
 }
 
 static int btf_parse_hdr(struct btf *btf)
 {
-	struct btf_header *hdr = btf->hdr;
-	__u32 meta_left;
+	struct btf_header *hdr = btf->raw_data;
+	__u32 hdr_len, meta_left;
 
-	if (btf->raw_size < sizeof(struct btf_header)) {
+	if (btf->raw_size < offsetofend(struct btf_header, str_len)) {
 		pr_debug("BTF header not found\n");
 		return -EINVAL;
 	}
 
+	hdr_len = hdr->hdr_len;
+
 	if (hdr->magic == bswap_16(BTF_MAGIC)) {
 		btf->swapped_endian = true;
-		if (bswap_32(hdr->hdr_len) != sizeof(struct btf_header)) {
+		hdr_len = bswap_32(hdr->hdr_len);
+		if (hdr_len < offsetofend(struct btf_header, str_len)) {
 			pr_warn("Can't load BTF with non-native endianness due to unsupported header length %u\n",
-				bswap_32(hdr->hdr_len));
+				hdr_len);
 			return -ENOTSUP;
 		}
-		btf_bswap_hdr(hdr);
 	} else if (hdr->magic != BTF_MAGIC) {
 		pr_debug("Invalid BTF magic: %x\n", hdr->magic);
 		return -EINVAL;
 	}
 
-	if (btf->raw_size < hdr->hdr_len) {
+	if (btf->raw_size < hdr_len) {
 		pr_debug("BTF header len %u larger than data size %u\n",
-			 hdr->hdr_len, btf->raw_size);
+			 hdr_len, btf->raw_size);
 		return -EINVAL;
 	}
 
-	meta_left = btf->raw_size - hdr->hdr_len;
-	if (meta_left < (long long)hdr->str_off + hdr->str_len) {
+	if (btf->swapped_endian)
+		btf_bswap_hdr(hdr, hdr_len);
+
+	memcpy(&btf->hdr, hdr, min((size_t)hdr_len, sizeof(struct btf_header)));
+
+	/* If unknown header data is found, modification is prohibited in
+	 * btf_ensure_modifiable().
+	 */
+	if (hdr_len > sizeof(struct btf_header)) {
+		__u8 *h = (__u8 *)hdr;
+		__u32 i;
+
+		for (i = sizeof(struct btf_header); i < hdr_len; i++) {
+			if (!h[i])
+				continue;
+			btf->has_hdr_extra = true;
+			pr_debug("Unknown BTF header data at offset %u; modification is disallowed\n",
+				 i);
+			break;
+		}
+	}
+
+	meta_left = btf->raw_size - hdr_len;
+	if (meta_left < (long long)btf->hdr.str_off + btf->hdr.str_len) {
 		pr_debug("Invalid BTF total size: %u\n", btf->raw_size);
 		return -EINVAL;
 	}
 
-	if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) {
+	if ((long long)btf->hdr.type_off + btf->hdr.type_len > btf->hdr.str_off) {
 		pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n",
-			 hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len);
+			 btf->hdr.type_off, btf->hdr.type_len, btf->hdr.str_off,
+			 btf->hdr.str_len);
 		return -EINVAL;
 	}
 
-	if (hdr->type_off % 4) {
+	if (btf->hdr.type_off % 4) {
 		pr_debug("BTF type section is not aligned to 4 bytes\n");
 		return -EINVAL;
 	}
 
+	if (btf->hdr.layout_len == 0)
+		return 0;
+
+	/* optional layout section sits between types and strings */
+	if (btf->hdr.layout_off % 4) {
+		pr_debug("BTF layout section is not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
+	if (btf->hdr.layout_off < (long long)btf->hdr.type_off + btf->hdr.type_len) {
+		pr_debug("Invalid BTF data sections layout: type data at %u + %u,  layout data at %u + %u\n",
+			 btf->hdr.type_off, btf->hdr.type_len,
+			 btf->hdr.layout_off, btf->hdr.layout_len);
+		return -EINVAL;
+	}
+	if ((long long)btf->hdr.layout_off + btf->hdr.layout_len > btf->hdr.str_off ||
+	    btf->hdr.layout_off > btf->hdr.str_off) {
+		pr_debug("Invalid BTF data sections layout: layout data at %u + %u, strings data at %u\n",
+			 btf->hdr.layout_off, btf->hdr.layout_len, btf->hdr.str_off);
+		return -EINVAL;
+	}
 	return 0;
 }
 
 static int btf_parse_str_sec(struct btf *btf)
 {
-	const struct btf_header *hdr = btf->hdr;
 	const char *start = btf->strs_data;
-	const char *end = start + btf->hdr->str_len;
+	const char *end = start + btf->hdr.str_len;
 
-	if (btf->base_btf && hdr->str_len == 0)
+	if (btf->base_btf && btf->hdr.str_len == 0)
 		return 0;
-	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) {
+	if (!btf->hdr.str_len || btf->hdr.str_len - 1 > BTF_MAX_STR_OFFSET || end[-1]) {
 		pr_debug("Invalid BTF string section\n");
 		return -EINVAL;
 	}
@@ -289,7 +395,63 @@ static int btf_parse_str_sec(struct btf *btf)
 	return 0;
 }
 
-static int btf_type_size(const struct btf_type *t)
+static int btf_parse_layout_sec(struct btf *btf)
+{	/* Point btf->layout at the optional layout section in raw_data; 0 or -EINVAL. */
+	if (!btf->hdr.layout_len)	/* no layout section: btf->layout is not assigned here */
+		return 0;
+
+	if (btf->hdr.layout_len % sizeof(struct btf_layout) != 0) {	/* must hold whole entries */
+		pr_debug("Invalid BTF kind layout section\n");
+		return -EINVAL;
+	}
+	btf->layout = btf->raw_data + btf->hdr.hdr_len + btf->hdr.layout_off;	/* layout_off counts from the end of the header */
+
+	if (btf->swapped_endian) {
+		struct btf_layout *l, *end = btf->layout + btf->hdr.layout_len;	/* byte arithmetic: btf->layout is void * */
+
+		for (l = btf->layout; l < end; l++)
+			l->flags = bswap_16(l->flags);	/* flags is the only field swapped here */
+	}
+
+	return 0;
+}
+
+/* For kinds btf_type_size() has no case for: size the type from kind layout data, or -EINVAL. */
+static int btf_type_size_unknown(const struct btf *btf, const struct btf_type *t)
+{
+	__u32 l_cnt = btf->hdr.layout_len / sizeof(struct btf_layout);	/* number of layout entries */
+	struct btf_layout *l = btf->layout;
+	__u16 vlen = btf_vlen(t);
+	__u32 kind = btf_kind(t);
+
+	/* Fall back to base BTF if needed as they share layout information */
+	if (!l) {
+		struct btf *base_btf = btf->base_btf;
+
+		if (base_btf) {	/* only the immediate base_btf is consulted */
+			l = base_btf->layout;
+			l_cnt = base_btf->hdr.layout_len / sizeof(struct btf_layout);
+		}
+	}
+	if (!l || kind >= l_cnt) {	/* no layout data at all, or no entry for this kind */
+		pr_debug("Unsupported BTF_KIND: %u\n", btf_kind(t));
+		return -EINVAL;
+	}
+	if (l[kind].info_sz % 4) {	/* sizes that are not a multiple of 4 are rejected */
+		pr_debug("Unsupported info_sz %u for kind %u\n",
+			  l[kind].info_sz, kind);
+		return -EINVAL;
+	}
+	if (l[kind].elem_sz % 4) {	/* same multiple-of-4 rule for the per-element size */
+		pr_debug("Unsupported elem_sz %u for kind %u\n",
+			 l[kind].elem_sz, kind);
+		return -EINVAL;
+	}
+
+	/* struct btf_type, then info_sz bytes, then vlen elements of elem_sz bytes each */
+	return sizeof(struct btf_type) + l[kind].info_sz + vlen * l[kind].elem_sz;
+}
+
+static int btf_type_size(const struct btf *btf, const struct btf_type *t)
 {
 	const int base_size = sizeof(struct btf_type);
 	__u16 vlen = btf_vlen(t);
@@ -325,8 +487,7 @@ static int btf_type_size(const struct btf_type *t)
 	case BTF_KIND_DECL_TAG:
 		return base_size + sizeof(struct btf_decl_tag);
 	default:
-		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
-		return -EINVAL;
+		return btf_type_size_unknown(btf, t);
 	}
 }
 
@@ -416,16 +577,15 @@ static int btf_bswap_type_rest(struct btf_type *t)
 
 static int btf_parse_type_sec(struct btf *btf)
 {
-	struct btf_header *hdr = btf->hdr;
 	void *next_type = btf->types_data;
-	void *end_type = next_type + hdr->type_len;
+	void *end_type = next_type + btf->hdr.type_len;
 	int err, type_size;
 
 	while (next_type + sizeof(struct btf_type) <= end_type) {
 		if (btf->swapped_endian)
 			btf_bswap_type_base(next_type);
 
-		type_size = btf_type_size(next_type);
+		type_size = btf_type_size(btf, next_type);
 		if (type_size < 0)
 			return type_size;
 		if (next_type + type_size > end_type) {
@@ -586,8 +746,12 @@ static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __
 		break;
 	}
 	default:
-		pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind);
-		return -EINVAL;
+		/* Kind may be represented in kind layout information. */
+		if (btf_type_size_unknown(btf, t) < 0) {
+			pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind);
+			return -EINVAL;
+		}
+		break;
 	}
 	return 0;
 }
@@ -894,46 +1058,105 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
 	return type_id;
 }
 
-__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+static void btf_check_sorted(struct btf *btf)
 {
-	__u32 i, nr_types = btf__type_cnt(btf);
+	__u32 i, n, named_start_id = 0;
 
-	if (!strcmp(type_name, "void"))
-		return 0;
+	n = btf__type_cnt(btf);
+	for (i = btf->start_id + 1; i < n; i++) {
+		struct btf_type *ta = btf_type_by_id(btf, i - 1);
+		struct btf_type *tb = btf_type_by_id(btf, i);
+		const char *na = btf__str_by_offset(btf, ta->name_off);
+		const char *nb = btf__str_by_offset(btf, tb->name_off);
 
-	for (i = 1; i < nr_types; i++) {
-		const struct btf_type *t = btf__type_by_id(btf, i);
-		const char *name = btf__name_by_offset(btf, t->name_off);
+		if (strcmp(na, nb) > 0)
+			return;
 
-		if (name && !strcmp(type_name, name))
-			return i;
+		if (named_start_id == 0 && na[0] != '\0')
+			named_start_id = i - 1;
+		if (named_start_id == 0 && nb[0] != '\0')
+			named_start_id = i;
 	}
 
-	return libbpf_err(-ENOENT);
+	if (named_start_id)
+		btf->named_start_id = named_start_id;
+}
+
+/* Lower-bound search over name-sorted types: first ID >= start_id whose name is >= @name. */
+static __s32 btf_find_type_by_name_bsearch(const struct btf *btf, const char *name,
+					   __s32 start_id)
+{
+	__s32 lo = start_id, hi = btf__type_cnt(btf) - 1;
+
+	while (lo <= hi) {
+		__s32 mid = lo + (hi - lo) / 2;
+		const struct btf_type *t = btf_type_by_id(btf, mid);
+
+		if (strcmp(btf__str_by_offset(btf, t->name_off), name) < 0) {
+			lo = mid + 1;
+			continue;
+		}
+
+		/* mid is a candidate; a one-element window means it is the answer */
+		if (lo == hi)
+			return hi;
+		hi = mid;
+	}
+
+	/* every remaining name sorts before @name: report one past the last valid ID */
+	return btf__type_cnt(btf);
 }
 
 static __s32 btf_find_by_name_kind(const struct btf *btf, int start_id,
-				   const char *type_name, __u32 kind)
+				   const char *type_name, __s32 kind)
 {
-	__u32 i, nr_types = btf__type_cnt(btf);
+	__u32 nr_types = btf__type_cnt(btf);
+	const struct btf_type *t;
+	const char *tname;
+	__s32 id;
 
-	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
-		return 0;
+	if (start_id < btf->start_id) {
+		id = btf_find_by_name_kind(btf->base_btf, start_id,
+					   type_name, kind);
+		if (id >= 0)
+			return id;
+		start_id = btf->start_id;
+	}
 
-	for (i = start_id; i < nr_types; i++) {
-		const struct btf_type *t = btf__type_by_id(btf, i);
-		const char *name;
+	if (kind == BTF_KIND_UNKN || strcmp(type_name, "void") == 0)
+		return 0;
 
-		if (btf_kind(t) != kind)
-			continue;
-		name = btf__name_by_offset(btf, t->name_off);
-		if (name && !strcmp(type_name, name))
-			return i;
+	if (btf->named_start_id > 0 && type_name[0]) {
+		start_id = max(start_id, btf->named_start_id);
+		id = btf_find_type_by_name_bsearch(btf, type_name, start_id);
+		for (; id < nr_types; id++) {
+			t = btf__type_by_id(btf, id);
+			tname = btf__str_by_offset(btf, t->name_off);
+			if (strcmp(tname, type_name) != 0)
+				return libbpf_err(-ENOENT);
+			if (kind < 0 || btf_kind(t) == kind)
+				return id;
+		}
+	} else {
+		for (id = start_id; id < nr_types; id++) {
+			t = btf_type_by_id(btf, id);
+			if (kind > 0 && btf_kind(t) != kind)
+				continue;
+			tname = btf__str_by_offset(btf, t->name_off);
+			if (strcmp(tname, type_name) == 0)
+				return id;
+		}
 	}
 
 	return libbpf_err(-ENOENT);
 }
 
+/* Find a type by name regardless of kind: search from ID 1; kind -1 disables kind matching. */
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+	return btf_find_by_name_kind(btf, 1, type_name, -1);
+}
+
 __s32 btf__find_by_name_kind_own(const struct btf *btf, const char *type_name,
 				 __u32 kind)
 {
@@ -948,7 +1171,19 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
 
 static bool btf_is_modifiable(const struct btf *btf)
 {
-	return (void *)btf->hdr != btf->raw_data;
+	/* BTF is modifiable if split into multiple sections */
+	return btf->modifiable;
+}
+
+/* Release raw_data with the matching primitive (munmap vs free) and clear the pointer. */
+static void btf_free_raw_data(struct btf *btf)
+{
+	if (!btf->raw_data_is_mmap)
+		free(btf->raw_data);
+	else
+		munmap(btf->raw_data, btf->raw_size);
+	btf->raw_data_is_mmap = false;
+	btf->raw_data = NULL;
 }
 
 void btf__free(struct btf *btf)
@@ -961,16 +1196,16 @@ void btf__free(struct btf *btf)
 
 	if (btf_is_modifiable(btf)) {
 		/* if BTF was modified after loading, it will have a split
-		 * in-memory representation for header, types, and strings
+		 * in-memory representation for types, strings and layout
 		 * sections, so we need to free all of them individually. It
 		 * might still have a cached contiguous raw data present,
 		 * which will be unconditionally freed below.
 		 */
-		free(btf->hdr);
 		free(btf->types_data);
 		strset__free(btf->strs_set);
+		free(btf->layout);
 	}
-	free(btf->raw_data);
+	btf_free_raw_data(btf);
 	free(btf->raw_data_swapped);
 	free(btf->type_offs);
 	if (btf->owns_base)
@@ -978,8 +1213,11 @@ void btf__free(struct btf *btf)
 	free(btf);
 }
 
-static struct btf *btf_new_empty(struct btf *base_btf)
+static struct btf *btf_new_empty(struct btf_new_opts *opts)
 {
+	bool add_layout = OPTS_GET(opts, add_layout, false);
+	struct btf *base_btf = OPTS_GET(opts, base_btf, NULL);
+	struct btf_header *hdr;
 	struct btf *btf;
 
 	btf = calloc(1, sizeof(*btf));
@@ -992,30 +1230,47 @@ static struct btf *btf_new_empty(struct btf *base_btf)
 	btf->fd = -1;
 	btf->ptr_sz = sizeof(void *);
 	btf->swapped_endian = false;
+	btf->named_start_id = 0;
 
 	if (base_btf) {
 		btf->base_btf = base_btf;
 		btf->start_id = btf__type_cnt(base_btf);
-		btf->start_str_off = base_btf->hdr->str_len;
+		btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off;
 		btf->swapped_endian = base_btf->swapped_endian;
 	}
 
 	/* +1 for empty string at offset 0 */
 	btf->raw_size = sizeof(struct btf_header) + (base_btf ? 0 : 1);
+	if (add_layout)
+		btf->raw_size += sizeof(layouts);
 	btf->raw_data = calloc(1, btf->raw_size);
 	if (!btf->raw_data) {
 		free(btf);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	btf->hdr = btf->raw_data;
-	btf->hdr->hdr_len = sizeof(struct btf_header);
-	btf->hdr->magic = BTF_MAGIC;
-	btf->hdr->version = BTF_VERSION;
+	hdr = btf->raw_data;
+	hdr->hdr_len = sizeof(struct btf_header);
+	hdr->magic = BTF_MAGIC;
+	hdr->version = BTF_VERSION;
+
+	btf->types_data = btf->raw_data + hdr->hdr_len;
+	btf->strs_data = btf->raw_data + hdr->hdr_len;
+	hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */
+
+	if (add_layout) {
+		hdr->layout_len = sizeof(layouts);
+		btf->layout = layouts;
+		/*
+		 * No need to swap endianness here as btf_get_raw_data()
+		 * will do this for us if btf->swapped_endian is true.
+		 */
+		memcpy(btf->raw_data + hdr->hdr_len, layouts, sizeof(layouts));
+		btf->strs_data += sizeof(layouts);
+		hdr->str_off += sizeof(layouts);
+	}
 
-	btf->types_data = btf->raw_data + btf->hdr->hdr_len;
-	btf->strs_data = btf->raw_data + btf->hdr->hdr_len;
-	btf->hdr->str_len = base_btf ? 0 : 1; /* empty string at offset 0 */
+	memcpy(&btf->hdr, hdr, sizeof(*hdr));
 
 	return btf;
 }
@@ -1027,10 +1282,22 @@ struct btf *btf__new_empty(void)
 
 struct btf *btf__new_empty_split(struct btf *base_btf)
 {
-	return libbpf_ptr(btf_new_empty(base_btf));
+	LIBBPF_OPTS(btf_new_opts, opts);
+
+	opts.base_btf = base_btf;
+
+	return libbpf_ptr(btf_new_empty(&opts));
 }
 
-static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
+struct btf *btf__new_empty_opts(struct btf_new_opts *opts)
+{
+	if (!OPTS_VALID(opts, btf_new_opts))
+		return libbpf_err_ptr(-EINVAL); /* opts rejected by OPTS_VALID() */
+
+	return libbpf_ptr(btf_new_empty(opts));
+}
+
+static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap)
 {
 	struct btf *btf;
 	int err;
@@ -1043,34 +1310,42 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
 	btf->start_id = 1;
 	btf->start_str_off = 0;
 	btf->fd = -1;
+	btf->named_start_id = 0;
 
 	if (base_btf) {
 		btf->base_btf = base_btf;
 		btf->start_id = btf__type_cnt(base_btf);
-		btf->start_str_off = base_btf->hdr->str_len;
+		btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off;
 	}
 
-	btf->raw_data = malloc(size);
-	if (!btf->raw_data) {
-		err = -ENOMEM;
-		goto done;
+	if (is_mmap) {
+		btf->raw_data = (void *)data;
+		btf->raw_data_is_mmap = true;
+	} else {
+		btf->raw_data = malloc(size);
+		if (!btf->raw_data) {
+			err = -ENOMEM;
+			goto done;
+		}
+		memcpy(btf->raw_data, data, size);
 	}
-	memcpy(btf->raw_data, data, size);
+
 	btf->raw_size = size;
 
-	btf->hdr = btf->raw_data;
 	err = btf_parse_hdr(btf);
 	if (err)
 		goto done;
 
-	btf->strs_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->str_off;
-	btf->types_data = btf->raw_data + btf->hdr->hdr_len + btf->hdr->type_off;
+	btf->strs_data = btf->raw_data + btf->hdr.hdr_len + btf->hdr.str_off;
+	btf->types_data = btf->raw_data + btf->hdr.hdr_len + btf->hdr.type_off;
 
 	err = btf_parse_str_sec(btf);
+	err = err ?: btf_parse_layout_sec(btf);
 	err = err ?: btf_parse_type_sec(btf);
 	err = err ?: btf_sanity_check(btf);
 	if (err)
 		goto done;
+	btf_check_sorted(btf);
 
 done:
 	if (err) {
@@ -1083,12 +1358,12 @@ done:
 
 struct btf *btf__new(const void *data, __u32 size)
 {
-	return libbpf_ptr(btf_new(data, size, NULL));
+	return libbpf_ptr(btf_new(data, size, NULL, false));
 }
 
 struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)
 {
-	return libbpf_ptr(btf_new(data, size, base_btf));
+	return libbpf_ptr(btf_new(data, size, base_btf, false));
 }
 
 struct btf_elf_secs {
@@ -1148,6 +1423,12 @@ static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs
 		else
 			continue;
 
+		if (sh.sh_type != SHT_PROGBITS) {
+			pr_warn("unexpected section type (%d) of section(%d, %s) from %s\n",
+				sh.sh_type, idx, name, path);
+			goto err;
+		}
+
 		data = elf_getdata(scn, 0);
 		if (!data) {
 			pr_warn("failed to get section(%d, %s) data from %s\n",
@@ -1203,7 +1484,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
 
 	if (secs.btf_base_data) {
 		dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size,
-					NULL);
+					NULL, false);
 		if (IS_ERR(dist_base_btf)) {
 			err = PTR_ERR(dist_base_btf);
 			dist_base_btf = NULL;
@@ -1212,7 +1493,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
 	}
 
 	btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size,
-		      dist_base_btf ?: base_btf);
+		      dist_base_btf ?: base_btf, false);
 	if (IS_ERR(btf)) {
 		err = PTR_ERR(btf);
 		goto done;
@@ -1329,7 +1610,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
 	}
 
 	/* finally parse BTF data */
-	btf = btf_new(data, sz, base_btf);
+	btf = btf_new(data, sz, base_btf, false);
 
 err_out:
 	free(data);
@@ -1348,6 +1629,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
 	return libbpf_ptr(btf_parse_raw(path, base_btf));
 }
 
+/* Parse raw BTF by mapping @path read-only instead of copying it into a heap buffer. */
+static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf)
+{
+	struct btf *btf;
+	struct stat st;
+	int fd, saved;
+	void *map;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return ERR_PTR(-errno);
+
+	if (fstat(fd, &st) < 0) {
+		saved = -errno;
+		close(fd);
+		return ERR_PTR(saved);
+	}
+
+	map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	saved = -errno; /* capture before close() can overwrite errno */
+	close(fd); /* the mapping stays valid after the descriptor is closed */
+	if (map == MAP_FAILED)
+		return ERR_PTR(saved);
+
+	/* NOTE(review): confirm btf_new()'s error path doesn't already unmap raw_data */
+	btf = btf_new(map, st.st_size, base_btf, true);
+	if (IS_ERR(btf))
+		munmap(map, st.st_size);
+	return btf;
+}
+
 static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
 {
 	struct btf *btf;
@@ -1480,7 +1792,7 @@ static const void *btf_strs_data(const struct btf *btf)
 
 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
 {
-	struct btf_header *hdr = btf->hdr;
+	const struct btf_header *hdr = &btf->hdr;
 	struct btf_type *t;
 	void *data, *p;
 	__u32 data_sz;
@@ -1493,14 +1805,17 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
 	}
 
 	data_sz = hdr->hdr_len + hdr->type_len + hdr->str_len;
+	if (btf->layout)
+		data_sz += hdr->layout_len;
+
 	data = calloc(1, data_sz);
 	if (!data)
 		return NULL;
 	p = data;
 
-	memcpy(p, hdr, hdr->hdr_len);
+	memcpy(p, hdr, min((__u32)sizeof(struct btf_header), hdr->hdr_len));
 	if (swap_endian)
-		btf_bswap_hdr(p);
+		btf_bswap_hdr(p, hdr->hdr_len);
 	p += hdr->hdr_len;
 
 	memcpy(p, btf->types_data, hdr->type_len);
@@ -1518,8 +1833,18 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
 	}
 	p += hdr->type_len;
 
+	if (btf->layout) {
+		memcpy(p, btf->layout, hdr->layout_len);
+		if (swap_endian) {
+			struct btf_layout *l, *end = p + hdr->layout_len;
+
+			for (l = p; l < end ; l++)
+				l->flags = bswap_16(l->flags);
+		}
+		p += hdr->layout_len;
+	}
+
 	memcpy(p, btf_strs_data(btf), hdr->str_len);
-	p += hdr->str_len;
 
 	*size = data_sz;
 	return data;
@@ -1554,7 +1879,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
 {
 	if (offset < btf->start_str_off)
 		return btf__str_by_offset(btf->base_btf, offset);
-	else if (offset - btf->start_str_off < btf->hdr->str_len)
+	else if (offset - btf->start_str_off < btf->hdr.str_len)
 		return btf_strs_data(btf) + (offset - btf->start_str_off);
 	else
 		return errno = EINVAL, NULL;
@@ -1612,19 +1937,25 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
 		goto exit_free;
 	}
 
-	btf = btf_new(ptr, btf_info.btf_size, base_btf);
+	btf = btf_new(ptr, btf_info.btf_size, base_btf, false);
 
 exit_free:
 	free(ptr);
 	return btf;
 }
 
-struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
+struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd)
 {
 	struct btf *btf;
 	int btf_fd;
+	LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts);
 
-	btf_fd = bpf_btf_get_fd_by_id(id);
+	if (token_fd) {
+		opts.open_flags |= BPF_F_TOKEN_FD;
+		opts.token_fd = token_fd;
+	}
+
+	btf_fd = bpf_btf_get_fd_by_id_opts(id, &opts);
 	if (btf_fd < 0)
 		return libbpf_err_ptr(-errno);
 
@@ -1634,6 +1965,11 @@ struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
 	return libbpf_ptr(btf);
 }
 
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
+{
+	return btf_load_from_kernel(id, base_btf, 0); /* token_fd 0: no BPF token */
+}
+
 struct btf *btf__load_from_kernel_by_id(__u32 id)
 {
 	return btf__load_from_kernel_by_id_split(id, NULL);
@@ -1641,23 +1977,22 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)
 
 static void btf_invalidate_raw_data(struct btf *btf)
 {
-	if (btf->raw_data) {
-		free(btf->raw_data);
-		btf->raw_data = NULL;
-	}
+	if (btf->raw_data)
+		btf_free_raw_data(btf);
 	if (btf->raw_data_swapped) {
 		free(btf->raw_data_swapped);
 		btf->raw_data_swapped = NULL;
 	}
+	btf->named_start_id = 0;
 }
 
 /* Ensure BTF is ready to be modified (by splitting into a three memory
- * regions for header, types, and strings). Also invalidate cached
+ * regions for types, strings and layout. Also invalidate cached
  * raw_data, if any.
  */
 static int btf_ensure_modifiable(struct btf *btf)
 {
-	void *hdr, *types;
+	void *types, *layout = NULL;
 	struct strset *set = NULL;
 	int err = -ENOMEM;
 
@@ -1667,45 +2002,58 @@ static int btf_ensure_modifiable(struct btf *btf)
 		return 0;
 	}
 
-	/* split raw data into three memory regions */
-	hdr = malloc(btf->hdr->hdr_len);
-	types = malloc(btf->hdr->type_len);
-	if (!hdr || !types)
+	if (btf->has_hdr_extra) {
+		/* Additional BTF header data was found; not safe to modify. */
+		return -EOPNOTSUPP;
+	}
+
+	/* split raw data into memory regions; btf->hdr is done already. */
+	types = malloc(btf->hdr.type_len);
+	if (!types)
 		goto err_out;
+	memcpy(types, btf->types_data, btf->hdr.type_len);
 
-	memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
-	memcpy(types, btf->types_data, btf->hdr->type_len);
+	if (btf->hdr.layout_len) {
+		layout = malloc(btf->hdr.layout_len);
+		if (!layout)
+			goto err_out;
+		memcpy(layout, btf->raw_data + btf->hdr.hdr_len + btf->hdr.layout_off,
+		       btf->hdr.layout_len);
+	}
 
 	/* build lookup index for all strings */
-	set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len);
+	set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr.str_len);
 	if (IS_ERR(set)) {
 		err = PTR_ERR(set);
 		goto err_out;
 	}
 
 	/* only when everything was successful, update internal state */
-	btf->hdr = hdr;
 	btf->types_data = types;
-	btf->types_data_cap = btf->hdr->type_len;
+	btf->types_data_cap = btf->hdr.type_len;
 	btf->strs_data = NULL;
 	btf->strs_set = set;
+	if (layout)
+		btf->layout = layout;
 	/* if BTF was created from scratch, all strings are guaranteed to be
 	 * unique and deduplicated
 	 */
-	if (btf->hdr->str_len == 0)
+	if (btf->hdr.str_len == 0)
 		btf->strs_deduped = true;
-	if (!btf->base_btf && btf->hdr->str_len == 1)
+	if (!btf->base_btf && btf->hdr.str_len == 1)
 		btf->strs_deduped = true;
 
 	/* invalidate raw_data representation */
 	btf_invalidate_raw_data(btf);
 
+	btf->modifiable = true;
+
 	return 0;
 
 err_out:
 	strset__free(set);
-	free(hdr);
 	free(types);
+	free(layout);
 	return err;
 }
 
@@ -1718,6 +2066,7 @@ err_out:
 int btf__find_str(struct btf *btf, const char *s)
 {
 	int off;
+	int err;
 
 	if (btf->base_btf) {
 		off = btf__find_str(btf->base_btf, s);
@@ -1726,8 +2075,9 @@ int btf__find_str(struct btf *btf, const char *s)
 	}
 
 	/* BTF needs to be in a modifiable state to build string lookup index */
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	off = strset__find_str(btf->strs_set, s);
 	if (off < 0)
@@ -1744,6 +2094,7 @@ int btf__find_str(struct btf *btf, const char *s)
 int btf__add_str(struct btf *btf, const char *s)
 {
 	int off;
+	int err;
 
 	if (btf->base_btf) {
 		off = btf__find_str(btf->base_btf, s);
@@ -1751,14 +2102,15 @@ int btf__add_str(struct btf *btf, const char *s)
 			return off;
 	}
 
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	off = strset__add_str(btf->strs_set, s);
 	if (off < 0)
 		return libbpf_err(off);
 
-	btf->hdr->str_len = strset__data_size(btf->strs_set);
+	btf->hdr.str_len = strset__data_size(btf->strs_set);
 
 	return btf->start_str_off + off;
 }
@@ -1766,7 +2118,7 @@ int btf__add_str(struct btf *btf, const char *s)
 static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
 {
 	return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
-			      btf->hdr->type_len, UINT_MAX, add_sz);
+			      btf->hdr.type_len, UINT_MAX, add_sz);
 }
 
 static void btf_type_inc_vlen(struct btf_type *t)
@@ -1774,16 +2126,31 @@ static void btf_type_inc_vlen(struct btf_type *t)
 	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, btf_kflag(t));
 }
 
+/* Set type_len and slide the sections packed behind the types (layout, if any, then strings). */
+static void btf_hdr_update_type_len(struct btf *btf, int new_len)
+{
+	__u32 types_end = btf->hdr.type_off + new_len;
+
+	btf->hdr.type_len = new_len;
+	if (btf->layout)
+		btf->hdr.layout_off = types_end;
+	btf->hdr.str_off = btf->layout ? types_end + btf->hdr.layout_len : types_end;
+}
+
+static void btf_hdr_update_str_len(struct btf *btf, int new_len)
+{
+	btf->hdr.str_len = new_len; /* only the length changes; no header offsets are adjusted */
+}
+
 static int btf_commit_type(struct btf *btf, int data_sz)
 {
 	int err;
 
-	err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
+	err = btf_add_type_idx_entry(btf, btf->hdr.type_len);
 	if (err)
 		return libbpf_err(err);
 
-	btf->hdr->type_len += data_sz;
-	btf->hdr->str_off += data_sz;
+	btf_hdr_update_type_len(btf, btf->hdr.type_len + data_sz);
 	btf->nr_types++;
 	return btf->start_id + btf->nr_types - 1;
 }
@@ -1832,13 +2199,14 @@ static int btf_add_type(struct btf_pipe *p, const struct btf_type *src_type)
 	__u32 *str_off;
 	int sz, err;
 
-	sz = btf_type_size(src_type);
+	sz = btf_type_size(p->src, src_type);
 	if (sz < 0)
 		return libbpf_err(sz);
 
 	/* deconstruct BTF, if necessary, and invalidate raw_data */
-	if (btf_ensure_modifiable(p->dst))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(p->dst);
+	if (err)
+		return libbpf_err(err);
 
 	t = btf_add_type_mem(p->dst, sz);
 	if (!t)
@@ -1873,24 +2241,31 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 {
 	struct btf_pipe p = { .src = src_btf, .dst = btf };
 	int data_sz, sz, cnt, i, err, old_strs_len;
+	__u32 src_start_id;
 	__u32 *off;
 	void *t;
 
-	/* appending split BTF isn't supported yet */
-	if (src_btf->base_btf)
-		return libbpf_err(-ENOTSUP);
+	/*
+	 * When appending split BTF, the destination must share the same base
+	 * BTF so that base type ID references remain valid.
+	 */
+	if (src_btf->base_btf && src_btf->base_btf != btf->base_btf)
+		return libbpf_err(-EOPNOTSUPP);
+
+	src_start_id = src_btf->base_btf ? btf__type_cnt(src_btf->base_btf) : 1;
 
 	/* deconstruct BTF, if necessary, and invalidate raw_data */
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	/* remember original strings section size if we have to roll back
 	 * partial strings section changes
 	 */
-	old_strs_len = btf->hdr->str_len;
+	old_strs_len = btf->hdr.str_len;
 
-	data_sz = src_btf->hdr->type_len;
-	cnt = btf__type_cnt(src_btf) - 1;
+	data_sz = src_btf->hdr.type_len;
+	cnt = src_btf->nr_types;
 
 	/* pre-allocate enough memory for new types */
 	t = btf_add_type_mem(btf, data_sz);
@@ -1914,7 +2289,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 		struct btf_field_iter it;
 		__u32 *type_id, *str_off;
 
-		sz = btf_type_size(t);
+		sz = btf_type_size(src_btf, t);
 		if (sz < 0) {
 			/* unlikely, has to be corrupted src_btf */
 			err = sz;
@@ -1929,6 +2304,9 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 		if (err)
 			goto err_out;
 		while ((str_off = btf_field_iter_next(&it))) {
+			/* don't remap strings from shared base BTF */
+			if (*str_off < src_btf->start_str_off)
+				continue;
 			err = btf_rewrite_str(&p, str_off);
 			if (err)
 				goto err_out;
@@ -1943,11 +2321,11 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 			if (!*type_id) /* nothing to do for VOID references */
 				continue;
 
-			/* we haven't updated btf's type count yet, so
-			 * btf->start_id + btf->nr_types - 1 is the type ID offset we should
-			 * add to all newly added BTF types
-			 */
-			*type_id += btf->start_id + btf->nr_types - 1;
+			/* don't remap types from shared base BTF */
+			if (*type_id < src_start_id)
+				continue;
+
+			*type_id += btf->start_id + btf->nr_types - src_start_id;
 		}
 
 		/* go to next type data and type offset index entry */
@@ -1963,8 +2341,7 @@ int btf__add_btf(struct btf *btf, const struct btf *src_btf)
 	 * update type count and various internal offsets and sizes to
 	 * "commit" the changes and made them visible to the outside world.
 	 */
-	btf->hdr->type_len += data_sz;
-	btf->hdr->str_off += data_sz;
+	btf_hdr_update_type_len(btf, btf->hdr.type_len + data_sz);
 	btf->nr_types += cnt;
 
 	hashmap__free(p.str_off_map);
@@ -1975,13 +2352,14 @@ err_out:
 	/* zero out preallocated memory as if it was just allocated with
 	 * libbpf_add_mem()
 	 */
-	memset(btf->types_data + btf->hdr->type_len, 0, data_sz);
-	memset(btf->strs_data + old_strs_len, 0, btf->hdr->str_len - old_strs_len);
+	memset(btf->types_data + btf->hdr.type_len, 0, data_sz);
+	if (btf->strs_data)
+		memset(btf->strs_data + old_strs_len, 0, btf->hdr.str_len - old_strs_len);
 
 	/* and now restore original strings section size; types data size
 	 * wasn't modified, so doesn't need restoring, see big comment above
 	 */
-	btf->hdr->str_len = old_strs_len;
+	btf_hdr_update_str_len(btf, old_strs_len);
 
 	hashmap__free(p.str_off_map);
 
@@ -2001,9 +2379,10 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
 {
 	struct btf_type *t;
 	int sz, name_off;
+	int err;
 
 	/* non-empty name */
-	if (!name || !name[0])
+	if (str_is_empty(name))
 		return libbpf_err(-EINVAL);
 	/* byte_sz must be power of 2 */
 	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
@@ -2012,8 +2391,9 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
 		return libbpf_err(-EINVAL);
 
 	/* deconstruct BTF, if necessary, and invalidate raw_data */
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type) + sizeof(int);
 	t = btf_add_type_mem(btf, sz);
@@ -2049,9 +2429,10 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
 {
 	struct btf_type *t;
 	int sz, name_off;
+	int err;
 
 	/* non-empty name */
-	if (!name || !name[0])
+	if (str_is_empty(name))
 		return libbpf_err(-EINVAL);
 
 	/* byte_sz must be one of the explicitly allowed values */
@@ -2059,8 +2440,9 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
 	    byte_sz != 16)
 		return libbpf_err(-EINVAL);
 
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
@@ -2090,30 +2472,32 @@ static int validate_type_id(int id)
 }
 
 /* generic append function for PTR, TYPEDEF, CONST/VOLATILE/RESTRICT */
-static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id)
+static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref_type_id, int kflag)
 {
 	struct btf_type *t;
 	int sz, name_off = 0;
+	int err;
 
 	if (validate_type_id(ref_type_id))
 		return libbpf_err(-EINVAL);
 
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
 		return libbpf_err(-ENOMEM);
 
-	if (name && name[0]) {
+	if (!str_is_empty(name)) {
 		name_off = btf__add_str(btf, name);
 		if (name_off < 0)
 			return name_off;
 	}
 
 	t->name_off = name_off;
-	t->info = btf_type_info(kind, 0, 0);
+	t->info = btf_type_info(kind, 0, kflag);
 	t->type = ref_type_id;
 
 	return btf_commit_type(btf, sz);
@@ -2128,7 +2512,7 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
  */
 int btf__add_ptr(struct btf *btf, int ref_type_id)
 {
-	return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id);
+	return btf_add_ref_kind(btf, BTF_KIND_PTR, NULL, ref_type_id, 0);
 }
 
 /*
@@ -2144,13 +2528,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
 {
 	struct btf_type *t;
 	struct btf_array *a;
+	int err;
 	int sz;
 
 	if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
 		return libbpf_err(-EINVAL);
 
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type) + sizeof(struct btf_array);
 	t = btf_add_type_mem(btf, sz);
@@ -2174,16 +2560,18 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
 {
 	struct btf_type *t;
 	int sz, name_off = 0;
+	int err;
 
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
 		return libbpf_err(-ENOMEM);
 
-	if (name && name[0]) {
+	if (!str_is_empty(name)) {
 		name_off = btf__add_str(btf, name);
 		if (name_off < 0)
 			return name_off;
@@ -2256,6 +2644,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 	struct btf_member *m;
 	bool is_bitfield;
 	int sz, name_off = 0;
+	int err;
 
 	/* last type should be union/struct */
 	if (btf->nr_types == 0)
@@ -2276,15 +2665,16 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 		return libbpf_err(-EINVAL);
 
 	/* decompose and invalidate raw data */
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_member);
 	m = btf_add_type_mem(btf, sz);
 	if (!m)
 		return libbpf_err(-ENOMEM);
 
-	if (name && name[0]) {
+	if (!str_is_empty(name)) {
 		name_off = btf__add_str(btf, name);
 		if (name_off < 0)
 			return name_off;
@@ -2299,8 +2689,7 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 	/* update parent type's vlen and kflag */
 	t->info = btf_type_info(btf_kind(t), btf_vlen(t) + 1, is_bitfield || btf_kflag(t));
 
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
+	btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
 	return 0;
 }
 
@@ -2309,20 +2698,22 @@ static int btf_add_enum_common(struct btf *btf, const char *name, __u32 byte_sz,
 {
 	struct btf_type *t;
 	int sz, name_off = 0;
+	int err;
 
 	/* byte_sz must be power of 2 */
 	if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
 		return libbpf_err(-EINVAL);
 
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
 	if (!t)
 		return libbpf_err(-ENOMEM);
 
-	if (name && name[0]) {
+	if (!str_is_empty(name)) {
 		name_off = btf__add_str(btf, name);
 		if (name_off < 0)
 			return name_off;
@@ -2371,6 +2762,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 	struct btf_type *t;
 	struct btf_enum *v;
 	int sz, name_off;
+	int err;
 
 	/* last type should be BTF_KIND_ENUM */
 	if (btf->nr_types == 0)
@@ -2380,14 +2772,15 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 		return libbpf_err(-EINVAL);
 
 	/* non-empty name */
-	if (!name || !name[0])
+	if (str_is_empty(name))
 		return libbpf_err(-EINVAL);
 	if (value < INT_MIN || value > UINT_MAX)
 		return libbpf_err(-E2BIG);
 
 	/* decompose and invalidate raw data */
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_enum);
 	v = btf_add_type_mem(btf, sz);
@@ -2409,8 +2802,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 	if (value < 0)
 		t->info = btf_type_info(btf_kind(t), btf_vlen(t), true);
 
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
+	btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
 	return 0;
 }
 
@@ -2448,6 +2840,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
 	struct btf_enum64 *v;
 	struct btf_type *t;
 	int sz, name_off;
+	int err;
 
 	/* last type should be BTF_KIND_ENUM64 */
 	if (btf->nr_types == 0)
@@ -2457,12 +2850,13 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
 		return libbpf_err(-EINVAL);
 
 	/* non-empty name */
-	if (!name || !name[0])
+	if (str_is_empty(name))
 		return libbpf_err(-EINVAL);
 
 	/* decompose and invalidate raw data */
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_enum64);
 	v = btf_add_type_mem(btf, sz);
@@ -2481,8 +2875,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
 	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);
 
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
+	btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
 	return 0;
 }
 
@@ -2497,7 +2890,7 @@ int btf__add_enum64_value(struct btf *btf, const char *name, __u64 value)
  */
 int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
 {
-	if (!name || !name[0])
+	if (str_is_empty(name))
 		return libbpf_err(-EINVAL);
 
 	switch (fwd_kind) {
@@ -2506,7 +2899,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
 		struct btf_type *t;
 		int id;
 
-		id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0);
+		id = btf_add_ref_kind(btf, BTF_KIND_FWD, name, 0, 0);
 		if (id <= 0)
 			return id;
 		t = btf_type_by_id(btf, id);
@@ -2533,10 +2926,10 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
  */
 int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
 {
-	if (!name || !name[0])
+	if (str_is_empty(name))
 		return libbpf_err(-EINVAL);
 
-	return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
+	return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id, 0);
 }
 
 /*
@@ -2548,7 +2941,7 @@ int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
  */
 int btf__add_volatile(struct btf *btf, int ref_type_id)
 {
-	return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id);
+	return btf_add_ref_kind(btf, BTF_KIND_VOLATILE, NULL, ref_type_id, 0);
 }
 
 /*
@@ -2560,7 +2953,7 @@ int btf__add_volatile(struct btf *btf, int ref_type_id)
  */
 int btf__add_const(struct btf *btf, int ref_type_id)
 {
-	return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id);
+	return btf_add_ref_kind(btf, BTF_KIND_CONST, NULL, ref_type_id, 0);
 }
 
 /*
@@ -2572,7 +2965,7 @@ int btf__add_const(struct btf *btf, int ref_type_id)
  */
 int btf__add_restrict(struct btf *btf, int ref_type_id)
 {
-	return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id);
+	return btf_add_ref_kind(btf, BTF_KIND_RESTRICT, NULL, ref_type_id, 0);
 }
 
 /*
@@ -2585,10 +2978,27 @@ int btf__add_restrict(struct btf *btf, int ref_type_id)
  */
 int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id)
 {
-	if (!value || !value[0])
+	if (str_is_empty(value))
 		return libbpf_err(-EINVAL);
 
-	return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id);
+	return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 0);
+}
+
+/*
+ * Append new BTF_KIND_TYPE_TAG type with:
+ *   - *value*, non-empty/non-NULL tag value;
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ * Set info->kflag to 1, indicating this tag is an __attribute__
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id)
+{
+	if (str_is_empty(value))
+		return libbpf_err(-EINVAL);
+
+	return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id, 1);
 }
 
 /*
@@ -2604,13 +3014,13 @@ int btf__add_func(struct btf *btf, const char *name,
 {
 	int id;
 
-	if (!name || !name[0])
+	if (str_is_empty(name))
 		return libbpf_err(-EINVAL);
 	if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
 	    linkage != BTF_FUNC_EXTERN)
 		return libbpf_err(-EINVAL);
 
-	id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
+	id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id, 0);
 	if (id > 0) {
 		struct btf_type *t = btf_type_by_id(btf, id);
 
@@ -2634,13 +3044,15 @@ int btf__add_func(struct btf *btf, const char *name,
 int btf__add_func_proto(struct btf *btf, int ret_type_id)
 {
 	struct btf_type *t;
+	int err;
 	int sz;
 
 	if (validate_type_id(ret_type_id))
 		return libbpf_err(-EINVAL);
 
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
@@ -2670,6 +3082,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
 	struct btf_type *t;
 	struct btf_param *p;
 	int sz, name_off = 0;
+	int err;
 
 	if (validate_type_id(type_id))
 		return libbpf_err(-EINVAL);
@@ -2682,15 +3095,16 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
 		return libbpf_err(-EINVAL);
 
 	/* decompose and invalidate raw data */
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_param);
 	p = btf_add_type_mem(btf, sz);
 	if (!p)
 		return libbpf_err(-ENOMEM);
 
-	if (name && name[0]) {
+	if (!str_is_empty(name)) {
 		name_off = btf__add_str(btf, name);
 		if (name_off < 0)
 			return name_off;
@@ -2703,8 +3117,7 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
 	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);
 
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
+	btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
 	return 0;
 }
 
@@ -2723,9 +3136,10 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 	struct btf_type *t;
 	struct btf_var *v;
 	int sz, name_off;
+	int err;
 
 	/* non-empty name */
-	if (!name || !name[0])
+	if (str_is_empty(name))
 		return libbpf_err(-EINVAL);
 	if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
 	    linkage != BTF_VAR_GLOBAL_EXTERN)
@@ -2734,8 +3148,9 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 		return libbpf_err(-EINVAL);
 
 	/* deconstruct BTF, if necessary, and invalidate raw_data */
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type) + sizeof(struct btf_var);
 	t = btf_add_type_mem(btf, sz);
@@ -2772,13 +3187,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
 {
 	struct btf_type *t;
 	int sz, name_off;
+	int err;
 
 	/* non-empty name */
-	if (!name || !name[0])
+	if (str_is_empty(name))
 		return libbpf_err(-EINVAL);
 
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type);
 	t = btf_add_type_mem(btf, sz);
@@ -2811,6 +3228,7 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 {
 	struct btf_type *t;
 	struct btf_var_secinfo *v;
+	int err;
 	int sz;
 
 	/* last type should be BTF_KIND_DATASEC */
@@ -2824,8 +3242,9 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 		return libbpf_err(-EINVAL);
 
 	/* decompose and invalidate raw data */
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_var_secinfo);
 	v = btf_add_type_mem(btf, sz);
@@ -2840,35 +3259,26 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 	t = btf_last_type(btf);
 	btf_type_inc_vlen(t);
 
-	btf->hdr->type_len += sz;
-	btf->hdr->str_off += sz;
+	btf_hdr_update_type_len(btf, btf->hdr.type_len + sz);
 	return 0;
 }
 
-/*
- * Append new BTF_KIND_DECL_TAG type with:
- *   - *value* - non-empty/non-NULL string;
- *   - *ref_type_id* - referenced type ID, it might not exist yet;
- *   - *component_idx* - -1 for tagging reference type, otherwise struct/union
- *     member or function argument index;
- * Returns:
- *   - >0, type ID of newly added BTF type;
- *   - <0, on error.
- */
-int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
-		 int component_idx)
+static int btf_add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
+			    int component_idx, int kflag)
 {
 	struct btf_type *t;
 	int sz, value_off;
+	int err;
 
-	if (!value || !value[0] || component_idx < -1)
+	if (str_is_empty(value) || component_idx < -1)
 		return libbpf_err(-EINVAL);
 
 	if (validate_type_id(ref_type_id))
 		return libbpf_err(-EINVAL);
 
-	if (btf_ensure_modifiable(btf))
-		return libbpf_err(-ENOMEM);
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		return libbpf_err(err);
 
 	sz = sizeof(struct btf_type) + sizeof(struct btf_decl_tag);
 	t = btf_add_type_mem(btf, sz);
@@ -2880,13 +3290,46 @@ int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
 		return value_off;
 
 	t->name_off = value_off;
-	t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, false);
+	t->info = btf_type_info(BTF_KIND_DECL_TAG, 0, kflag);
 	t->type = ref_type_id;
 	btf_decl_tag(t)->component_idx = component_idx;
 
 	return btf_commit_type(btf, sz);
 }
 
+/*
+ * Append new BTF_KIND_DECL_TAG type with:
+ *   - *value* - non-empty/non-NULL string;
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ *   - *component_idx* - -1 for tagging reference type, otherwise struct/union
+ *     member or function argument index;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
+		      int component_idx)
+{
+	return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 0);
+}
+
+/*
+ * Append new BTF_KIND_DECL_TAG type with:
+ *   - *value* - non-empty/non-NULL string;
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ *   - *component_idx* - -1 for tagging reference type, otherwise struct/union
+ *     member or function argument index;
+ * Set info->kflag to 1, indicating this tag is an __attribute__
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id,
+		       int component_idx)
+{
+	return btf_add_decl_tag(btf, value, ref_type_id, component_idx, 1);
+}
+
 struct btf_ext_sec_info_param {
 	__u32 off;
 	__u32 len;
@@ -3015,8 +3458,6 @@ static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native)
 		.desc = "line_info",
 	};
 	struct btf_ext_sec_info_param core_relo = {
-		.off = btf_ext->hdr->core_relo_off,
-		.len = btf_ext->hdr->core_relo_len,
 		.min_rec_size = sizeof(struct bpf_core_relo),
 		.ext_info = &btf_ext->core_relo_info,
 		.desc = "core_relo",
@@ -3034,6 +3475,8 @@ static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native)
 	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
 		return 0; /* skip core relos parsing */
 
+	core_relo.off = btf_ext->hdr->core_relo_off;
+	core_relo.len = btf_ext->hdr->core_relo_len;
 	err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native);
 	if (err)
 		return err;
@@ -3459,10 +3902,9 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts)
 		return libbpf_err(-EINVAL);
 	}
 
-	if (btf_ensure_modifiable(btf)) {
-		err = -ENOMEM;
+	err = btf_ensure_modifiable(btf);
+	if (err)
 		goto done;
-	}
 
 	err = btf_dedup_prep(d);
 	if (err) {
@@ -3782,7 +4224,7 @@ static int btf_dedup_strings(struct btf_dedup *d)
 
 	/* replace BTF string data and hash with deduped ones */
 	strset__free(d->btf->strs_set);
-	d->btf->hdr->str_len = strset__data_size(d->strs_set);
+	btf_hdr_update_str_len(d->btf, strset__data_size(d->strs_set));
 	d->btf->strs_set = d->strs_set;
 	d->strs_set = NULL;
 	d->btf->strs_deduped = true;
@@ -3795,6 +4237,20 @@ err_out:
 	return err;
 }
 
+/*
+ * Calculate type signature hash of TYPEDEF, ignoring referenced type IDs,
+ * as referenced type IDs equivalence is established separately during type
+ * graph equivalence check algorithm.
+ */
+static long btf_hash_typedef(struct btf_type *t)
+{
+	long h;
+
+	h = hash_combine(0, t->name_off);
+	h = hash_combine(h, t->info);
+	return h;
+}
+
 static long btf_hash_common(struct btf_type *t)
 {
 	long h;
@@ -3812,6 +4268,13 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
 	       t1->size == t2->size;
 }
 
+/* Check structural compatibility of two TYPEDEF. */
+static bool btf_equal_typedef(struct btf_type *t1, struct btf_type *t2)
+{
+	return t1->name_off == t2->name_off &&
+	       t1->info == t2->info;
+}
+
 /* Calculate type signature hash of INT or TAG. */
 static long btf_hash_int_decl_tag(struct btf_type *t)
 {
@@ -4299,46 +4762,120 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)
 	return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
 }
 
-/* Check if given two types are identical ARRAY definitions */
-static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)
+static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth)
 {
 	struct btf_type *t1, *t2;
-
+	int k1, k2;
+recur:
 	t1 = btf_type_by_id(d->btf, id1);
 	t2 = btf_type_by_id(d->btf, id2);
-	if (!btf_is_array(t1) || !btf_is_array(t2))
+	if (depth <= 0) {
+		pr_debug("Reached depth limit for identical type comparison for '%s'/'%s'\n",
+			 btf__name_by_offset(d->btf, t1->name_off),
+			 btf__name_by_offset(d->btf, t2->name_off));
 		return false;
+	}
 
-	return btf_equal_array(t1, t2);
-}
+	k1 = btf_kind(t1);
+	k2 = btf_kind(t2);
+	if (k1 != k2)
+		return false;
 
-/* Check if given two types are identical STRUCT/UNION definitions */
-static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2)
-{
-	const struct btf_member *m1, *m2;
-	struct btf_type *t1, *t2;
-	int n, i;
+	switch (k1) {
+	case BTF_KIND_UNKN: /* VOID */
+		return true;
+	case BTF_KIND_INT:
+		return btf_equal_int_tag(t1, t2);
+	case BTF_KIND_ENUM:
+	case BTF_KIND_ENUM64:
+		return btf_compat_enum(t1, t2);
+	case BTF_KIND_FWD:
+	case BTF_KIND_FLOAT:
+		return btf_equal_common(t1, t2);
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_TYPE_TAG:
+		if (t1->info != t2->info || t1->name_off != t2->name_off)
+			return false;
+		id1 = t1->type;
+		id2 = t2->type;
+		goto recur;
+	case BTF_KIND_ARRAY: {
+		struct btf_array *a1, *a2;
 
-	t1 = btf_type_by_id(d->btf, id1);
-	t2 = btf_type_by_id(d->btf, id2);
+		if (!btf_compat_array(t1, t2))
+			return false;
 
-	if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2))
-		return false;
+		a1 = btf_array(t1);
+		a2 = btf_array(t2);
 
-	if (!btf_shallow_equal_struct(t1, t2))
-		return false;
+		if (a1->index_type != a2->index_type &&
+		    !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1))
+			return false;
 
-	m1 = btf_members(t1);
-	m2 = btf_members(t2);
-	for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
-		if (m1->type != m2->type &&
-		    !btf_dedup_identical_arrays(d, m1->type, m2->type) &&
-		    !btf_dedup_identical_structs(d, m1->type, m2->type))
+		if (a1->type != a2->type &&
+		    !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1))
 			return false;
+
+		return true;
+	}
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m1, *m2;
+		int i, n;
+
+		if (!btf_shallow_equal_struct(t1, t2))
+			return false;
+
+		m1 = btf_members(t1);
+		m2 = btf_members(t2);
+		for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) {
+			if (m1->type == m2->type)
+				continue;
+			if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) {
+				if (t1->name_off) {
+					pr_debug("%s '%s' size=%d vlen=%d id1[%u] id2[%u] shallow-equal but not identical for field#%d '%s'\n",
+						 k1 == BTF_KIND_STRUCT ? "STRUCT" : "UNION",
+						 btf__name_by_offset(d->btf, t1->name_off),
+						 t1->size, btf_vlen(t1), id1, id2, i,
+						 btf__name_by_offset(d->btf, m1->name_off));
+				}
+				return false;
+			}
+		}
+		return true;
+	}
+	case BTF_KIND_FUNC_PROTO: {
+		const struct btf_param *p1, *p2;
+		int i, n;
+
+		if (!btf_compat_fnproto(t1, t2))
+			return false;
+
+		if (t1->type != t2->type &&
+		    !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1))
+			return false;
+
+		p1 = btf_params(t1);
+		p2 = btf_params(t2);
+		for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) {
+			if (p1->type == p2->type)
+				continue;
+			if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1))
+				return false;
+		}
+		return true;
+	}
+	default:
+		return false;
 	}
-	return true;
 }
 
+
 /*
  * Check equivalence of BTF type graph formed by candidate struct/union (we'll
  * call it "candidate graph" in this description for brevity) to a type graph
@@ -4457,19 +4994,13 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 		 * different fields within the *same* struct. This breaks type
 		 * equivalence check, which makes an assumption that candidate
 		 * types sub-graph has a consistent and deduped-by-compiler
-		 * types within a single CU. So work around that by explicitly
-		 * allowing identical array types here.
+		 * types within a single CU. And similar situation can happen
+		 * with struct/union sometimes, and even with pointers.
+		 * So accommodate cases like this doing a structural
+		 * comparison recursively, but avoiding being stuck in endless
+		 * loops by limiting the depth up to which we check.
 		 */
-		if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id))
-			return 1;
-		/* It turns out that similar situation can happen with
-		 * struct/union sometimes, sigh... Handle the case where
-		 * structs/unions are exactly the same, down to the referenced
-		 * type IDs. Anything more complicated (e.g., if referenced
-		 * types are different, but equivalent) is *way more*
-		 * complicated and requires a many-to-many equivalence mapping.
-		 */
-		if (btf_dedup_identical_structs(d, hypot_type_id, cand_id))
+		if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16))
 			return 1;
 		return 0;
 	}
@@ -4555,8 +5086,16 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 		canon_m = btf_members(canon_type);
 		for (i = 0; i < vlen; i++) {
 			eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
-			if (eq <= 0)
+			if (eq <= 0) {
+				if (cand_type->name_off) {
+					pr_debug("%s '%s' size=%d vlen=%d cand_id[%u] canon_id[%u] shallow-equal but not equiv for field#%d '%s': %d\n",
+						 cand_kind == BTF_KIND_STRUCT ? "STRUCT" : "UNION",
+						 btf__name_by_offset(d->btf, cand_type->name_off),
+						 cand_type->size, vlen, cand_id, canon_id, i,
+						 btf__name_by_offset(d->btf, cand_m->name_off), eq);
+				}
 				return eq;
+			}
 			cand_m++;
 			canon_m++;
 		}
@@ -4681,13 +5220,30 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
 	}
 }
 
+static inline long btf_hash_by_kind(struct btf_type *t, __u16 kind)
+{
+	if (kind == BTF_KIND_TYPEDEF)
+		return btf_hash_typedef(t);
+	else
+		return btf_hash_struct(t);
+}
+
+static inline bool btf_equal_by_kind(struct btf_type *t1, struct btf_type *t2, __u16 kind)
+{
+	if (kind == BTF_KIND_TYPEDEF)
+		return btf_equal_typedef(t1, t2);
+	else
+		return btf_shallow_equal_struct(t1, t2);
+}
+
 /*
- * Deduplicate struct/union types.
+ * Deduplicate struct/union and typedef types.
  *
  * For each struct/union type its type signature hash is calculated, taking
  * into account type's name, size, number, order and names of fields, but
  * ignoring type ID's referenced from fields, because they might not be deduped
- * completely until after reference types deduplication phase. This type hash
+ * completely until after reference types deduplication phase. For each typedef
+ * type, the hash is computed based on the type's name and info. This type hash
  * is used to iterate over all potential canonical types, sharing same hash.
  * For each canonical candidate we check whether type graphs that they form
  * (through referenced types in fields and so on) are equivalent using algorithm
@@ -4719,18 +5275,20 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 	t = btf_type_by_id(d->btf, type_id);
 	kind = btf_kind(t);
 
-	if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
+	if (kind != BTF_KIND_STRUCT &&
+		kind != BTF_KIND_UNION &&
+		kind != BTF_KIND_TYPEDEF)
 		return 0;
 
-	h = btf_hash_struct(t);
+	h = btf_hash_by_kind(t, kind);
 	for_each_dedup_cand(d, hash_entry, h) {
 		__u32 cand_id = hash_entry->value;
 		int eq;
 
 		/*
 		 * Even though btf_dedup_is_equiv() checks for
-		 * btf_shallow_equal_struct() internally when checking two
-		 * structs (unions) for equivalence, we need to guard here
+		 * btf_equal_by_kind() internally when checking two
+		 * structs (unions) or typedefs for equivalence, we need to guard here
 		 * from picking matching FWD type as a dedup candidate.
 		 * This can happen due to hash collision. In such case just
 		 * relying on btf_dedup_is_equiv() would lead to potentially
@@ -4738,7 +5296,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
 		 * FWD and compatible STRUCT/UNION are considered equivalent.
 		 */
 		cand_type = btf_type_by_id(d->btf, cand_id);
-		if (!btf_shallow_equal_struct(t, cand_type))
+		if (!btf_equal_by_kind(t, cand_type, kind))
 			continue;
 
 		btf_dedup_clear_hypot_map(d);
@@ -4776,18 +5334,18 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
 /*
  * Deduplicate reference type.
  *
- * Once all primitive and struct/union types got deduplicated, we can easily
+ * Once all primitive, struct/union and typedef types got deduplicated, we can easily
  * deduplicate all other (reference) BTF types. This is done in two steps:
  *
  * 1. Resolve all referenced type IDs into their canonical type IDs. This
- * resolution can be done either immediately for primitive or struct/union types
- * (because they were deduped in previous two phases) or recursively for
+ * resolution can be done either immediately for primitive, struct/union, and typedef
+ * types (because they were deduped in previous two phases) or recursively for
  * reference types. Recursion will always terminate at either primitive or
- * struct/union type, at which point we can "unwind" chain of reference types
- * one by one. There is no danger of encountering cycles because in C type
- * system the only way to form type cycle is through struct/union, so any chain
- * of reference types, even those taking part in a type cycle, will inevitably
- * reach struct/union at some point.
+ * struct/union and typedef types, at which point we can "unwind" chain of reference
+ * types one by one. There is no danger of encountering cycles in C, as the only way to
+ * form a type cycle is through struct or union types. Go can form such cycles through
+ * typedef. Thus, any chain of reference types, even those taking part in a type cycle,
+ * will inevitably reach a struct/union or typedef type at some point.
  *
  * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
  * becomes "stable", in the sense that no further deduplication will cause
@@ -4819,7 +5377,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 	case BTF_KIND_VOLATILE:
 	case BTF_KIND_RESTRICT:
 	case BTF_KIND_PTR:
-	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_TYPE_TAG:
 		ref_type_id = btf_dedup_ref_type(d, t->type);
@@ -5100,7 +5657,7 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
 			continue;
 
 		t = btf__type_by_id(d->btf, id);
-		len = btf_type_size(t);
+		len = btf_type_size(d->btf, t);
 		if (len < 0)
 			return len;
 
@@ -5114,14 +5671,17 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
 	/* shrink struct btf's internal types index and update btf_header */
 	d->btf->nr_types = next_type_id - d->btf->start_id;
 	d->btf->type_offs_cap = d->btf->nr_types;
-	d->btf->hdr->type_len = p - d->btf->types_data;
+	d->btf->hdr.type_len = p - d->btf->types_data;
 	new_offs = libbpf_reallocarray(d->btf->type_offs, d->btf->type_offs_cap,
 				       sizeof(*new_offs));
 	if (d->btf->type_offs_cap && !new_offs)
 		return -ENOMEM;
 	d->btf->type_offs = new_offs;
-	d->btf->hdr->str_off = d->btf->hdr->type_len;
-	d->btf->raw_size = d->btf->hdr->hdr_len + d->btf->hdr->type_len + d->btf->hdr->str_len;
+	if (d->btf->layout)
+		d->btf->hdr.layout_off = d->btf->hdr.type_off + d->btf->hdr.type_len;
+	d->btf->hdr.str_off = d->btf->hdr.type_off + d->btf->hdr.type_len + d->btf->hdr.layout_len;
+	d->btf->raw_size = d->btf->hdr.hdr_len + d->btf->hdr.type_off + d->btf->hdr.type_len +
+			   d->btf->hdr.layout_len + d->btf->hdr.str_len;
 	return 0;
 }
 
@@ -5217,7 +5777,10 @@ struct btf *btf__load_vmlinux_btf(void)
 		pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",
 			sysfs_btf_path);
 	} else {
-		btf = btf__parse(sysfs_btf_path, NULL);
+		btf = btf_parse_raw_mmap(sysfs_btf_path, NULL);
+		if (IS_ERR(btf))
+			btf = btf__parse(sysfs_btf_path, NULL);
+
 		if (!btf) {
 			err = -errno;
 			pr_warn("failed to read kernel BTF from '%s': %s\n",
@@ -5576,7 +6139,7 @@ int btf__distill_base(const struct btf *src_btf, struct btf **new_base_btf,
 		goto done;
 	}
 	dist.split_start_id = btf__type_cnt(old_base);
-	dist.split_start_str = old_base->hdr->str_len;
+	dist.split_start_str = old_base->hdr.str_len;
 
 	/* Pass over src split BTF; generate the list of base BTF type ids it
 	 * references; these will constitute our distilled BTF set to be
@@ -5645,14 +6208,14 @@ done:
 
 const struct btf_header *btf_header(const struct btf *btf)
 {
-	return btf->hdr;
+	return &btf->hdr;
 }
 
 void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
 {
 	btf->base_btf = (struct btf *)base_btf;
 	btf->start_id = btf__type_cnt(base_btf);
-	btf->start_str_off = base_btf->hdr->str_len;
+	btf->start_str_off = base_btf->hdr.str_len + base_btf->start_str_off;
 }
 
 int btf__relocate(struct btf *btf, const struct btf *base_btf)
@@ -5663,3 +6226,135 @@ int btf__relocate(struct btf *btf, const struct btf *base_btf)
 		btf->owns_base = false;
 	return libbpf_err(err);
 }
+
+struct btf_permute {
+	struct btf *btf;
+	__u32 *id_map;
+	__u32 start_offs;
+};
+
+/* Callback function to remap individual type ID references */
+static int btf_permute_remap_type_id(__u32 *type_id, void *ctx)
+{
+	struct btf_permute *p = ctx;
+	__u32 new_id = *type_id;
+
+	/* refer to the base BTF or VOID type */
+	if (new_id < p->btf->start_id)
+		return 0;
+
+	if (new_id >= btf__type_cnt(p->btf))
+		return -EINVAL;
+
+	*type_id = p->id_map[new_id - p->btf->start_id + p->start_offs];
+	return 0;
+}
+
+int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt,
+		 const struct btf_permute_opts *opts)
+{
+	struct btf_permute p;
+	struct btf_ext *btf_ext;
+	void *nt, *new_types = NULL;
+	__u32 *order_map = NULL;
+	int err = 0, i;
+	__u32 n, id, start_offs = 0;
+
+	if (!OPTS_VALID(opts, btf_permute_opts))
+		return libbpf_err(-EINVAL);
+
+	if (btf__base_btf(btf)) {
+		n = btf->nr_types;
+	} else {
+		if (!id_map_cnt || id_map[0] != 0)
+			return libbpf_err(-EINVAL);
+		n = btf__type_cnt(btf);
+		start_offs = 1;
+	}
+
+	if (id_map_cnt != n)
+		return libbpf_err(-EINVAL);
+
+	/* record the sequence of types */
+	order_map = calloc(id_map_cnt, sizeof(*order_map));
+	if (!order_map) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	new_types = calloc(btf->hdr.type_len, 1);
+	if (!new_types) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	err = btf_ensure_modifiable(btf);
+	if (err)
+		goto done;
+
+	for (i = start_offs; i < id_map_cnt; i++) {
+		id = id_map[i];
+		if (id < btf->start_id || id >= btf__type_cnt(btf)) {
+			err = -EINVAL;
+			goto done;
+		}
+		id -= btf->start_id - start_offs;
+		/* cannot be mapped to the same ID */
+		if (order_map[id]) {
+			err = -EINVAL;
+			goto done;
+		}
+		order_map[id] = i + btf->start_id - start_offs;
+	}
+
+	p.btf = btf;
+	p.id_map = id_map;
+	p.start_offs = start_offs;
+	nt = new_types;
+	for (i = start_offs; i < id_map_cnt; i++) {
+		struct btf_field_iter it;
+		const struct btf_type *t;
+		__u32 *type_id;
+		int type_size;
+
+		id = order_map[i];
+		t = btf__type_by_id(btf, id);
+		type_size = btf_type_size(btf, t);
+		memcpy(nt, t, type_size);
+
+		/* fix up referenced IDs for BTF */
+		err = btf_field_iter_init(&it, nt, BTF_FIELD_ITER_IDS);
+		if (err)
+			goto done;
+		while ((type_id = btf_field_iter_next(&it))) {
+			err = btf_permute_remap_type_id(type_id, &p);
+			if (err)
+				goto done;
+		}
+
+		nt += type_size;
+	}
+
+	/* fix up referenced IDs for btf_ext */
+	btf_ext = OPTS_GET(opts, btf_ext, NULL);
+	if (btf_ext) {
+		err = btf_ext_visit_type_ids(btf_ext, btf_permute_remap_type_id, &p);
+		if (err)
+			goto done;
+	}
+
+	for (nt = new_types, i = 0; i < id_map_cnt - start_offs; i++) {
+		btf->type_offs[i] = nt - new_types;
+		nt += btf_type_size(btf, nt);
+	}
+
+	free(order_map);
+	free(btf->types_data);
+	btf->types_data = new_types;
+	return 0;
+
+done:
+	free(order_map);
+	free(new_types);
+	return libbpf_err(err);
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 47ee8f6ac489..a1f8deca2603 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -94,6 +94,7 @@ LIBBPF_API struct btf *btf__new_empty(void);
  * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an
  * ELF BTF section except with a base BTF on top of which split BTF should be
  * based
+ * @param base_btf base BTF object
  * @return new BTF object instance which has to be eventually freed with
  * **btf__free()**
  *
@@ -108,6 +109,26 @@ LIBBPF_API struct btf *btf__new_empty(void);
  */
 LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
 
+struct btf_new_opts {
+	size_t sz;
+	struct btf *base_btf;	/* optional base BTF */
+	bool add_layout;	/* add BTF layout information */
+	size_t:0;
+};
+#define btf_new_opts__last_field add_layout
+
+/**
+ * @brief **btf__new_empty_opts()** creates an unpopulated BTF object with
+ * optional *base_btf* and BTF kind layout description if *add_layout*
+ * is set
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * On error, NULL is returned and the thread-local `errno` variable is
+ * set to the error code.
+ */
+LIBBPF_API struct btf *btf__new_empty_opts(struct btf_new_opts *opts);
+
 /**
  * @brief **btf__distill_base()** creates new versions of the split BTF
  * *src_btf* and its base BTF. The new base BTF will only contain the types
@@ -115,6 +136,10 @@ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
  * When that split BTF is loaded against a (possibly changed) base, this
  * distilled base BTF will help update references to that (possibly changed)
  * base BTF.
+ * @param src_btf source split BTF object
+ * @param new_base_btf pointer to where the new base BTF object pointer will be stored
+ * @param new_split_btf pointer to where the new split BTF object pointer will be stored
+ * @return 0 on success; negative error code, otherwise
  *
  * Both the new split and its associated new base BTF must be freed by
  * the caller.
@@ -227,6 +252,7 @@ LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);
 LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);
 LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id);
 LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id);
+LIBBPF_API int btf__add_type_attr(struct btf *btf, const char *value, int ref_type_id);
 
 /* func and func_proto construction APIs */
 LIBBPF_API int btf__add_func(struct btf *btf, const char *name,
@@ -243,6 +269,8 @@ LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
 /* tag construction API */
 LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id,
 			    int component_idx);
+LIBBPF_API int btf__add_decl_attr(struct btf *btf, const char *value, int ref_type_id,
+				  int component_idx);
 
 struct btf_dedup_opts {
 	size_t sz;
@@ -261,6 +289,9 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
  * to base BTF kinds, and verify those references are compatible with
  * *base_btf*; if they are, *btf* is adjusted such that is re-parented to
  * *base_btf* and type ids and strings are adjusted to accommodate this.
+ * @param btf split BTF object to relocate
+ * @param base_btf base BTF object
+ * @return 0 on success; negative error code, otherwise
  *
  * If successful, 0 is returned and **btf** now has **base_btf** as its
  * base.
@@ -270,6 +301,48 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
  */
 LIBBPF_API int btf__relocate(struct btf *btf, const struct btf *base_btf);
 
+struct btf_permute_opts {
+	size_t sz;
+	/* optional .BTF.ext info to be updated along with the main BTF info */
+	struct btf_ext *btf_ext;
+	size_t :0;
+};
+#define btf_permute_opts__last_field btf_ext
+
+/**
+ * @brief **btf__permute()** rearranges BTF types in-place according to a specified ID mapping
+ * @param btf BTF object to permute
+ * @param id_map Array mapping original type IDs to new IDs
+ * @param id_map_cnt Number of elements in @id_map
+ * @param opts Optional parameters, including BTF extension data for reference updates
+ * @return 0 on success, negative error code on failure
+ *
+ * **btf__permute()** reorders BTF types based on the provided @id_map array,
+ * updating all internal type references to maintain consistency. The function
+ * operates in-place, modifying the BTF object directly.
+ *
+ * For **base BTF**:
+ * - @id_map must include all types from ID 0 to `btf__type_cnt(btf) - 1`
+ * - @id_map_cnt must be `btf__type_cnt(btf)`
+ * - Mapping is defined as `id_map[original_id] = new_id`
+ * - `id_map[0]` must be 0 (void type cannot be moved)
+ *
+ * For **split BTF**:
+ * - @id_map must include only split types (types added on top of the base BTF)
+ * - @id_map_cnt must be `btf__type_cnt(btf) - btf__type_cnt(btf__base_btf(btf))`
+ * - Mapping is defined as `id_map[original_id - start_id] = new_id`
+ * - `start_id` equals `btf__type_cnt(btf__base_btf(btf))`
+ *
+ * After permutation, all type references within the BTF data and optional
+ * BTF extension (if provided via @opts) are updated automatically.
+ *
+ * On error, returns a negative error code and sets errno:
+ *   - `-EINVAL`: Invalid parameters or invalid ID mapping
+ *   - `-ENOMEM`: Memory allocation failure
+ */
+LIBBPF_API int btf__permute(struct btf *btf, __u32 *id_map, __u32 id_map_cnt,
+			    const struct btf_permute_opts *opts);
+
 struct btf_dump;
 
 struct btf_dump_opts {
@@ -323,9 +396,10 @@ struct btf_dump_type_data_opts {
 	bool compact;		/* no newlines/indentation */
 	bool skip_names;	/* skip member/type names */
 	bool emit_zeroes;	/* show 0-valued fields */
+	bool emit_strings;	/* print char arrays as strings */
 	size_t :0;
 };
-#define btf_dump_type_data_opts__last_field emit_zeroes
+#define btf_dump_type_data_opts__last_field emit_strings
 
 LIBBPF_API int
 btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index a3fc6908f6c9..53c6624161d7 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -21,7 +21,6 @@
 #include "hashmap.h"
 #include "libbpf.h"
 #include "libbpf_internal.h"
-#include "str_error.h"
 
 static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
 static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
@@ -68,6 +67,7 @@ struct btf_dump_data {
 	bool compact;
 	bool skip_names;
 	bool emit_zeroes;
+	bool emit_strings;
 	__u8 indent_lvl;	/* base indent level */
 	char indent_str[BTF_DATA_INDENT_STR_LEN];
 	/* below are used during iteration */
@@ -226,6 +226,9 @@ static void btf_dump_free_names(struct hashmap *map)
 	size_t bkt;
 	struct hashmap_entry *cur;
 
+	if (!map)
+		return;
+
 	hashmap__for_each_entry(map, cur, bkt)
 		free((void *)cur->pkey);
 
@@ -1494,7 +1497,10 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
 		case BTF_KIND_TYPE_TAG:
 			btf_dump_emit_mods(d, decls);
 			name = btf_name_of(d, t->name_off);
-			btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name);
+			if (btf_kflag(t))
+				btf_dump_printf(d, " __attribute__((%s))", name);
+			else
+				btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name);
 			break;
 		case BTF_KIND_ARRAY: {
 			const struct btf_array *a = btf_array(t);
@@ -1756,9 +1762,18 @@ static int btf_dump_get_bitfield_value(struct btf_dump *d,
 	__u16 left_shift_bits, right_shift_bits;
 	const __u8 *bytes = data;
 	__u8 nr_copy_bits;
+	__u8 start_bit, nr_bytes;
 	__u64 num = 0;
 	int i;
 
+	/* Calculate how many bytes cover the bitfield */
+	start_bit = bits_offset % 8;
+	nr_bytes = (start_bit + bit_sz + 7) / 8;
+
+	/* Bound check */
+	if (data + nr_bytes > d->typed_dump->data_end)
+		return -E2BIG;
+
 	/* Maximum supported bitfield size is 64 bits */
 	if (t->size > 8) {
 		pr_warn("unexpected bitfield size %d\n", t->size);
@@ -2025,6 +2040,52 @@ static int btf_dump_var_data(struct btf_dump *d,
 	return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
 }
 
+/*
+ * Print a char array as a quoted string. Returns -E2BIG if it runs past the
+ * dumped data, -EINVAL if it has no NUL terminator (caller prints an array).
+ */
+static int btf_dump_string_data(struct btf_dump *d,
+				const struct btf_type *t,
+				__u32 id,
+				const void *data)
+{
+	const struct btf_array *array = btf_array(t);
+	const char *chars = data;
+	__u32 i;
+
+	/* Make sure it is a NUL-terminated string. */
+	for (i = 0; i < array->nelems; i++) {
+		if ((void *)(chars + i) >= d->typed_dump->data_end)
+			return -E2BIG;
+		if (chars[i] == '\0')
+			break;
+	}
+	if (i == array->nelems)
+		return -EINVAL;
+
+	btf_dump_data_pfx(d);
+	btf_dump_printf(d, "\"");
+
+	/*
+	 * When printing character arrays as strings, NUL bytes are always
+	 * treated as string terminators; they are never printed.
+	 */
+	for (i = 0; i < array->nelems; i++) {
+		char c = chars[i];
+
+		if (c == '\0')
+			break;
+		/* <ctype.h> needs an unsigned char value; plain char may be negative */
+		if (isprint((unsigned char)c))
+			btf_dump_printf(d, "%c", c);
+		else
+			btf_dump_printf(d, "\\x%02x", (__u8)c);
+	}
+
+	btf_dump_printf(d, "\"");
+	return 0;
+}
+
 static int btf_dump_array_data(struct btf_dump *d,
 			       const struct btf_type *t,
 			       __u32 id,
@@ -2052,8 +2113,13 @@ static int btf_dump_array_data(struct btf_dump *d,
 		 * char arrays, so if size is 1 and element is
 		 * printable as a char, we'll do that.
 		 */
-		if (elem_size == 1)
+		if (elem_size == 1) {
+			if (d->typed_dump->emit_strings &&
+			    btf_dump_string_data(d, t, id, data) == 0) {
+				return 0;
+			}
 			d->typed_dump->is_array_char = true;
+		}
 	}
 
 	/* note that we increment depth before calling btf_dump_print() below;
@@ -2541,6 +2607,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
 	d->typed_dump->compact = OPTS_GET(opts, compact, false);
 	d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
 	d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+	d->typed_dump->emit_strings = OPTS_GET(opts, emit_strings, false);
 
 	ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
 
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
index 823f83ad819c..295dbda24580 100644
--- a/tools/lib/bpf/elf.c
+++ b/tools/lib/bpf/elf.c
@@ -9,7 +9,6 @@
 #include <linux/kernel.h>
 
 #include "libbpf_internal.h"
-#include "str_error.h"
 
 /* A SHT_GNU_versym section holds 16-bit words. This bit is set if
  * the symbol is hidden and can only be seen when referenced using an
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index 760657f5224c..4f19a0d79b0c 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -6,7 +6,6 @@
 #include "libbpf.h"
 #include "libbpf_common.h"
 #include "libbpf_internal.h"
-#include "str_error.h"
 
 static inline __u64 ptr_to_u64(const void *ptr)
 {
@@ -507,6 +506,115 @@ static int probe_kern_arg_ctx_tag(int token_fd)
 	return probe_fd(prog_fd);
 }
 
+static int probe_ldimm64_full_range_off(int token_fd)
+{
+	char log_buf[1024];
+	int prog_fd, map_fd;
+	int ret;
+	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+		.token_fd = token_fd,
+		.map_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+	);
+	LIBBPF_OPTS(bpf_prog_load_opts, prog_opts,
+		.token_fd = token_fd,
+		.prog_flags = token_fd ? BPF_F_TOKEN_FD : 0,
+		.log_buf = log_buf,
+		.log_size = sizeof(log_buf),
+	);
+	struct bpf_insn insns[] = {
+		BPF_LD_MAP_VALUE(BPF_REG_1, 0, 1UL << 30),	/* imm (map FD) is patched below */
+		BPF_EXIT_INSN(),
+	};
+	int insn_cnt = ARRAY_SIZE(insns);
+
+	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr", sizeof(int), 1, 1, &map_opts);
+	if (map_fd < 0) {
+		ret = -errno;
+		pr_warn("Error in %s(): %s. Couldn't create simple array map.\n",
+			__func__, errstr(ret));
+		return ret;
+	}
+	insns[0].imm = map_fd;	/* point the ldimm64 at the map just created */
+
+	log_buf[0] = '\0';
+	prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "global_reloc", "GPL", insns, insn_cnt, &prog_opts);
+	ret = -errno;	/* captured before close() below can change errno */
+
+	close(map_fd);
+
+	if (prog_fd >= 0) {
+		pr_warn("Error in %s(): Program loading unexpectedly succeeded.\n", __func__);
+		close(prog_fd);
+		return -EINVAL;
+	}
+
+	/*
+	 * Feature is allowed if we're not failing with the error message
+	 * "direct value offset of %u is not allowed" removed in
+	 * 12a1fe6e12db ("bpf/verifier: Do not limit maximum direct offset into arena map").
+	 * We should instead fail with "invalid access to map value pointer".
+	 * Ensure we match with one of the two and we're not failing with a
+	 * different, unexpected message.
+	 */
+	if (strstr(log_buf, "direct value offset of"))
+		return 0;	/* old offset limit still enforced: not supported */
+
+	if (!strstr(log_buf, "invalid access to map value pointer")) {
+		pr_warn("Error in %s(): Program unexpectedly failed with message: %s.\n",
+			__func__, log_buf);
+		return ret;
+	}
+
+	return 1;	/* failed only on the out-of-range access: supported */
+}
+
+#ifdef __x86_64__
+
+#ifndef __NR_uprobe
+#define __NR_uprobe 336
+#endif
+
+static int probe_uprobe_syscall(int token_fd)
+{
+	/*
+	 * If the kernel supports the uprobe() syscall, the call fails with ENXIO
+	 * when made from outside of a kernel-generated uprobe trampoline.
+	 */
+	return syscall(__NR_uprobe) < 0 && errno == ENXIO;
+}
+#else /* !__x86_64__ */
+static int probe_uprobe_syscall(int token_fd)
+{
+	return 0;	/* only probed on x86_64 */
+}
+#endif /* __x86_64__ */
+
+static int probe_kern_btf_layout(int token_fd)
+{
+	static const char strs[] = "\0int";
+	__u32 types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
+	};
+	struct btf_layout layout[] = {
+		{ 0,			0,	0 },
+		{ sizeof(__u32),	0,	0 },
+	};
+	struct btf_header hdr = {
+		.magic = BTF_MAGIC,
+		.version = BTF_VERSION,
+		.hdr_len = sizeof(struct btf_header),
+		.type_len = sizeof(types),
+		.str_off = sizeof(types) + sizeof(layout),	/* strings follow the layout data */
+		.str_len = sizeof(strs),
+		.layout_off = sizeof(types),	/* layout data sits right after the types */
+		.layout_len = sizeof(layout),
+	};
+
+	return probe_fd(libbpf__load_raw_btf_hdr(&hdr, (char *)types, strs,
+						 (char *)layout, token_fd));
+}
+
 typedef int (*feature_probe_fn)(int /* token_fd */);
 
 static struct kern_feature_cache feature_cache;
@@ -582,6 +690,15 @@ static struct kern_feature_desc {
 	[FEAT_BTF_QMARK_DATASEC] = {
 		"BTF DATASEC names starting from '?'", probe_kern_btf_qmark_datasec,
 	},
+	[FEAT_LDIMM64_FULL_RANGE_OFF] = {
+		"full range LDIMM64 support", probe_ldimm64_full_range_off,
+	},
+	[FEAT_UPROBE_SYSCALL] = {
+		"kernel supports uprobe syscall", probe_uprobe_syscall,
+	},
+	[FEAT_BTF_LAYOUT] = {
+		"kernel supports BTF layout", probe_kern_btf_layout,
+	},
 };
 
 bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id)
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 113ae4abd345..9478b8f78f26 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -4,6 +4,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <asm/byteorder.h>
 #include <linux/filter.h>
 #include <sys/param.h>
 #include "btf.h"
@@ -13,8 +14,6 @@
 #include "hashmap.h"
 #include "bpf_gen_internal.h"
 #include "skel_internal.h"
-#include <asm/byteorder.h>
-#include "str_error.h"
 
 #define MAX_USED_MAPS	64
 #define MAX_USED_PROGS	32
@@ -110,6 +109,7 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in
 
 static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
 static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
+static void emit_signature_match(struct bpf_gen *gen);
 
 void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
 {
@@ -152,6 +152,8 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps
 	/* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
 	emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
 	emit(gen, BPF_EXIT_INSN());
+	if (OPTS_GET(gen->opts, gen_hash, false))
+		emit_signature_match(gen);
 }
 
 static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
@@ -368,6 +370,8 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
 	__emit_sys_close(gen);
 }
 
+static void compute_sha_update_offsets(struct bpf_gen *gen);
+
 int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
 {
 	int i;
@@ -394,6 +398,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
 			      blob_fd_array_off(gen, i));
 	emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
 	emit(gen, BPF_EXIT_INSN());
+	if (OPTS_GET(gen->opts, gen_hash, false))
+		compute_sha_update_offsets(gen);
+
 	pr_debug("gen: finish %s\n", errstr(gen->error));
 	if (!gen->error) {
 		struct gen_loader_opts *opts = gen->opts;
@@ -446,6 +453,22 @@ void bpf_gen__free(struct bpf_gen *gen)
 	_val;							\
 })
 
+static void compute_sha_update_offsets(struct bpf_gen *gen)
+{
+	__u64 sha[SHA256_DWORD_SIZE];	/* SHA-256 of the data blob, as 64-bit words */
+	int i;
+
+	if (gen->error)	/* loader is unused on error; buffers may be invalid */
+		return;
+	libbpf_sha256(gen->data_start, gen->data_cur - gen->data_start, (__u8 *)sha);
+	for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+		struct bpf_insn *insn = (struct bpf_insn *)(gen->insn_start + gen->hash_insn_offset[i]);
+		__u64 sha_dw = tgt_endian(sha[i]);
+		insn[0].imm = (__u32)sha_dw;	/* low 32 bits */
+		insn[1].imm = sha_dw >> 32;	/* high 32 bits */
+	}
+}
+
 void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
 		       __u32 btf_raw_size)
 {
@@ -557,6 +580,28 @@ void bpf_gen__map_create(struct bpf_gen *gen,
 		emit_sys_close_stack(gen, stack_off(inner_map_fd));
 }
 
+static void emit_signature_match(struct bpf_gen *gen)
+{
+	__s64 off;
+	int i;
+
+	for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+		emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX,
+						 0, 0, 0, 0));
+		emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, i * sizeof(__u64)));
+		gen->hash_insn_offset[i] = gen->insn_cur - gen->insn_start; /* imm patched later */
+		emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_3, 0, 0, 0, 0, 0));
+
+		off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 2;
+		if (is_simm16(off)) {
+			emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL)); /* error code on mismatch */
+			emit(gen, BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, off));
+		} else {
+			gen->error = -ERANGE;	/* jump offset does not fit in 16 bits */
+		}
+	}
+}
+
 void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
 				   enum bpf_attach_type type)
 {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 194809da5172..3a80a018fc7d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -50,7 +50,6 @@
 #include "libbpf.h"
 #include "bpf.h"
 #include "btf.h"
-#include "str_error.h"
 #include "libbpf_internal.h"
 #include "hashmap.h"
 #include "bpf_gen_internal.h"
@@ -60,6 +59,8 @@
 #define BPF_FS_MAGIC		0xcafe4a11
 #endif
 
+#define MAX_EVENT_NAME_LEN	64
+
 #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
 
 #define BPF_INSN_SZ (sizeof(struct bpf_insn))
@@ -114,6 +115,7 @@ static const char * const attach_type_name[] = {
 	[BPF_TRACE_FENTRY]		= "trace_fentry",
 	[BPF_TRACE_FEXIT]		= "trace_fexit",
 	[BPF_MODIFY_RETURN]		= "modify_return",
+	[BPF_TRACE_FSESSION]		= "trace_fsession",
 	[BPF_LSM_MAC]			= "lsm_mac",
 	[BPF_LSM_CGROUP]		= "lsm_cgroup",
 	[BPF_SK_LOOKUP]			= "sk_lookup",
@@ -189,6 +191,7 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_USER_RINGBUF]             = "user_ringbuf",
 	[BPF_MAP_TYPE_CGRP_STORAGE]		= "cgrp_storage",
 	[BPF_MAP_TYPE_ARENA]			= "arena",
+	[BPF_MAP_TYPE_INSN_ARRAY]		= "insn_array",
 };
 
 static const char * const prog_type_name[] = {
@@ -284,7 +287,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
 	old_errno = errno;
 
 	va_start(args, format);
-	__libbpf_pr(level, format, args);
+	print_fn(level, format, args);
 	va_end(args);
 
 	errno = old_errno;
@@ -316,8 +319,6 @@ static void pr_perm_msg(int err)
 		buf);
 }
 
-#define STRERR_BUFSIZE  128
-
 /* Copied from tools/perf/util/util.h */
 #ifndef zfree
 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
@@ -370,6 +371,7 @@ enum reloc_type {
 	RELO_EXTERN_CALL,
 	RELO_SUBPROG_ADDR,
 	RELO_CORE,
+	RELO_INSN_ARRAY,
 };
 
 struct reloc_desc {
@@ -379,8 +381,17 @@ struct reloc_desc {
 		const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
 		struct {
 			int map_idx;
-			int sym_off;
-			int ext_idx;
+			unsigned int sym_off;
+			/*
+			 * The following two fields can be unionized, as the
+			 * ext_idx field is used for extern symbols, and the
+			 * sym_size is used for jump tables, which are never
+			 * extern
+			 */
+			union {
+				int ext_idx;
+				int sym_size;
+			};
 		};
 	};
 };
@@ -422,6 +433,11 @@ struct bpf_sec_def {
 	libbpf_prog_attach_fn_t prog_attach_fn;
 };
 
+struct bpf_light_subprog {
+	__u32 sec_insn_off;
+	__u32 sub_insn_off;
+};
+
 /*
  * bpf_prog should be a better name but it has been used in
  * linux/filter.h.
@@ -494,6 +510,10 @@ struct bpf_program {
 	__u32 line_info_rec_size;
 	__u32 line_info_cnt;
 	__u32 prog_flags;
+	__u8  hash[SHA256_DIGEST_LENGTH];
+
+	struct bpf_light_subprog *subprogs;
+	__u32 subprog_cnt;
 };
 
 struct bpf_struct_ops {
@@ -573,6 +593,7 @@ struct bpf_map {
 	bool autocreate;
 	bool autoattach;
 	__u64 map_extra;
+	struct bpf_program *excl_prog;
 };
 
 enum extern_type {
@@ -595,7 +616,7 @@ struct extern_desc {
 	int sym_idx;
 	int btf_id;
 	int sec_btf_id;
-	const char *name;
+	char *name;
 	char *essent_name;
 	bool is_set;
 	bool is_weak;
@@ -666,15 +687,23 @@ struct elf_state {
 	int symbols_shndx;
 	bool has_st_ops;
 	int arena_data_shndx;
+	int jumptables_data_shndx;
 };
 
 struct usdt_manager;
 
+enum bpf_object_state {
+	OBJ_OPEN,	/* initial state, set in bpf_object__new() */
+	OBJ_PREPARED,
+	OBJ_LOADED,
+};
+
 struct bpf_object {
 	char name[BPF_OBJ_NAME_LEN];
 	char license[64];
 	__u32 kern_version;
 
+	enum bpf_object_state state;
 	struct bpf_program *programs;
 	size_t nr_programs;
 	struct bpf_map *maps;
@@ -686,7 +715,6 @@ struct bpf_object {
 	int nr_extern;
 	int kconfig_map_idx;
 
-	bool loaded;
 	bool has_subcalls;
 	bool has_rodata;
 
@@ -727,9 +755,20 @@ struct bpf_object {
 
 	struct usdt_manager *usdt_man;
 
-	struct bpf_map *arena_map;
+	int arena_map_idx;
 	void *arena_data;
 	size_t arena_data_sz;
+	size_t arena_data_off;
+
+	void *jumptables_data;
+	size_t jumptables_data_sz;
+
+	struct {
+		struct bpf_program *prog;
+		unsigned int sym_off;
+		int fd;
+	} *jumptable_maps;
+	size_t jumptable_map_cnt;
 
 	struct kern_feature_cache *feat_cache;
 	char *token_path;
@@ -757,6 +796,7 @@ void bpf_program__unload(struct bpf_program *prog)
 
 	zfree(&prog->func_info);
 	zfree(&prog->line_info);
+	zfree(&prog->subprogs);
 }
 
 static void bpf_program__exit(struct bpf_program *prog)
@@ -890,7 +930,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 
-		if (sec_off + prog_sz > sec_sz) {
+		if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {
 			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
 				sec_name, sec_off);
 			return -LIBBPF_ERRNO__FORMAT;
@@ -1005,35 +1045,33 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
 	const struct btf_member *kern_data_member;
 	struct btf *btf = NULL;
 	__s32 kern_vtype_id, kern_type_id;
-	char tname[256];
+	char tname[192], stname[256];
 	__u32 i;
 
 	snprintf(tname, sizeof(tname), "%.*s",
 		 (int)bpf_core_essential_name_len(tname_raw), tname_raw);
 
-	kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
-					&btf, mod_btf);
-	if (kern_type_id < 0) {
-		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
-			tname);
-		return kern_type_id;
-	}
-	kern_type = btf__type_by_id(btf, kern_type_id);
+	snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);
 
-	/* Find the corresponding "map_value" type that will be used
-	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
-	 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
-	 * btf_vmlinux.
+	/* Look up the corresponding "map_value" type that will be used
+	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS) first; this lookup also
+	 * determines the btf and the mod_btf.
+	 * For example, find "struct bpf_struct_ops_tcp_congestion_ops".
 	 */
-	kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
-						tname, BTF_KIND_STRUCT);
+	kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
 	if (kern_vtype_id < 0) {
-		pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
-			STRUCT_OPS_VALUE_PREFIX, tname);
+		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
 		return kern_vtype_id;
 	}
 	kern_vtype = btf__type_by_id(btf, kern_vtype_id);
 
+	kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+	if (kern_type_id < 0) {
+		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
+		return kern_type_id;
+	}
+	kern_type = btf__type_by_id(btf, kern_type_id);
+
 	/* Find "struct tcp_congestion_ops" from
 	 * struct bpf_struct_ops_tcp_congestion_ops {
 	 *	[ ... ]
@@ -1046,8 +1084,8 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
 			break;
 	}
 	if (i == btf_vlen(kern_vtype)) {
-		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
-			tname, STRUCT_OPS_VALUE_PREFIX, tname);
+		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
+			tname, stname);
 		return -EINVAL;
 	}
 
@@ -1509,9 +1547,10 @@ static struct bpf_object *bpf_object__new(const char *path,
 	obj->efile.obj_buf_sz = obj_buf_sz;
 	obj->efile.btf_maps_shndx = -1;
 	obj->kconfig_map_idx = -1;
+	obj->arena_map_idx = -1;
 
 	obj->kern_version = get_kernel_version();
-	obj->loaded = false;
+	obj->state  = OBJ_OPEN;
 
 	return obj;
 }
@@ -1719,15 +1758,6 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam
 	return ERR_PTR(-ENOENT);
 }
 
-/* Some versions of Android don't provide memfd_create() in their libc
- * implementation, so avoid complications and just go straight to Linux
- * syscall.
- */
-static int sys_memfd_create(const char *name, unsigned flags)
-{
-	return syscall(__NR_memfd_create, name, flags);
-}
-
 #ifndef MFD_CLOEXEC
 #define MFD_CLOEXEC 0x0001U
 #endif
@@ -2106,7 +2136,7 @@ static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
 	}
 
 	len = strlen(value);
-	if (value[len - 1] != '"') {
+	if (len < 2 || value[len - 1] != '"') {
 		pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
 			ext->name, value);
 		return -EINVAL;
@@ -2875,7 +2905,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 	var_extra = btf_var(var);
 	map_name = btf__name_by_offset(obj->btf, var->name_off);
 
-	if (map_name == NULL || map_name[0] == '\0') {
+	if (str_is_empty(map_name)) {
 		pr_warn("map #%d: empty name.\n", var_idx);
 		return -EINVAL;
 	}
@@ -2963,10 +2993,11 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
 			       void *data, size_t data_sz)
 {
 	const long page_sz = sysconf(_SC_PAGE_SIZE);
+	const size_t data_alloc_sz = roundup(data_sz, page_sz);
 	size_t mmap_sz;
 
-	mmap_sz = bpf_map_mmap_sz(obj->arena_map);
-	if (roundup(data_sz, page_sz) > mmap_sz) {
+	mmap_sz = bpf_map_mmap_sz(map);
+	if (data_alloc_sz > mmap_sz) {
 		pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
 			sec_name, mmap_sz, data_sz);
 		return -E2BIG;
@@ -2999,7 +3030,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 
 	scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
 	data = elf_sec_data(obj, scn);
-	if (!scn || !data) {
+	if (!data) {
 		pr_warn("elf: failed to get %s map definitions for %s\n",
 			MAPS_ELF_SEC, obj->path);
 		return -EINVAL;
@@ -3039,12 +3070,12 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 		if (map->def.type != BPF_MAP_TYPE_ARENA)
 			continue;
 
-		if (obj->arena_map) {
+		if (obj->arena_map_idx >= 0) {
 			pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
-				map->name, obj->arena_map->name);
+				map->name, obj->maps[obj->arena_map_idx].name);
 			return -EINVAL;
 		}
-		obj->arena_map = map;
+		obj->arena_map_idx = i;
 
 		if (obj->efile.arena_data) {
 			err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
@@ -3054,7 +3085,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 				return err;
 		}
 	}
-	if (obj->efile.arena_data && !obj->arena_map) {
+	if (obj->efile.arena_data && obj->arena_map_idx < 0) {
 		pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
 			ARENA_SEC);
 		return -ENOENT;
@@ -3107,12 +3138,14 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
 	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
 	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
 	bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
+	bool has_layout = kernel_supports(obj, FEAT_BTF_LAYOUT);
 
 	return !has_func || !has_datasec || !has_func_global || !has_float ||
-	       !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
+	       !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec ||
+	       !has_layout;
 }
 
-static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
+struct btf *bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *orig_btf)
 {
 	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
 	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
@@ -3122,9 +3155,64 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
 	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
 	bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
+	bool has_layout = kernel_supports(obj, FEAT_BTF_LAYOUT);
 	int enum64_placeholder_id = 0;
+	const struct btf_header *hdr;
+	struct btf *btf = NULL;
+	const void *raw_data;
 	struct btf_type *t;
 	int i, j, vlen;
+	__u32 sz;
+	int err;
+
+	/* clone BTF to sanitize a copy and leave the original intact */
+	raw_data = btf__raw_data(orig_btf, &sz);
+	if (!raw_data)
+		return ERR_PTR(-ENOMEM);
+	/* btf_header() gives us endian-safe header info */
+	hdr = btf_header(orig_btf);
+
+	if (!has_layout && hdr->hdr_len >= sizeof(struct btf_header) &&
+	    (hdr->layout_len != 0 || hdr->layout_off != 0)) {
+		const struct btf_header *old_hdr = raw_data;
+		struct btf_header *new_hdr;
+		void *new_raw_data;
+		__u32 new_str_off;
+
+		/*
+		 * Need to rewrite BTF to exclude layout information and
+		 * move string section to immediately after types.
+		 */
+		new_raw_data = malloc(sz);
+		if (!new_raw_data)
+			return ERR_PTR(-ENOMEM);
+
+		memcpy(new_raw_data, raw_data, sz);
+		new_hdr = new_raw_data;
+		new_hdr->layout_off = 0;
+		new_hdr->layout_len = 0;
+		new_str_off = hdr->type_off + hdr->type_len;
+		/* Handle swapped endian case */
+		if (old_hdr->magic != hdr->magic)
+			new_hdr->str_off = bswap_32(new_str_off);
+		else
+			new_hdr->str_off = new_str_off;
+
+		memmove(new_raw_data + hdr->hdr_len + new_str_off,
+			new_raw_data + hdr->hdr_len + hdr->str_off,
+			hdr->str_len);
+		sz = hdr->hdr_len + hdr->type_off + hdr->type_len + hdr->str_len;
+		btf = btf__new(new_raw_data, sz);
+		free(new_raw_data);
+	} else {
+		btf = btf__new(raw_data, sz);
+	}
+	err = libbpf_get_error(btf);
+	if (err)
+		return ERR_PTR(err);
+
+	/* enforce 8-byte pointers for BPF-targeted BTFs */
+	btf__set_pointer_size(btf, 8);
 
 	for (i = 1; i < btf__type_cnt(btf); i++) {
 		t = (struct btf_type *)btf__type_by_id(btf, i);
@@ -3202,9 +3290,10 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 
 			if (enum64_placeholder_id == 0) {
 				enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
-				if (enum64_placeholder_id < 0)
-					return enum64_placeholder_id;
-
+				if (enum64_placeholder_id < 0) {
+					btf__free(btf);
+					return ERR_PTR(enum64_placeholder_id);
+				}
 				t = (struct btf_type *)btf__type_by_id(btf, i);
 			}
 
@@ -3218,7 +3307,7 @@ static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 		}
 	}
 
-	return 0;
+	return btf;
 }
 
 static bool libbpf_needs_btf(const struct bpf_object *obj)
@@ -3569,21 +3658,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 
 	sanitize = btf_needs_sanitization(obj);
 	if (sanitize) {
-		const void *raw_data;
-		__u32 sz;
-
-		/* clone BTF to sanitize a copy and leave the original intact */
-		raw_data = btf__raw_data(obj->btf, &sz);
-		kern_btf = btf__new(raw_data, sz);
-		err = libbpf_get_error(kern_btf);
-		if (err)
-			return err;
-
-		/* enforce 8-byte pointers for BPF-targeted BTFs */
-		btf__set_pointer_size(obj->btf, 8);
-		err = bpf_object__sanitize_btf(obj, kern_btf);
-		if (err)
-			return err;
+		kern_btf = bpf_object__sanitize_btf(obj, obj->btf);
+		if (IS_ERR(kern_btf))
+			return PTR_ERR(kern_btf);
 	}
 
 	if (obj->gen_loader) {
@@ -3945,6 +4022,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 			} else if (strcmp(name, ARENA_SEC) == 0) {
 				obj->efile.arena_data = data;
 				obj->efile.arena_data_shndx = idx;
+			} else if (strcmp(name, JUMPTABLES_SEC) == 0) {
+				obj->jumptables_data = malloc(data->d_size);
+				if (!obj->jumptables_data)
+					return -ENOMEM;
+				memcpy(obj->jumptables_data, data->d_buf, data->d_size);
+				obj->jumptables_data_sz = data->d_size;
+				obj->efile.jumptables_data_shndx = idx;
 			} else {
 				pr_info("elf: skipping unrecognized data section(%d) %s\n",
 					idx, name);
@@ -4241,7 +4325,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 		if (!sym_is_extern(sym))
 			continue;
 		ext_name = elf_sym_str(obj, sym->st_name);
-		if (!ext_name || !ext_name[0])
+		if (str_is_empty(ext_name))
 			continue;
 
 		ext = obj->externs;
@@ -4260,7 +4344,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
 			return ext->btf_id;
 		}
 		t = btf__type_by_id(obj->btf, ext->btf_id);
-		ext->name = btf__name_by_offset(obj->btf, t->name_off);
+		ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off));
+		if (!ext->name)
+			return -ENOMEM;
 		ext->sym_idx = i;
 		ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
 
@@ -4483,6 +4569,44 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
 	}
 }
 
+static int bpf_prog_compute_hash(struct bpf_program *prog)
+{
+	struct bpf_insn *purged;
+	int i, err = 0;
+
+	purged = calloc(prog->insns_cnt, BPF_INSN_SZ);
+	if (!purged)
+		return -ENOMEM;
+
+	/* If relocations have been done, the map_fd needs to be
+	 * discarded for the digest calculation.
+	 */
+	for (i = 0; i < prog->insns_cnt; i++) {
+		purged[i] = prog->insns[i];
+		if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+		    (purged[i].src_reg == BPF_PSEUDO_MAP_FD ||
+		     purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
+			purged[i].imm = 0;
+			i++;
+			if (i >= prog->insns_cnt ||
+			    prog->insns[i].code != 0 ||
+			    prog->insns[i].dst_reg != 0 ||
+			    prog->insns[i].src_reg != 0 ||
+			    prog->insns[i].off != 0) {
+				err = -EINVAL;
+				goto out;
+			}
+			purged[i] = prog->insns[i];
+			purged[i].imm = 0;
+		}
+	}
+	libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn),
+		      prog->hash);
+out:
+	free(purged);
+	return err;
+}
+
 static int bpf_program__record_reloc(struct bpf_program *prog,
 				     struct reloc_desc *reloc_desc,
 				     __u32 insn_idx, const char *sym_name,
@@ -4580,10 +4704,30 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
 
 	/* arena data relocation */
 	if (shdr_idx == obj->efile.arena_data_shndx) {
+		if (obj->arena_map_idx < 0) {
+			pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n",
+				prog->name, insn_idx);
+			return -LIBBPF_ERRNO__RELOC;
+		}
 		reloc_desc->type = RELO_DATA;
 		reloc_desc->insn_idx = insn_idx;
-		reloc_desc->map_idx = obj->arena_map - obj->maps;
+		reloc_desc->map_idx = obj->arena_map_idx;
+		reloc_desc->sym_off = sym->st_value;
+
+		map = &obj->maps[obj->arena_map_idx];
+		pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n",
+			 prog->name, obj->arena_map_idx, map->name, map->sec_idx,
+			 map->sec_offset, insn_idx);
+		return 0;
+	}
+
+	/* jump table data relocation */
+	if (shdr_idx == obj->efile.jumptables_data_shndx) {
+		reloc_desc->type = RELO_INSN_ARRAY;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->map_idx = -1;
 		reloc_desc->sym_off = sym->st_value;
+		reloc_desc->sym_size = sym->st_size;
 		return 0;
 	}
 
@@ -4845,6 +4989,11 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
 	return 0;
 }
 
+static bool map_is_created(const struct bpf_map *map)
+{
+	return map->obj->state >= OBJ_PREPARED || map->reused;
+}
+
 bool bpf_map__autocreate(const struct bpf_map *map)
 {
 	return map->autocreate;
@@ -4852,7 +5001,7 @@ bool bpf_map__autocreate(const struct bpf_map *map)
 
 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
 {
-	if (map->obj->loaded)
+	if (map_is_created(map))
 		return libbpf_err(-EBUSY);
 
 	map->autocreate = autocreate;
@@ -4946,7 +5095,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 
 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 {
-	if (map->obj->loaded)
+	if (map_is_created(map))
 		return libbpf_err(-EBUSY);
 
 	map->def.max_entries = max_entries;
@@ -5054,12 +5203,20 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
 		 */
 		return true;
 
-	if (obj->token_fd)
+	if (obj->feat_cache)
 		return feat_supported(obj->feat_cache, feat_id);
 
 	return feat_supported(NULL, feat_id);
 }
 
+/* Used in testing to simulate missing features; frees any cache set earlier. */
+void bpf_object_set_feat_cache(struct bpf_object *obj, struct kern_feature_cache *cache)
+{
+	/* free(NULL) is a no-op, so no NULL guard is needed */
+	free(obj->feat_cache);
+	obj->feat_cache = cache;
+}
+
 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
 {
 	struct bpf_map_info map_info;
@@ -5076,6 +5233,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
 		return false;
 	}
 
+	/*
+	 * bpf_get_map_info_by_fd() for DEVMAP will always return flags with
+	 * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time.
+	 * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from
+	 * bpf_get_map_info_by_fd() when checking for compatibility with an
+	 * existing DEVMAP.
+	 */
+	if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH)
+		map_info.map_flags &= ~BPF_F_RDONLY_PROG;
+
 	return (map_info.type == map->def.type &&
 		map_info.key_size == map->def.key_size &&
 		map_info.value_size == map->def.value_size &&
@@ -5191,11 +5358,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 
 static void bpf_map__destroy(struct bpf_map *map);
 
-static bool map_is_created(const struct bpf_map *map)
-{
-	return map->obj->loaded || map->reused;
-}
-
 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
 {
 	LIBBPF_OPTS(bpf_map_create_opts, create_attr);
@@ -5212,6 +5374,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
 	create_attr.token_fd = obj->token_fd;
 	if (obj->token_fd)
 		create_attr.map_flags |= BPF_F_TOKEN_FD;
+	if (map->excl_prog) {
+		err = bpf_prog_compute_hash(map->excl_prog);
+		if (err)
+			return err;
+
+		create_attr.excl_prog_hash = map->excl_prog->hash;
+		create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH;
+	}
 
 	if (bpf_map__is_struct_ops(map)) {
 		create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
@@ -5511,7 +5681,8 @@ retry:
 					return err;
 				}
 				if (obj->arena_data) {
-					memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
+					memcpy(map->mmaped + obj->arena_data_off, obj->arena_data,
+						obj->arena_data_sz);
 					zfree(&obj->arena_data);
 				}
 			}
@@ -5681,11 +5852,12 @@ static int load_module_btfs(struct bpf_object *obj)
 		info.name = ptr_to_u64(name);
 		info.name_len = sizeof(name);
 
+		btf = NULL;
 		err = bpf_btf_get_info_by_fd(fd, &info, &len);
 		if (err) {
 			err = -errno;
 			pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err));
-			goto err_out;
+			break;
 		}
 
 		/* ignore non-module BTFs */
@@ -5699,15 +5871,15 @@ static int load_module_btfs(struct bpf_object *obj)
 		if (err) {
 			pr_warn("failed to load module [%s]'s BTF object #%d: %s\n",
 				name, id, errstr(err));
-			goto err_out;
+			break;
 		}
 
 		err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
 					sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
 		if (err)
-			goto err_out;
+			break;
 
-		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
+		mod_btf = &obj->btf_modules[obj->btf_module_cnt];
 
 		mod_btf->btf = btf;
 		mod_btf->id = id;
@@ -5715,16 +5887,16 @@ static int load_module_btfs(struct bpf_object *obj)
 		mod_btf->name = strdup(name);
 		if (!mod_btf->name) {
 			err = -ENOMEM;
-			goto err_out;
+			break;
 		}
-		continue;
+		obj->btf_module_cnt++;
+	}
 
-err_out:
+	if (err) {
+		btf__free(btf);
 		close(fd);
-		return err;
 	}
-
-	return 0;
+	return err;
 }
 
 static struct bpf_core_cand_list *
@@ -6079,6 +6251,157 @@ static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
 	insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
 }
 
+static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, unsigned int sym_off)
+{
+	size_t i;
+
+	for (i = 0; i < obj->jumptable_map_cnt; i++) {
+		/*
+		 * The same offset may be used by two different programs (as
+		 * jump tables can be identical). However, a separate map
+		 * should be created for each program.
+		 */
+		if (obj->jumptable_maps[i].sym_off == sym_off &&
+		    obj->jumptable_maps[i].prog == prog)
+			return obj->jumptable_maps[i].fd;
+	}
+
+	return -ENOENT;
+}
+
+static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, unsigned int sym_off, int map_fd)
+{
+	size_t cnt = obj->jumptable_map_cnt;
+	size_t size = sizeof(obj->jumptable_maps[0]);
+	void *tmp;
+
+	tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size);
+	if (!tmp)
+		return -ENOMEM;
+
+	obj->jumptable_maps = tmp;
+	obj->jumptable_maps[cnt].prog = prog;
+	obj->jumptable_maps[cnt].sym_off = sym_off;
+	obj->jumptable_maps[cnt].fd = map_fd;
+	obj->jumptable_map_cnt++;
+
+	return 0;
+}
+
+static int find_subprog_idx(struct bpf_program *prog, int insn_idx)
+{
+	int i;
+
+	for (i = prog->subprog_cnt - 1; i >= 0; i--) {
+		if (insn_idx >= prog->subprogs[i].sub_insn_off)
+			return i;
+	}
+
+	return -1;
+}
+
+/* Find or create the INSN_ARRAY map for @relo's jump table; returns its FD or a negative error. */
+static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo)
+{
+	const __u32 jt_entry_size = 8;
+	unsigned int sym_off = relo->sym_off;
+	int jt_size = relo->sym_size;
+	__u32 max_entries = jt_size / jt_entry_size;
+	__u32 value_size = sizeof(struct bpf_insn_array_value);
+	struct bpf_insn_array_value val = {};
+	int subprog_idx, map_fd, err;
+	__u64 insn_off, *jt;
+	__u32 i;
+
+	map_fd = find_jt_map(obj, prog, sym_off);
+	if (map_fd >= 0)
+		return map_fd;
+
+	if (sym_off % jt_entry_size) {
+		pr_warn("map '.jumptables': jumptable start %u should be multiple of %u\n",
+			sym_off, jt_entry_size);
+		return -EINVAL;
+	}
+
+	if (jt_size < 0 || jt_size % jt_entry_size) {
+		pr_warn("map '.jumptables': jumptable size %d should be a non-negative multiple of %u\n",
+			jt_size, jt_entry_size);
+		return -EINVAL;
+	}
+
+	map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables",
+				4, value_size, max_entries, NULL);
+	if (map_fd < 0)
+		return map_fd;
+
+	if (!obj->jumptables_data) {
+		pr_warn("map '.jumptables': ELF file is missing jump table data\n");
+		err = -EINVAL;
+		goto err_close;
+	}
+	/* do the sum in 64 bits: sym_off + jt_size could wrap around in 32 bits */
+	if ((__u64)sym_off + jt_size > obj->jumptables_data_sz) {
+		pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %lld\n",
+			obj->jumptables_data_sz, (long long)sym_off + jt_size);
+		err = -EINVAL;
+		goto err_close;
+	}
+
+	subprog_idx = -1; /* main program */
+	if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) {
+		pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx);
+		err = -EINVAL;
+		goto err_close;
+	}
+	if (prog->subprogs)
+		subprog_idx = find_subprog_idx(prog, relo->insn_idx);
+
+	jt = (__u64 *)(obj->jumptables_data + sym_off);
+	for (i = 0; i < max_entries; i++) {
+		/*
+		 * The offset should be made to be relative to the beginning of
+		 * the main function, not the subfunction.
+		 */
+		insn_off = jt[i]/sizeof(struct bpf_insn);
+		if (subprog_idx >= 0) {
+			insn_off -= prog->subprogs[subprog_idx].sec_insn_off;
+			insn_off += prog->subprogs[subprog_idx].sub_insn_off;
+		} else {
+			insn_off -= prog->sec_insn_off;
+		}
+
+		/*
+		 * LLVM-generated jump tables contain u64 records, but the
+		 * values they hold must fit in u32.
+		 */
+		if (insn_off > UINT32_MAX) {
+			pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %u\n",
+				(long long)jt[i], sym_off + i * jt_entry_size);
+			err = -EINVAL;
+			goto err_close;
+		}
+
+		val.orig_off = insn_off;
+		err = bpf_map_update_elem(map_fd, &i, &val, 0);
+		if (err)
+			goto err_close;
+	}
+
+	err = bpf_map_freeze(map_fd);
+	if (err)
+		goto err_close;
+
+	err = add_jt_map(obj, prog, sym_off, map_fd);
+	if (err)
+		goto err_close;
+
+	return map_fd;
+
+err_close:
+	close(map_fd);
+	return err;
+}
+
 /* Relocate data references within program code:
  *  - map references;
  *  - global variable references;
@@ -6112,6 +6435,10 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 		case RELO_DATA:
 			map = &obj->maps[relo->map_idx];
 			insn[1].imm = insn[0].imm + relo->sym_off;
+
+			if (relo->map_idx == obj->arena_map_idx)
+				insn[1].imm += obj->arena_data_off;
+
 			if (obj->gen_loader) {
 				insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
 				insn[0].imm = relo->map_idx;
@@ -6170,6 +6497,20 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 		case RELO_CORE:
 			/* will be handled by bpf_program_record_relos() */
 			break;
+		case RELO_INSN_ARRAY: {
+			int map_fd;
+
+			map_fd = create_jt_map(obj, prog, relo);
+			if (map_fd < 0) {
+				pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n",
+					prog->name, i, relo->sym_off);
+				return map_fd;
+			}
+			insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+			insn->imm = map_fd;
+			insn->off = 0;
+		}
+			break;
 		default:
 			pr_warn("prog '%s': relo #%d: bad relo type %d\n",
 				prog->name, i, relo->type);
@@ -6367,36 +6708,62 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
 	return 0;
 }
 
+static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog)
+{
+	size_t size = sizeof(main_prog->subprogs[0]);
+	int cnt = main_prog->subprog_cnt;
+	void *tmp;
+
+	tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size);
+	if (!tmp)
+		return -ENOMEM;
+
+	main_prog->subprogs = tmp;
+	main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off;
+	main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off;
+	main_prog->subprog_cnt++;
+
+	return 0;
+}
+
 static int
 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
 				struct bpf_program *subprog)
 {
-       struct bpf_insn *insns;
-       size_t new_cnt;
-       int err;
+	struct bpf_insn *insns;
+	size_t new_cnt;
+	int err;
 
-       subprog->sub_insn_off = main_prog->insns_cnt;
+	subprog->sub_insn_off = main_prog->insns_cnt;
 
-       new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
-       insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
-       if (!insns) {
-               pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
-               return -ENOMEM;
-       }
-       main_prog->insns = insns;
-       main_prog->insns_cnt = new_cnt;
+	new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+	insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+	if (!insns) {
+		pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+		return -ENOMEM;
+	}
+	main_prog->insns = insns;
+	main_prog->insns_cnt = new_cnt;
+
+	memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+	       subprog->insns_cnt * sizeof(*insns));
 
-       memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
-              subprog->insns_cnt * sizeof(*insns));
+	pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+		 main_prog->name, subprog->insns_cnt, subprog->name);
+
+	/* The subprog insns are now appended. Append its relos too. */
+	err = append_subprog_relos(main_prog, subprog);
+	if (err)
+		return err;
 
-       pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
-                main_prog->name, subprog->insns_cnt, subprog->name);
+	err = save_subprog_offsets(main_prog, subprog);
+	if (err) {
+		pr_warn("prog '%s': failed to add subprog offsets: %s\n",
+			main_prog->name, errstr(err));
+		return err;
+	}
 
-       /* The subprog insns are now appended. Append its relos too. */
-       err = append_subprog_relos(main_prog, subprog);
-       if (err)
-               return err;
-       return 0;
+	return 0;
 }
 
 static int
@@ -7073,6 +7440,14 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat
 		bpf_object__sort_relos(obj);
 	}
 
+	/* place globals at the end of the arena (if supported) */
+	if (obj->arena_map_idx >= 0 && kernel_supports(obj, FEAT_LDIMM64_FULL_RANGE_OFF)) {
+		struct bpf_map *arena_map = &obj->maps[obj->arena_map_idx];
+
+		obj->arena_data_off = bpf_map_mmap_sz(arena_map) -
+				      roundup(obj->arena_data_sz, sysconf(_SC_PAGE_SIZE));
+	}
+
 	/* Before relocating calls pre-process relocations and mark
 	 * few ld_imm64 instructions that points to subprogs.
 	 * Otherwise bpf_object__reloc_code() later would have to consider
@@ -7897,13 +8272,6 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 
 	for (i = 0; i < obj->nr_programs; i++) {
 		prog = &obj->programs[i];
-		err = bpf_object__sanitize_prog(obj, prog);
-		if (err)
-			return err;
-	}
-
-	for (i = 0; i < obj->nr_programs; i++) {
-		prog = &obj->programs[i];
 		if (prog_is_subprog(obj, prog))
 			continue;
 		if (!prog->autoload) {
@@ -7927,6 +8295,21 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 	return 0;
 }
 
+static int bpf_object_prepare_progs(struct bpf_object *obj)
+{
+	struct bpf_program *prog;
+	size_t i;
+	int err;
+
+	for (i = 0; i < obj->nr_programs; i++) {
+		prog = &obj->programs[i];
+		err = bpf_object__sanitize_prog(obj, prog);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
 
 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
@@ -8172,7 +8555,7 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
 	struct bpf_object *obj = ctx;
 	const struct btf_type *t;
 	struct extern_desc *ext;
-	char *res;
+	const char *res;
 
 	res = strstr(sym_name, ".llvm.");
 	if (sym_type == 'd' && res)
@@ -8543,14 +8926,77 @@ static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
 	return 0;
 }
 
+static void bpf_object_unpin(struct bpf_object *obj)
+{
+	int i;
+
+	/* unpin any maps that were auto-pinned during load */
+	for (i = 0; i < obj->nr_maps; i++)
+		if (obj->maps[i].pinned && !obj->maps[i].reused)
+			bpf_map__unpin(&obj->maps[i], NULL);
+}
+
+static void bpf_object_post_load_cleanup(struct bpf_object *obj)
+{
+	int i;
+
+	/* clean up fd_array */
+	zfree(&obj->fd_array);
+
+	/* clean up module BTFs */
+	for (i = 0; i < obj->btf_module_cnt; i++) {
+		close(obj->btf_modules[i].fd);
+		btf__free(obj->btf_modules[i].btf);
+		free(obj->btf_modules[i].name);
+	}
+	obj->btf_module_cnt = 0;
+	zfree(&obj->btf_modules);
+
+	/* clean up vmlinux BTF */
+	btf__free(obj->btf_vmlinux);
+	obj->btf_vmlinux = NULL;
+}
+
+static int bpf_object_prepare(struct bpf_object *obj, const char *target_btf_path)
+{
+	int err;
+
+	if (obj->state >= OBJ_PREPARED) {
+		pr_warn("object '%s': prepare loading can't be attempted twice\n", obj->name);
+		return -EINVAL;
+	}
+
+	err = bpf_object_prepare_token(obj); /* each later step is skipped once err is set */
+	err = err ? : bpf_object__probe_loading(obj);
+	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
+	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
+	err = err ? : bpf_object__sanitize_maps(obj);
+	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
+	err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
+	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
+	err = err ? : bpf_object__sanitize_and_load_btf(obj);
+	err = err ? : bpf_object__create_maps(obj);
+	err = err ? : bpf_object_prepare_progs(obj);
+
+	if (err) {
+		bpf_object_unpin(obj);
+		bpf_object_unload(obj);
+		obj->state = OBJ_LOADED; /* bpf_object_load() refuses objects in OBJ_LOADED state */
+		return err;
+	}
+
+	obj->state = OBJ_PREPARED;
+	return 0;
+}
+
 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
 {
-	int err, i;
+	int err;
 
 	if (!obj)
 		return libbpf_err(-EINVAL);
 
-	if (obj->loaded) {
+	if (obj->state >= OBJ_LOADED) {
 		pr_warn("object '%s': load can't be attempted twice\n", obj->name);
 		return libbpf_err(-EINVAL);
 	}
@@ -8565,17 +9011,12 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
 		return libbpf_err(-LIBBPF_ERRNO__ENDIAN);
 	}
 
-	err = bpf_object_prepare_token(obj);
-	err = err ? : bpf_object__probe_loading(obj);
-	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
-	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
-	err = err ? : bpf_object__sanitize_maps(obj);
-	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
-	err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
-	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
-	err = err ? : bpf_object__sanitize_and_load_btf(obj);
-	err = err ? : bpf_object__create_maps(obj);
-	err = err ? : bpf_object__load_progs(obj, extra_log_level);
+	if (obj->state < OBJ_PREPARED) {
+		err = bpf_object_prepare(obj, target_btf_path);
+		if (err)
+			return libbpf_err(err);
+	}
+	err = bpf_object__load_progs(obj, extra_log_level);
 	err = err ? : bpf_object_init_prog_arrays(obj);
 	err = err ? : bpf_object_prepare_struct_ops(obj);
 
@@ -8587,36 +9028,22 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch
 			err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
 	}
 
-	/* clean up fd_array */
-	zfree(&obj->fd_array);
+	bpf_object_post_load_cleanup(obj);
+	obj->state = OBJ_LOADED; /* doesn't matter if successfully or not */
 
-	/* clean up module BTFs */
-	for (i = 0; i < obj->btf_module_cnt; i++) {
-		close(obj->btf_modules[i].fd);
-		btf__free(obj->btf_modules[i].btf);
-		free(obj->btf_modules[i].name);
+	if (err) {
+		bpf_object_unpin(obj);
+		bpf_object_unload(obj);
+		pr_warn("failed to load object '%s'\n", obj->path);
+		return libbpf_err(err);
 	}
-	free(obj->btf_modules);
-
-	/* clean up vmlinux BTF */
-	btf__free(obj->btf_vmlinux);
-	obj->btf_vmlinux = NULL;
-
-	obj->loaded = true; /* doesn't matter if successfully or not */
-
-	if (err)
-		goto out;
 
 	return 0;
-out:
-	/* unpin any maps that were auto-pinned during load */
-	for (i = 0; i < obj->nr_maps; i++)
-		if (obj->maps[i].pinned && !obj->maps[i].reused)
-			bpf_map__unpin(&obj->maps[i], NULL);
+}
 
-	bpf_object_unload(obj);
-	pr_warn("failed to load object '%s'\n", obj->path);
-	return libbpf_err(err);
+int bpf_object__prepare(struct bpf_object *obj)
+{
+	return libbpf_err(bpf_object_prepare(obj, NULL));
 }
 
 int bpf_object__load(struct bpf_object *obj)
@@ -8866,7 +9293,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 	if (!obj)
 		return libbpf_err(-ENOENT);
 
-	if (!obj->loaded) {
+	if (obj->state < OBJ_PREPARED) {
 		pr_warn("object not yet loaded; load it first\n");
 		return libbpf_err(-ENOENT);
 	}
@@ -8945,7 +9372,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 	if (!obj)
 		return libbpf_err(-ENOENT);
 
-	if (!obj->loaded) {
+	if (obj->state < OBJ_LOADED) {
 		pr_warn("object not yet loaded; load it first\n");
 		return libbpf_err(-ENOENT);
 	}
@@ -9064,6 +9491,13 @@ void bpf_object__close(struct bpf_object *obj)
 	if (IS_ERR_OR_NULL(obj))
 		return;
 
+	/*
+	 * if user called bpf_object__prepare() without ever getting to
+	 * bpf_object__load(), we need to clean up stuff that is normally
+	 * cleaned up at the end of loading step
+	 */
+	bpf_object_post_load_cleanup(obj);
+
 	usdt_manager_free(obj->usdt_man);
 	obj->usdt_man = NULL;
 
@@ -9080,8 +9514,10 @@ void bpf_object__close(struct bpf_object *obj)
 	zfree(&obj->btf_custom_path);
 	zfree(&obj->kconfig);
 
-	for (i = 0; i < obj->nr_extern; i++)
+	for (i = 0; i < obj->nr_extern; i++) {
+		zfree(&obj->externs[i].name);
 		zfree(&obj->externs[i].essent_name);
+	}
 
 	zfree(&obj->externs);
 	obj->nr_extern = 0;
@@ -9102,6 +9538,13 @@ void bpf_object__close(struct bpf_object *obj)
 
 	zfree(&obj->arena_data);
 
+	zfree(&obj->jumptables_data);
+	obj->jumptables_data_sz = 0;
+
+	for (i = 0; i < obj->jumptable_map_cnt; i++)
+		close(obj->jumptable_maps[i].fd);
+	zfree(&obj->jumptable_maps);
+
 	free(obj);
 }
 
@@ -9132,7 +9575,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
 
 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
 {
-	if (obj->loaded)
+	if (obj->state >= OBJ_LOADED)
 		return libbpf_err(-EINVAL);
 
 	obj->kern_version = kern_version;
@@ -9145,12 +9588,12 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
 	struct bpf_gen *gen;
 
 	if (!opts)
-		return -EFAULT;
+		return libbpf_err(-EFAULT);
 	if (!OPTS_VALID(opts, gen_loader_opts))
-		return -EINVAL;
-	gen = calloc(sizeof(*gen), 1);
+		return libbpf_err(-EINVAL);
+	gen = calloc(1, sizeof(*gen));
 	if (!gen)
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 	gen->opts = opts;
 	gen->swapped_endian = !is_native_endianness(obj);
 	obj->gen_loader = gen;
@@ -9229,7 +9672,7 @@ bool bpf_program__autoload(const struct bpf_program *prog)
 
 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 {
-	if (prog->obj->loaded)
+	if (prog->obj->state >= OBJ_LOADED)
 		return libbpf_err(-EINVAL);
 
 	prog->autoload = autoload;
@@ -9261,14 +9704,14 @@ int bpf_program__set_insns(struct bpf_program *prog,
 {
 	struct bpf_insn *insns;
 
-	if (prog->obj->loaded)
-		return -EBUSY;
+	if (prog->obj->state >= OBJ_LOADED)
+		return libbpf_err(-EBUSY);
 
 	insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
 	/* NULL is a valid return from reallocarray if the new count is zero */
 	if (!insns && new_insn_cnt) {
 		pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
-		return -ENOMEM;
+		return libbpf_err(-ENOMEM);
 	}
 	memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
 
@@ -9304,7 +9747,7 @@ static int last_custom_sec_def_handler_id;
 
 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 {
-	if (prog->obj->loaded)
+	if (prog->obj->state >= OBJ_LOADED)
 		return libbpf_err(-EBUSY);
 
 	/* if type is not changed, do nothing */
@@ -9335,7 +9778,7 @@ enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program
 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
 					   enum bpf_attach_type type)
 {
-	if (prog->obj->loaded)
+	if (prog->obj->state >= OBJ_LOADED)
 		return libbpf_err(-EBUSY);
 
 	prog->expected_attach_type = type;
@@ -9349,7 +9792,7 @@ __u32 bpf_program__flags(const struct bpf_program *prog)
 
 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
 {
-	if (prog->obj->loaded)
+	if (prog->obj->state >= OBJ_LOADED)
 		return libbpf_err(-EBUSY);
 
 	prog->prog_flags = flags;
@@ -9363,7 +9806,7 @@ __u32 bpf_program__log_level(const struct bpf_program *prog)
 
 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
 {
-	if (prog->obj->loaded)
+	if (prog->obj->state >= OBJ_LOADED)
 		return libbpf_err(-EBUSY);
 
 	prog->log_level = log_level;
@@ -9379,17 +9822,146 @@ const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_siz
 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
 {
 	if (log_size && !log_buf)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	if (prog->log_size > UINT_MAX)
-		return -EINVAL;
-	if (prog->obj->loaded)
-		return -EBUSY;
+		return libbpf_err(-EINVAL);
+	if (prog->obj->state >= OBJ_LOADED)
+		return libbpf_err(-EBUSY);
 
 	prog->log_buf = log_buf;
 	prog->log_size = log_size;
 	return 0;
 }
 
+struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog)
+{
+	if (prog->func_info_rec_size != sizeof(struct bpf_func_info))
+		return libbpf_err_ptr(-EOPNOTSUPP);
+	return prog->func_info;
+}
+
+__u32 bpf_program__func_info_cnt(const struct bpf_program *prog)
+{
+	return prog->func_info_cnt;
+}
+
+struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog)
+{
+	if (prog->line_info_rec_size != sizeof(struct bpf_line_info))
+		return libbpf_err_ptr(-EOPNOTSUPP);
+	return prog->line_info;
+}
+
+__u32 bpf_program__line_info_cnt(const struct bpf_program *prog)
+{
+	return prog->line_info_cnt;
+}
+
+int bpf_program__clone(struct bpf_program *prog, const struct bpf_prog_load_opts *opts)
+{
+	LIBBPF_OPTS(bpf_prog_load_opts, attr);
+	struct bpf_object *obj;
+	const void *info;
+	__u32 info_cnt, info_rec_size;
+	int err, fd, prog_btf_fd;
+
+	if (!prog)
+		return libbpf_err(-EINVAL);
+
+	if (!OPTS_VALID(opts, bpf_prog_load_opts))
+		return libbpf_err(-EINVAL);
+
+	obj = prog->obj;
+	if (obj->state < OBJ_PREPARED)
+		return libbpf_err(-EINVAL);
+
+	/*
+	 * Caller-provided opts take priority; fall back to
+	 * prog/object defaults when the caller leaves them zero.
+	 */
+	attr.attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0) ?: prog->attach_prog_fd;
+	attr.prog_flags = OPTS_GET(opts, prog_flags, 0) ?: prog->prog_flags;
+	attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0) ?: prog->prog_ifindex;
+	attr.kern_version = OPTS_GET(opts, kern_version, 0) ?: obj->kern_version;
+	attr.fd_array = OPTS_GET(opts, fd_array, NULL) ?: obj->fd_array;
+	attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0) ?: obj->fd_array_cnt;
+	attr.token_fd = OPTS_GET(opts, token_fd, 0) ?: obj->token_fd;
+	if (attr.token_fd)
+		attr.prog_flags |= BPF_F_TOKEN_FD;
+
+	prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0);
+	if (!prog_btf_fd && obj->btf)
+		prog_btf_fd = btf__fd(obj->btf);
+
+	/* BTF func/line info: only pass if kernel supports it */
+	if (kernel_supports(obj, FEAT_BTF_FUNC) && prog_btf_fd > 0) {
+		attr.prog_btf_fd = prog_btf_fd;
+
+		/* func_info/line_info triples: all-or-nothing from caller */
+		info = OPTS_GET(opts, func_info, NULL);
+		info_cnt = OPTS_GET(opts, func_info_cnt, 0);
+		info_rec_size = OPTS_GET(opts, func_info_rec_size, 0);
+		if (!!info != !!info_cnt || !!info != !!info_rec_size) {
+			pr_warn("prog '%s': func_info, func_info_cnt, and func_info_rec_size must all be specified or all omitted\n",
+				prog->name);
+			return libbpf_err(-EINVAL);
+		}
+		attr.func_info = info ?: prog->func_info;
+		attr.func_info_cnt = info ? info_cnt : prog->func_info_cnt;
+		attr.func_info_rec_size = info ? info_rec_size : prog->func_info_rec_size;
+
+		info = OPTS_GET(opts, line_info, NULL);
+		info_cnt = OPTS_GET(opts, line_info_cnt, 0);
+		info_rec_size = OPTS_GET(opts, line_info_rec_size, 0);
+		if (!!info != !!info_cnt || !!info != !!info_rec_size) {
+			pr_warn("prog '%s': line_info, line_info_cnt, and line_info_rec_size must all be specified or all omitted\n",
+				prog->name);
+			return libbpf_err(-EINVAL);
+		}
+		attr.line_info = info ?: prog->line_info;
+		attr.line_info_cnt = info ? info_cnt : prog->line_info_cnt;
+		attr.line_info_rec_size = info ? info_rec_size : prog->line_info_rec_size;
+	}
+
+	/* Logging is caller-controlled; no fallback to prog/obj log settings */
+	attr.log_buf = OPTS_GET(opts, log_buf, NULL);
+	attr.log_size = OPTS_GET(opts, log_size, 0);
+	attr.log_level = OPTS_GET(opts, log_level, 0);
+
+	/*
+	 * Fields below may be mutated by prog_prepare_load_fn:
+	 * Seed them from prog/obj defaults here;
+	 * Later override with caller-provided opts.
+	 */
+	attr.expected_attach_type = prog->expected_attach_type;
+	attr.attach_btf_id = prog->attach_btf_id;
+	attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
+
+	if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
+		err = prog->sec_def->prog_prepare_load_fn(prog, &attr, prog->sec_def->cookie);
+		if (err)
+			return libbpf_err(err);
+	}
+
+	/* Re-apply caller overrides for output fields */
+	if (OPTS_GET(opts, expected_attach_type, 0))
+		attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);
+	if (OPTS_GET(opts, attach_btf_id, 0))
+		attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0);
+	if (OPTS_GET(opts, attach_btf_obj_fd, 0))
+		attr.attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0);
+
+	/*
+	 * Unlike bpf_object_load_prog(), we intentionally do not call bpf_prog_bind_map()
+	 * for RODATA maps here to avoid mutating the object's state. Callers can bind the
+	 * required maps themselves using bpf_prog_bind_map().
+	 */
+	fd = bpf_prog_load(prog->type, prog->name, obj->license, prog->insns, prog->insns_cnt,
+			   &attr);
+
+	return libbpf_err(fd);
+}
+
 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {			    \
 	.sec = (char *)sec_pfx,						    \
 	.prog_type = BPF_PROG_TYPE_##ptype,				    \
@@ -9457,6 +10029,8 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("fentry.s+",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
 	SEC_DEF("fmod_ret.s+",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
 	SEC_DEF("fexit.s+",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
+	SEC_DEF("fsession+",		TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF, attach_trace),
+	SEC_DEF("fsession.s+",		TRACING, BPF_TRACE_FSESSION, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
 	SEC_DEF("freplace+",		EXT, 0, SEC_ATTACH_BTF, attach_trace),
 	SEC_DEF("lsm+",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
 	SEC_DEF("lsm.s+",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
@@ -9959,7 +10533,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 	return libbpf_err(err);
 }
 
-static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd, int token_fd)
 {
 	struct bpf_prog_info info;
 	__u32 info_len = sizeof(info);
@@ -9979,7 +10553,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 		pr_warn("The target program doesn't have BTF\n");
 		goto out;
 	}
-	btf = btf__load_from_kernel_by_id(info.btf_id);
+	btf = btf_load_from_kernel(info.btf_id, NULL, token_fd);
 	err = libbpf_get_error(btf);
 	if (err) {
 		pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err));
@@ -9999,7 +10573,7 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
 			      enum bpf_attach_type attach_type,
 			      int *btf_obj_fd, int *btf_type_id)
 {
-	int ret, i, mod_len;
+	int ret, i, mod_len = 0;
 	const char *fn_name, *mod_name = NULL;
 
 	fn_name = strchr(attach_name, ':');
@@ -10062,7 +10636,7 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
 			pr_warn("prog '%s': attach program FD is not set\n", prog->name);
 			return -EINVAL;
 		}
-		err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
+		err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd, prog->obj->token_fd);
 		if (err < 0) {
 			pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n",
 				prog->name, attach_prog_fd, attach_name, errstr(err));
@@ -10299,7 +10873,7 @@ static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
 
 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 {
-	if (map->obj->loaded || map->reused)
+	if (map_is_created(map))
 		return libbpf_err(-EBUSY);
 
 	if (map->mmaped) {
@@ -10307,7 +10881,7 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 		int err;
 
 		if (map->def.type != BPF_MAP_TYPE_ARRAY)
-			return -EOPNOTSUPP;
+			return libbpf_err(-EOPNOTSUPP);
 
 		mmap_old_sz = bpf_map_mmap_sz(map);
 		mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
@@ -10315,7 +10889,7 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 		if (err) {
 			pr_warn("map '%s': failed to resize memory-mapped region: %s\n",
 				bpf_map__name(map), errstr(err));
-			return err;
+			return libbpf_err(err);
 		}
 		err = map_btf_datasec_resize(map, size);
 		if (err && err != -ENOENT) {
@@ -10345,7 +10919,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
 {
 	size_t actual_sz;
 
-	if (map->obj->loaded || map->reused)
+	if (map_is_created(map))
 		return libbpf_err(-EBUSY);
 
 	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
@@ -10417,6 +10991,27 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 	return 0;
 }
 
+int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog)
+{
+	if (map_is_created(map)) {
+		pr_warn("exclusive programs must be set before map creation\n");
+		return libbpf_err(-EINVAL);
+	}
+
+	if (map->obj != prog->obj) {
+		pr_warn("excl_prog and map must be from the same bpf object\n");
+		return libbpf_err(-EINVAL);
+	}
+
+	map->excl_prog = prog;
+	return 0;
+}
+
+struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map)
+{
+	return map->excl_prog;
+}
+
 static struct bpf_map *
 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
 {
@@ -10496,7 +11091,7 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 }
 
 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
-			   size_t value_sz, bool check_value_sz)
+			   size_t value_sz, bool check_value_sz, __u64 flags)
 {
 	if (!map_is_created(map)) /* map is not yet created */
 		return -ENOENT;
@@ -10523,6 +11118,20 @@ static int validate_map_op(const struct bpf_map *map, size_t key_sz,
 		int num_cpu = libbpf_num_possible_cpus();
 		size_t elem_sz = roundup(map->def.value_size, 8);
 
+		if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) {
+			if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS)) {
+				pr_warn("map '%s': BPF_F_CPU and BPF_F_ALL_CPUS are mutually exclusive\n",
+					map->name);
+				return -EINVAL;
+			}
+			if (map->def.value_size != value_sz) {
+				pr_warn("map '%s': unexpected value size %zu provided for either BPF_F_CPU or BPF_F_ALL_CPUS, expected %u\n",
+					map->name, value_sz, map->def.value_size);
+				return -EINVAL;
+			}
+			break;
+		}
+
 		if (value_sz != num_cpu * elem_sz) {
 			pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
 				map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
@@ -10547,7 +11156,7 @@ int bpf_map__lookup_elem(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, value_sz, true);
+	err = validate_map_op(map, key_sz, value_sz, true, flags);
 	if (err)
 		return libbpf_err(err);
 
@@ -10560,7 +11169,7 @@ int bpf_map__update_elem(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, value_sz, true);
+	err = validate_map_op(map, key_sz, value_sz, true, flags);
 	if (err)
 		return libbpf_err(err);
 
@@ -10572,7 +11181,7 @@ int bpf_map__delete_elem(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, flags);
 	if (err)
 		return libbpf_err(err);
 
@@ -10585,7 +11194,7 @@ int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, value_sz, true);
+	err = validate_map_op(map, key_sz, value_sz, true, flags);
 	if (err)
 		return libbpf_err(err);
 
@@ -10597,7 +11206,7 @@ int bpf_map__get_next_key(const struct bpf_map *map,
 {
 	int err;
 
-	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
+	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */, 0);
 	if (err)
 		return libbpf_err(err);
 
@@ -10868,11 +11477,14 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p
 		}
 		link->link.fd = pfd;
 	}
-	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
-		err = -errno;
-		pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
-			prog->name, pfd, errstr(err));
-		goto err_out;
+
+	if (!OPTS_GET(opts, dont_enable, false)) {
+		if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+			err = -errno;
+			pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
+				prog->name, pfd, errstr(err));
+			goto err_out;
+		}
 	}
 
 	return &link->link;
@@ -11056,16 +11668,16 @@ static const char *tracefs_available_filter_functions_addrs(void)
 			     : TRACEFS"/available_filter_functions_addrs";
 }
 
-static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
-					 const char *kfunc_name, size_t offset)
+static void gen_probe_legacy_event_name(char *buf, size_t buf_sz,
+					const char *name, size_t offset)
 {
 	static int index = 0;
 	int i;
 
-	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
-		 __sync_fetch_and_add(&index, 1));
+	snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(),
+		 __sync_fetch_and_add(&index, 1), name, offset);
 
-	/* sanitize binary_path in the probe name */
+	/* sanitize name in the probe name */
 	for (i = 0; buf[i]; i++) {
 		if (!isalnum(buf[i]))
 			buf[i] = '_';
@@ -11151,8 +11763,6 @@ static const char *arch_specific_syscall_pfx(void)
 	return "ia32";
 #elif defined(__s390x__)
 	return "s390x";
-#elif defined(__s390__)
-	return "s390";
 #elif defined(__arm__)
 	return "arm";
 #elif defined(__aarch64__)
@@ -11190,9 +11800,9 @@ int probe_kern_syscall_wrapper(int token_fd)
 
 		return pfd >= 0 ? 1 : 0;
 	} else { /* legacy mode */
-		char probe_name[128];
+		char probe_name[MAX_EVENT_NAME_LEN];
 
-		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
+		gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
 		if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
 			return 0;
 
@@ -11242,16 +11852,18 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
 	default:
 		return libbpf_err_ptr(-EINVAL);
 	}
+	if (!func_name && legacy)
+		return libbpf_err_ptr(-EOPNOTSUPP);
 
 	if (!legacy) {
 		pfd = perf_event_open_probe(false /* uprobe */, retprobe,
 					    func_name, offset,
 					    -1 /* pid */, 0 /* ref_ctr_off */);
 	} else {
-		char probe_name[256];
+		char probe_name[MAX_EVENT_NAME_LEN];
 
-		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
-					     func_name, offset);
+		gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
+					    func_name, offset);
 
 		legacy_probe = strdup(probe_name);
 		if (!legacy_probe)
@@ -11261,21 +11873,21 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
 						    offset, -1 /* pid */);
 	}
 	if (pfd < 0) {
-		err = -errno;
-		pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
+		err = pfd;
+		pr_warn("prog '%s': failed to create %s '%s%s0x%zx' perf event: %s\n",
 			prog->name, retprobe ? "kretprobe" : "kprobe",
-			func_name, offset,
-			errstr(err));
+			func_name ?: "", func_name ? "+" : "",
+			offset, errstr(err));
 		goto err_out;
 	}
 	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
 	err = libbpf_get_error(link);
 	if (err) {
 		close(pfd);
-		pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
+		pr_warn("prog '%s': failed to attach to %s '%s%s0x%zx': %s\n",
 			prog->name, retprobe ? "kretprobe" : "kprobe",
-			func_name, offset,
-			errstr(err));
+			func_name ?: "", func_name ? "+" : "",
+			offset, errstr(err));
 		goto err_clean_legacy;
 	}
 	if (legacy) {
@@ -11400,7 +12012,8 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
 		 *
 		 *   [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG")
 		 */
-		char sym_trim[256], *psym_trim = sym_trim, *sym_sfx;
+		char sym_trim[256], *psym_trim = sym_trim;
+		const char *sym_sfx;
 
 		if (!(sym_sfx = strstr(sym_name, ".llvm.")))
 			return 0;
@@ -11590,7 +12203,16 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
 	if (addrs && syms)
 		return libbpf_err_ptr(-EINVAL);
 
-	if (pattern) {
+	/*
+	 * Exact function name (no wildcards) without unique_match:
+	 * bypass kallsyms parsing and pass the symbol directly to the
+	 * kernel via syms[] array.  When unique_match is set, fall
+	 * through to the slow path which detects duplicate symbols.
+	 */
+	if (pattern && !strpbrk(pattern, "*?") && !unique_match) {
+		syms = &pattern;
+		cnt = 1;
+	} else if (pattern) {
 		if (has_available_filter_functions_addrs())
 			err = libbpf_available_kprobes_parse(&res);
 		else
@@ -11633,6 +12255,14 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
 	link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts);
 	if (link_fd < 0) {
 		err = -errno;
+		/*
+		 * Normalize error code: when exact name bypasses kallsyms
+		 * parsing, kernel returns ESRCH from ftrace_lookup_symbols().
+		 * Convert to ENOENT for API consistency with the pattern
+		 * matching path which returns ENOENT from userspace.
+		 */
+		if (err == -ESRCH)
+			err = -ENOENT;
 		pr_warn("prog '%s': failed to attach: %s\n",
 			prog->name, errstr(err));
 		goto error;
@@ -11795,20 +12425,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru
 	return ret;
 }
 
-static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
-					 const char *binary_path, uint64_t offset)
-{
-	int i;
-
-	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
-
-	/* sanitize binary_path in the probe name */
-	for (i = 0; buf[i]; i++) {
-		if (!isalnum(buf[i]))
-			buf[i] = '_';
-	}
-}
-
 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
 					  const char *binary_path, size_t offset)
 {
@@ -11953,8 +12569,6 @@ static const char *arch_specific_lib_paths(void)
 	return "/lib/i386-linux-gnu";
 #elif defined(__s390x__)
 	return "/lib/s390x-linux-gnu";
-#elif defined(__s390__)
-	return "/lib/s390-linux-gnu";
 #elif defined(__arm__) && defined(__SOFTFP__)
 	return "/lib/arm-linux-gnueabi";
 #elif defined(__arm__) && !defined(__SOFTFP__)
@@ -11999,7 +12613,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
 		if (!search_paths[i])
 			continue;
 		for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
-			char *next_path;
+			const char *next_path;
 			int seg_len;
 
 			if (s[0] == ':')
@@ -12232,13 +12846,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 		pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
 					    func_offset, pid, ref_ctr_off);
 	} else {
-		char probe_name[PATH_MAX + 64];
+		char probe_name[MAX_EVENT_NAME_LEN];
 
 		if (ref_ctr_off)
 			return libbpf_err_ptr(-EINVAL);
 
-		gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
-					     binary_path, func_offset);
+		gen_probe_legacy_event_name(probe_name, sizeof(probe_name),
+					    strrchr(binary_path, '/') ? : binary_path,
+					    func_offset);
 
 		legacy_probe = strdup(probe_name);
 		if (!legacy_probe)
@@ -12248,7 +12863,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 						    binary_path, func_offset, pid);
 	}
 	if (pfd < 0) {
-		err = -errno;
+		err = pfd;
 		pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
 			prog->name, retprobe ? "uretprobe" : "uprobe",
 			binary_path, func_offset,
@@ -12769,6 +13384,34 @@ struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifi
 }
 
 struct bpf_link *
+bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd,
+				const struct bpf_cgroup_opts *opts)
+{
+	LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
+	__u32 relative_id;
+	int relative_fd;
+
+	if (!OPTS_VALID(opts, bpf_cgroup_opts))
+		return libbpf_err_ptr(-EINVAL);
+
+	relative_id = OPTS_GET(opts, relative_id, 0);
+	relative_fd = OPTS_GET(opts, relative_fd, 0);
+
+	if (relative_fd && relative_id) {
+		pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
+			prog->name);
+		return libbpf_err_ptr(-EINVAL);
+	}
+
+	link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0);
+	link_create_opts.cgroup.relative_fd = relative_fd;
+	link_create_opts.cgroup.relative_id = relative_id;
+	link_create_opts.flags = OPTS_GET(opts, flags, 0);
+
+	return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts);
+}
+
+struct bpf_link *
 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
 			const struct bpf_tcx_opts *opts)
 {
@@ -12858,7 +13501,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
 	if (target_fd) {
 		LIBBPF_OPTS(bpf_link_create_opts, target_opts);
 
-		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
+		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd, prog->obj->token_fd);
 		if (btf_id < 0)
 			return libbpf_err_ptr(btf_id);
 
@@ -13070,17 +13713,17 @@ int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
 	int err;
 
 	if (!bpf_map__is_struct_ops(map))
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	if (map->fd < 0) {
 		pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 	}
 
 	st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
 	/* Ensure the type of a link is correct */
 	if (st_ops_link->map_fd < 0)
-		return -EINVAL;
+		return libbpf_err(-EINVAL);
 
 	err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
 	/* It can be EBUSY if the map has been used to create or
@@ -13306,7 +13949,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
 	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.sample_type = PERF_SAMPLE_RAW;
-	attr.sample_period = sample_period;
 	attr.wakeup_events = sample_period;
 
 	p.attr = &attr;
@@ -13666,12 +14308,12 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 	if (!prog || attach_prog_fd < 0)
 		return libbpf_err(-EINVAL);
 
-	if (prog->obj->loaded)
+	if (prog->obj->state >= OBJ_LOADED)
 		return libbpf_err(-EINVAL);
 
 	if (attach_prog_fd && !attach_func_name) {
-		/* remember attach_prog_fd and let bpf_program__load() find
-		 * BTF ID during the program load
+		/* Store attach_prog_fd. The BTF ID will be resolved later during
+		 * the normal object/program load phase.
 		 */
 		prog->attach_prog_fd = attach_prog_fd;
 		return 0;
@@ -13679,7 +14321,7 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 
 	if (attach_prog_fd) {
 		btf_id = libbpf_find_prog_btf_id(attach_func_name,
-						 attach_prog_fd);
+						 attach_prog_fd, prog->obj->token_fd);
 		if (btf_id < 0)
 			return libbpf_err(btf_id);
 	} else {
@@ -13703,6 +14345,37 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 	return 0;
 }
 
+int bpf_program__assoc_struct_ops(struct bpf_program *prog, struct bpf_map *map,
+				  struct bpf_prog_assoc_struct_ops_opts *opts)
+{
+	int prog_fd, map_fd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("prog '%s': can't associate BPF program without FD (was it loaded?)\n",
+			prog->name);
+		return libbpf_err(-EINVAL);
+	}
+
+	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+		pr_warn("prog '%s': can't associate struct_ops program\n", prog->name);
+		return libbpf_err(-EINVAL);
+	}
+
+	map_fd = bpf_map__fd(map);
+	if (map_fd < 0) {
+		pr_warn("map '%s': can't associate BPF map without FD (was it created?)\n", map->name);
+		return libbpf_err(-EINVAL);
+	}
+
+	if (!bpf_map__is_struct_ops(map)) {
+		pr_warn("map '%s': can't associate non-struct_ops map\n", map->name);
+		return libbpf_err(-EINVAL);
+	}
+
+	return bpf_prog_assoc_struct_ops(prog_fd, map_fd, opts);
+}
+
 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
 {
 	int err = 0, n, len, start, end = -1;
@@ -13968,7 +14641,10 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 		if (!map_skel->mmaped)
 			continue;
 
-		*map_skel->mmaped = map->mmaped;
+		if (map->def.type == BPF_MAP_TYPE_ARENA)
+			*map_skel->mmaped = map->mmaped + map->obj->arena_data_off;
+		else
+			*map_skel->mmaped = map->mmaped;
 	}
 
 	return 0;
@@ -14034,6 +14710,12 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 		}
 
 		link = map_skel->link;
+		if (!link) {
+			pr_warn("map '%s': BPF map skeleton link is uninitialized\n",
+				bpf_map__name(map));
+			continue;
+		}
+
 		if (*link)
 			continue;
 
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3020ee45303a..bba4e8464396 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -24,8 +24,25 @@
 extern "C" {
 #endif
 
+/**
+ * @brief **libbpf_major_version()** provides the major version of libbpf.
+ * @return An integer, the major version number
+ */
 LIBBPF_API __u32 libbpf_major_version(void);
+
+/**
+ * @brief **libbpf_minor_version()** provides the minor version of libbpf.
+ * @return An integer, the minor version number
+ */
 LIBBPF_API __u32 libbpf_minor_version(void);
+
+/**
+ * @brief **libbpf_version_string()** provides the version of libbpf in a
+ * human-readable form, e.g., "v1.7".
+ * @return Pointer to a static string containing the version
+ *
+ * The format is *not* a part of a stable API and may change in the future.
+ */
 LIBBPF_API const char *libbpf_version_string(void);
 
 enum libbpf_errno {
@@ -49,6 +66,14 @@ enum libbpf_errno {
 	__LIBBPF_ERRNO__END,
 };
 
+/**
+ * @brief **libbpf_strerror()** converts the provided error code into a
+ * human-readable string.
+ * @param err The error code to convert
+ * @param buf Pointer to a buffer where the error message will be stored
+ * @param size The number of bytes in the buffer
+ * @return 0, on success; negative error code, otherwise
+ */
 LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
 
 /**
@@ -242,6 +267,19 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
 		     const struct bpf_object_open_opts *opts);
 
 /**
+ * @brief **bpf_object__prepare()** prepares BPF object for loading:
+ * performs ELF processing, relocations, prepares final state of BPF program
+ * instructions (accessible with bpf_program__insns()), creates and
+ * (potentially) pins maps. Leaves BPF object in the state ready for program
+ * loading.
+ * @param obj Pointer to a valid BPF object instance returned by
+ * **bpf_object__open*()** API
+ * @return 0, on success; negative error code, otherwise, error code is
+ * stored in errno
+ */
+LIBBPF_API int bpf_object__prepare(struct bpf_object *obj);
+
+/**
  * @brief **bpf_object__load()** loads BPF object into kernel.
  * @param obj Pointer to a valid BPF object instance returned by
  * **bpf_object__open*()** APIs
@@ -410,7 +448,7 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
 
 /**
  * @brief **bpf_program__unpin()** unpins the BPF program from a file
- * in the BPFFS specified by a path. This decrements the programs
+ * in the BPFFS specified by a path. This decrements the program's in-kernel
  * reference count.
  *
  * The file pinning the BPF program can also be unlinked by a different
@@ -443,14 +481,12 @@ LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
 
 /**
  * @brief **bpf_link__unpin()** unpins the BPF link from a file
- * in the BPFFS specified by a path. This decrements the links
- * reference count.
+ * in the BPFFS. This decrements the link's in-kernel reference count.
  *
  * The file pinning the BPF link can also be unlinked by a different
  * process in which case this function will return an error.
  *
- * @param prog BPF program to unpin
- * @param path file path to the pin in a BPF file system
+ * @param link BPF link to unpin
  * @return 0, on success; negative error code, otherwise
  */
 LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
@@ -486,9 +522,11 @@ struct bpf_perf_event_opts {
 	__u64 bpf_cookie;
 	/* don't use BPF link when attach BPF program */
 	bool force_ioctl_attach;
+	/* don't automatically enable the event */
+	bool dont_enable;
 	size_t :0;
 };
-#define bpf_perf_event_opts__last_field force_ioctl_attach
+#define bpf_perf_event_opts__last_field dont_enable
 
 LIBBPF_API struct bpf_link *
 bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd);
@@ -519,7 +557,7 @@ struct bpf_kprobe_opts {
 	size_t sz;
 	/* custom user-provided value fetchable through bpf_get_attach_cookie() */
 	__u64 bpf_cookie;
-	/* function's offset to install kprobe to */
+	/* function offset, or raw address if func_name == NULL */
 	size_t offset;
 	/* kprobe is return probe */
 	bool retprobe;
@@ -527,11 +565,36 @@ struct bpf_kprobe_opts {
 	enum probe_attach_mode attach_mode;
 	size_t :0;
 };
+
 #define bpf_kprobe_opts__last_field attach_mode
 
+/**
+ * @brief **bpf_program__attach_kprobe()** attaches a BPF program to a
+ * kernel function entry or return.
+ *
+ * @param prog BPF program to attach
+ * @param retprobe Attach to function return
+ * @param func_name Name of the kernel function to attach to
+ * @return Reference to the newly created BPF link; or NULL is returned on
+ * error, error code is stored in errno
+ */
 LIBBPF_API struct bpf_link *
 bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe,
 			   const char *func_name);
+
+/**
+ * @brief **bpf_program__attach_kprobe_opts()** is just like
+ * bpf_program__attach_kprobe() except with an options struct
+ * for various configurations.
+ *
+ * @param prog BPF program to attach
+ * @param func_name Name of the kernel function to attach to. If NULL,
+ * opts->offset is treated as a raw kernel address. Raw-address attach
+ * is supported with PROBE_ATTACH_MODE_PERF and PROBE_ATTACH_MODE_LINK.
+ * @param opts Options for altering program attachment
+ * @return Reference to the newly created BPF link; or NULL is returned on
+ * error, error code is stored in errno
+ */
 LIBBPF_API struct bpf_link *
 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
                                 const char *func_name,
@@ -864,6 +927,21 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
 			   const struct bpf_netkit_opts *opts);
 
+struct bpf_cgroup_opts {
+	/* size of this struct, for forward/backward compatibility */
+	size_t sz;
+	__u32 flags;
+	__u32 relative_fd;
+	__u32 relative_id;
+	__u64 expected_revision;
+	size_t :0;
+};
+#define bpf_cgroup_opts__last_field expected_revision
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd,
+				const struct bpf_cgroup_opts *opts);
+
 struct bpf_map;
 
 LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
@@ -927,6 +1005,12 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le
 LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);
 LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size);
 
+LIBBPF_API struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog);
+LIBBPF_API __u32 bpf_program__func_info_cnt(const struct bpf_program *prog);
+
+LIBBPF_API struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog);
+LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog);
+
 /**
  * @brief **bpf_program__set_attach_target()** sets BTF-based attach target
  * for supported BPF program types:
@@ -934,14 +1018,35 @@ LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf,
  *   - fentry/fexit/fmod_ret;
  *   - lsm;
  *   - freplace.
- * @param prog BPF program to set the attach type for
- * @param type attach type to set the BPF map to have
+ * @param prog BPF program to configure; must be not yet loaded.
+ * @param attach_prog_fd FD of target BPF program (for freplace/extension).
+ * If >0 and func name omitted, defers BTF ID resolution.
+ * @param attach_func_name Target function name. Used either with
+ * attach_prog_fd to find destination BTF type ID in that BPF program, or
+ * alone (no attach_prog_fd) to resolve kernel (vmlinux/module) BTF ID.
+ * Must be provided if attach_prog_fd is 0.
  * @return error code; or 0 if no error occurred.
  */
 LIBBPF_API int
 bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
 			       const char *attach_func_name);
 
+struct bpf_prog_assoc_struct_ops_opts; /* defined in bpf.h */
+
+/**
+ * @brief **bpf_program__assoc_struct_ops()** associates a BPF program with a
+ * struct_ops map.
+ *
+ * @param prog BPF program
+ * @param map struct_ops map to be associated with the BPF program
+ * @param opts optional options, can be NULL
+ *
+ * @return 0, on success; negative error code, otherwise
+ */
+LIBBPF_API int
+bpf_program__assoc_struct_ops(struct bpf_program *prog, struct bpf_map *map,
+			      struct bpf_prog_assoc_struct_ops_opts *opts);
+
 /**
  * @brief **bpf_object__find_map_by_name()** returns BPF map of
  * the given name, if it exists within the passed BPF object
@@ -1037,6 +1142,7 @@ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
 /**
  * @brief **bpf_map__set_value_size()** sets map value size.
  * @param map the BPF map instance
+ * @param size the new value size
  * @return 0, on success; negative error, otherwise
  *
  * There is a special case for maps with associated memory-mapped regions, like
@@ -1135,13 +1241,14 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
  * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
  * @param value pointer to memory in which looked up value will be stored
  * @param value_sz size in byte of value data memory; it has to match BPF map
- * definition's **value_size**. For per-CPU BPF maps value size has to be
- * a product of BPF map value size and number of possible CPUs in the system
- * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for
- * per-CPU values value size has to be aligned up to closest 8 bytes for
- * alignment reasons, so expected size is: `round_up(value_size, 8)
- * * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * definition's **value_size**. For per-CPU BPF maps, value size has to be
+ * exactly `value_size` if either **BPF_F_CPU** or **BPF_F_ALL_CPUS** is
+ * specified in **flags**; otherwise it has to be a product of BPF map value
+ * size and number of possible CPUs in the system (could be fetched with
+ * **libbpf_num_possible_cpus()**). In the latter case each per-CPU value is
+ * aligned up to closest 8 bytes, so expected size is:
+ * `round_up(value_size, 8) * libbpf_num_possible_cpus()`.
+ * @param flags extra flags passed to kernel for this operation
  * @return 0, on success; negative error, otherwise
  *
  * **bpf_map__lookup_elem()** is high-level equivalent of
@@ -1158,14 +1265,8 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
  * @param key pointer to memory containing bytes of the key
  * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
  * @param value pointer to memory containing bytes of the value
- * @param value_sz size in byte of value data memory; it has to match BPF map
- * definition's **value_size**. For per-CPU BPF maps value size has to be
- * a product of BPF map value size and number of possible CPUs in the system
- * (could be fetched with **libbpf_num_possible_cpus()**). Note also that for
- * per-CPU values value size has to be aligned up to closest 8 bytes for
- * alignment reasons, so expected size is: `round_up(value_size, 8)
- * * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param value_sz refer to **bpf_map__lookup_elem()**'s description.
+ * @param flags extra flags passed to kernel for this operation
  * @return 0, on success; negative error, otherwise
  *
  * **bpf_map__update_elem()** is high-level equivalent of
@@ -1181,7 +1282,7 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map,
  * @param map BPF map to delete element from
  * @param key pointer to memory containing bytes of the key
  * @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
  * @return 0, on success; negative error, otherwise
  *
  * **bpf_map__delete_elem()** is high-level equivalent of
@@ -1204,7 +1305,7 @@ LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map,
  * per-CPU values value size has to be aligned up to closest 8 bytes for
  * alignment reasons, so expected size is: `round_up(value_size, 8)
  * * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
  * @return 0, on success; negative error, otherwise
  *
  * **bpf_map__lookup_and_delete_elem()** is high-level equivalent of
@@ -1230,6 +1331,28 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
  */
 LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
 				     const void *cur_key, void *next_key, size_t key_sz);
+/**
+ * @brief **bpf_map__set_exclusive_program()** sets a map to be exclusive to the
+ * specified program. This must be called *before* the map is created.
+ *
+ * @param map BPF map to make exclusive.
+ * @param prog BPF program to be the exclusive user of the map. Must belong
+ * to the same bpf_object as the map.
+ * @return 0 on success; a negative error code otherwise.
+ *
+ * This function must be called after the BPF object is opened but before
+ * it is loaded. Once the object is loaded, only the specified program
+ * will be able to access the map's contents.
+ */
+LIBBPF_API int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog);
+
+/**
+ * @brief **bpf_map__exclusive_program()** returns the exclusive program
+ * that is registered with the map (if any).
+ * @param map BPF map to which the exclusive program is registered.
+ * @return the registered exclusive program.
+ */
+LIBBPF_API struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map);
 
 struct bpf_xdp_set_link_opts {
 	size_t sz;
@@ -1270,6 +1393,7 @@ enum bpf_tc_attach_point {
 	BPF_TC_INGRESS = 1 << 0,
 	BPF_TC_EGRESS  = 1 << 1,
 	BPF_TC_CUSTOM  = 1 << 2,
+	BPF_TC_QDISC   = 1 << 3,
 };
 
 #define BPF_TC_PARENT(a, b) 	\
@@ -1284,9 +1408,11 @@ struct bpf_tc_hook {
 	int ifindex;
 	enum bpf_tc_attach_point attach_point;
 	__u32 parent;
+	__u32 handle;
+	const char *qdisc;
 	size_t :0;
 };
-#define bpf_tc_hook__last_field parent
+#define bpf_tc_hook__last_field qdisc
 
 struct bpf_tc_opts {
 	size_t sz;
@@ -1551,6 +1677,7 @@ struct perf_buffer_opts {
  * @param sample_cb function called on each received data record
  * @param lost_cb function called when record loss has occurred
  * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb*
+ * @param opts optional parameters for the perf buffer, can be null
  * @return a new instance of struct perf_buffer on success, NULL on error with
  * *errno* containing an error code
  */
@@ -1771,9 +1898,10 @@ struct gen_loader_opts {
 	const char *insns;
 	__u32 data_sz;
 	__u32 insns_sz;
+	bool gen_hash;
 };
 
-#define gen_loader_opts__last_field insns_sz
+#define gen_loader_opts__last_field gen_hash
 LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
 				      struct gen_loader_opts *opts);
 
@@ -1918,6 +2046,23 @@ LIBBPF_API int libbpf_register_prog_handler(const char *sec,
  */
 LIBBPF_API int libbpf_unregister_prog_handler(int handler_id);
 
+/**
+ * @brief **bpf_program__clone()** loads a single BPF program from a prepared
+ * BPF object into the kernel, returning its file descriptor.
+ *
+ * The BPF object must have been previously prepared with
+ * **bpf_object__prepare()**. If @opts is provided, any non-zero field
+ * overrides the defaults derived from the program/object internals.
+ * If @opts is NULL, all fields are populated automatically.
+ *
+ * The returned FD is owned by the caller and must be closed with close().
+ *
+ * @param prog BPF program from a prepared object
+ * @param opts Optional load options; non-zero fields override defaults
+ * @return program FD (>= 0) on success; negative error code on failure
+ */
+LIBBPF_API int bpf_program__clone(struct bpf_program *prog, const struct bpf_prog_load_opts *opts);
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index a8b2936a1646..dfed8d60af05 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -436,4 +436,28 @@ LIBBPF_1.6.0 {
 		bpf_linker__add_buf;
 		bpf_linker__add_fd;
 		bpf_linker__new_fd;
+		bpf_object__prepare;
+		bpf_prog_stream_read;
+		bpf_program__attach_cgroup_opts;
+		bpf_program__func_info;
+		bpf_program__func_info_cnt;
+		bpf_program__line_info;
+		bpf_program__line_info_cnt;
+		btf__add_decl_attr;
+		btf__add_type_attr;
 } LIBBPF_1.5.0;
+
+LIBBPF_1.7.0 {
+	global:
+		bpf_map__set_exclusive_program;
+		bpf_map__exclusive_program;
+		bpf_prog_assoc_struct_ops;
+		bpf_program__assoc_struct_ops;
+		btf__permute;
+} LIBBPF_1.6.0;
+
+LIBBPF_1.8.0 {
+	global:
+		bpf_program__clone;
+		btf__new_empty_opts;
+} LIBBPF_1.7.0;
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
deleted file mode 100644
index 6b180172ec6b..000000000000
--- a/tools/lib/bpf/libbpf_errno.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-/*
- * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
- * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
- * Copyright (C) 2015 Huawei Inc.
- * Copyright (C) 2017 Nicira, Inc.
- */
-
-#undef _GNU_SOURCE
-#include <stdio.h>
-#include <string.h>
-
-#include "libbpf.h"
-#include "libbpf_internal.h"
-
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
-#define ERRNO_OFFSET(e)		((e) - __LIBBPF_ERRNO__START)
-#define ERRCODE_OFFSET(c)	ERRNO_OFFSET(LIBBPF_ERRNO__##c)
-#define NR_ERRNO	(__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
-
-static const char *libbpf_strerror_table[NR_ERRNO] = {
-	[ERRCODE_OFFSET(LIBELF)]	= "Something wrong in libelf",
-	[ERRCODE_OFFSET(FORMAT)]	= "BPF object format invalid",
-	[ERRCODE_OFFSET(KVERSION)]	= "'version' section incorrect or lost",
-	[ERRCODE_OFFSET(ENDIAN)]	= "Endian mismatch",
-	[ERRCODE_OFFSET(INTERNAL)]	= "Internal error in libbpf",
-	[ERRCODE_OFFSET(RELOC)]		= "Relocation failed",
-	[ERRCODE_OFFSET(VERIFY)]	= "Kernel verifier blocks program loading",
-	[ERRCODE_OFFSET(PROG2BIG)]	= "Program too big",
-	[ERRCODE_OFFSET(KVER)]		= "Incorrect kernel version",
-	[ERRCODE_OFFSET(PROGTYPE)]	= "Kernel doesn't support this program type",
-	[ERRCODE_OFFSET(WRNGPID)]	= "Wrong pid in netlink message",
-	[ERRCODE_OFFSET(INVSEQ)]	= "Invalid netlink sequence",
-	[ERRCODE_OFFSET(NLPARSE)]	= "Incorrect netlink message parsing",
-};
-
-int libbpf_strerror(int err, char *buf, size_t size)
-{
-	int ret;
-
-	if (!buf || !size)
-		return libbpf_err(-EINVAL);
-
-	err = err > 0 ? err : -err;
-
-	if (err < __LIBBPF_ERRNO__START) {
-		ret = strerror_r(err, buf, size);
-		buf[size - 1] = '\0';
-		return libbpf_err_errno(ret);
-	}
-
-	if (err < __LIBBPF_ERRNO__END) {
-		const char *msg;
-
-		msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
-		ret = snprintf(buf, size, "%s", msg);
-		buf[size - 1] = '\0';
-		/* The length of the buf and msg is positive.
-		 * A negative number may be returned only when the
-		 * size exceeds INT_MAX. Not likely to appear.
-		 */
-		if (ret >= size)
-			return libbpf_err(-ERANGE);
-		return 0;
-	}
-
-	ret = snprintf(buf, size, "Unknown libbpf error %d", err);
-	buf[size - 1] = '\0';
-	if (ret >= size)
-		return libbpf_err(-ERANGE);
-	return libbpf_err(-ENOENT);
-}
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index de498e2dd6b0..3781c45b46d3 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -74,6 +74,8 @@
 #define ELF64_ST_VISIBILITY(o) ((o) & 0x03)
 #endif
 
+#define JUMPTABLES_SEC ".jumptables"
+
 #define BTF_INFO_ENC(kind, kind_flag, vlen) \
 	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
@@ -172,6 +174,16 @@ do {				\
 #define pr_info(fmt, ...)	__pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
 #define pr_debug(fmt, ...)	__pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
 
+/**
+ * @brief **libbpf_errstr()** returns string corresponding to numeric errno
+ * @param err numeric errno; positive values are negated before the lookup
+ * @return symbolic name (e.g. "-ENOENT") for known codes; otherwise a decimal
+ * string held in a per-thread buffer that the next such call overwrites.
+ */
+const char *libbpf_errstr(int err);
+
+#define errstr(err) libbpf_errstr(err)
+
 #ifndef __has_builtin
 #define __has_builtin(x) 0
 #endif
@@ -380,6 +392,12 @@ enum kern_feature_id {
 	FEAT_ARG_CTX_TAG,
 	/* Kernel supports '?' at the front of datasec names */
 	FEAT_BTF_QMARK_DATASEC,
+	/* Kernel supports LDIMM64 imm offsets past 512 MiB. */
+	FEAT_LDIMM64_FULL_RANGE_OFF,
+	/* Kernel supports uprobe syscall */
+	FEAT_UPROBE_SYSCALL,
+	/* Kernel supports BTF layout information */
+	FEAT_BTF_LAYOUT,
 	__FEAT_CNT,
 };
 
@@ -396,6 +414,7 @@ struct kern_feature_cache {
 
 bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id);
 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
+void bpf_object_set_feat_cache(struct bpf_object *obj, struct kern_feature_cache *cache);
 
 int probe_kern_syscall_wrapper(int token_fd);
 int probe_memcg_account(int token_fd);
@@ -406,9 +425,14 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
 int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 			 const char *str_sec, size_t str_len,
 			 int token_fd);
+int libbpf__load_raw_btf_hdr(const struct btf_header *hdr,
+			     const char *raw_types, const char *str_sec,
+			     const char *layout_sec, int token_fd);
+struct btf *bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *orig_btf);
 int btf_load_into_kernel(struct btf *btf,
 			 char *log_buf, size_t log_sz, __u32 log_level,
 			 int token_fd);
+struct btf *btf_load_from_kernel(__u32 id, struct btf *base_btf, int token_fd);
 
 struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
@@ -666,6 +690,15 @@ static inline int sys_dup3(int oldfd, int newfd, int flags)
 	return syscall(__NR_dup3, oldfd, newfd, flags);
 }
 
+/* memfd_create() is absent from the libc shipped with some Android
+ * versions, so bypass the libc wrapper and issue the raw Linux syscall
+ * directly.
+ */
+static inline int sys_memfd_create(const char *name, unsigned int flags)
+{
+	return syscall(__NR_memfd_create, name, flags);
+}
+
 /* Point *fixed_fd* to the same file that *tmp_fd* points to.
  * Regardless of success, *tmp_fd* is closed.
  * Whatever *fixed_fd* pointed to is closed silently.
@@ -702,6 +735,11 @@ static inline bool is_pow_of_2(size_t x)
 	return x && (x & (x - 1)) == 0;
 }
 
+static inline __u32 ror32(__u32 v, int bits)
+{
+	return (v >> (bits & 31)) | (v << (-bits & 31)); /* masked: never shifts by 32 */
+}
+
 #define PROG_LOAD_ATTEMPTS 5
 int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts);
 
@@ -726,4 +764,8 @@ int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
 
 int probe_fd(int fd);
 
+#define SHA256_DIGEST_LENGTH 32
+#define SHA256_DWORD_SIZE (SHA256_DIGEST_LENGTH / sizeof(__u64)) /* digest size in 64-bit words */
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 9dfbe7750f56..b70d9637ecf5 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -218,18 +218,10 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts)
 	return libbpf_err(ret);
 }
 
-int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
-			 const char *str_sec, size_t str_len,
-			 int token_fd)
+int libbpf__load_raw_btf_hdr(const struct btf_header *hdr, const char *raw_types,
+			     const char *str_sec, const char *layout_sec,
+			     int token_fd)
 {
-	struct btf_header hdr = {
-		.magic = BTF_MAGIC,
-		.version = BTF_VERSION,
-		.hdr_len = sizeof(struct btf_header),
-		.type_len = types_len,
-		.str_off = types_len,
-		.str_len = str_len,
-	};
 	LIBBPF_OPTS(bpf_btf_load_opts, opts,
 		.token_fd = token_fd,
 		.btf_flags = token_fd ? BPF_F_TOKEN_FD : 0,
@@ -237,14 +229,16 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 	int btf_fd, btf_len;
 	__u8 *raw_btf;
 
-	btf_len = hdr.hdr_len + hdr.type_len + hdr.str_len;
+	btf_len = hdr->hdr_len + hdr->type_off + hdr->type_len + hdr->str_len + hdr->layout_len;
 	raw_btf = malloc(btf_len);
 	if (!raw_btf)
 		return -ENOMEM;
 
-	memcpy(raw_btf, &hdr, sizeof(hdr));
-	memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
-	memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
+	memcpy(raw_btf, hdr, sizeof(*hdr));
+	memcpy(raw_btf + hdr->hdr_len + hdr->type_off, raw_types, hdr->type_len);
+	memcpy(raw_btf + hdr->hdr_len + hdr->str_off, str_sec, hdr->str_len);
+	if (layout_sec)
+		memcpy(raw_btf + hdr->hdr_len + hdr->layout_off, layout_sec, hdr->layout_len);
 
 	btf_fd = bpf_btf_load(raw_btf, btf_len, &opts);
 
@@ -252,6 +246,22 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
 	return btf_fd;
 }
 
+int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
+			 const char *str_sec, size_t str_len,
+			 int token_fd)
+{
+	struct btf_header hdr = {
+		.magic = BTF_MAGIC,
+		.version = BTF_VERSION,
+		.hdr_len = sizeof(struct btf_header),
+		.type_len = types_len,
+		.str_off = types_len, /* type_off is left 0, so strings start right after the types */
+		.str_len = str_len,
+	}; /* layout fields stay zero; NULL is passed as layout_sec below */
+
+	return libbpf__load_raw_btf_hdr(&hdr, raw_types, str_sec, NULL, token_fd);
+}
+
 static int load_local_storage_btf(void)
 {
 	const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";
@@ -364,6 +374,10 @@ static int probe_map_create(enum bpf_map_type map_type)
 	case BPF_MAP_TYPE_SOCKHASH:
 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
 		break;
+	case BPF_MAP_TYPE_INSN_ARRAY:
+		key_size	= sizeof(__u32);
+		value_size	= sizeof(struct bpf_insn_array_value);
+		break;
 	case BPF_MAP_TYPE_UNSPEC:
 	default:
 		return -EOPNOTSUPP;
diff --git a/tools/lib/bpf/libbpf_utils.c b/tools/lib/bpf/libbpf_utils.c
new file mode 100644
index 000000000000..ac3beae54cf6
--- /dev/null
+++ b/tools/lib/bpf/libbpf_utils.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ */
+
+#undef _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP	524
+#endif
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+#define ERRNO_OFFSET(e)		((e) - __LIBBPF_ERRNO__START)
+#define ERRCODE_OFFSET(c)	ERRNO_OFFSET(LIBBPF_ERRNO__##c)
+#define NR_ERRNO	(__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+
+static const char * const libbpf_strerror_table[NR_ERRNO] = { /* indexed by ERRNO_OFFSET() */
+	[ERRCODE_OFFSET(LIBELF)]	= "Something wrong in libelf",
+	[ERRCODE_OFFSET(FORMAT)]	= "BPF object format invalid",
+	[ERRCODE_OFFSET(KVERSION)]	= "'version' section incorrect or lost",
+	[ERRCODE_OFFSET(ENDIAN)]	= "Endian mismatch",
+	[ERRCODE_OFFSET(INTERNAL)]	= "Internal error in libbpf",
+	[ERRCODE_OFFSET(RELOC)]		= "Relocation failed",
+	[ERRCODE_OFFSET(VERIFY)]	= "Kernel verifier blocks program loading",
+	[ERRCODE_OFFSET(PROG2BIG)]	= "Program too big",
+	[ERRCODE_OFFSET(KVER)]		= "Incorrect kernel version",
+	[ERRCODE_OFFSET(PROGTYPE)]	= "Kernel doesn't support this program type",
+	[ERRCODE_OFFSET(WRNGPID)]	= "Wrong pid in netlink message",
+	[ERRCODE_OFFSET(INVSEQ)]	= "Invalid netlink sequence",
+	[ERRCODE_OFFSET(NLPARSE)]	= "Incorrect netlink message parsing",
+};
+
+int libbpf_strerror(int err, char *buf, size_t size)
+{
+	int ret;
+
+	if (!buf || !size)
+		return libbpf_err(-EINVAL);
+	/* Work on the magnitude so both positive and negative codes are accepted. */
+	err = err > 0 ? err : -err;
+	/* Codes below the libbpf-specific range are formatted by strerror_r(). */
+	if (err < __LIBBPF_ERRNO__START) {
+		ret = strerror_r(err, buf, size);
+		buf[size - 1] = '\0';
+		return libbpf_err_errno(ret);
+	}
+	/* Codes inside the libbpf range are looked up in libbpf_strerror_table. */
+	if (err < __LIBBPF_ERRNO__END) {
+		const char *msg;
+
+		msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
+		ret = snprintf(buf, size, "%s", msg);
+		buf[size - 1] = '\0';
+		/* The length of the buf and msg is positive.
+		 * A negative number may be returned only when the
+		 * size exceeds INT_MAX. Not likely to appear.
+		 */
+		if (ret >= size)
+			return libbpf_err(-ERANGE);
+		return 0;
+	}
+	/* Any other code: write a generic message; -ERANGE if truncated, else -ENOENT. */
+	ret = snprintf(buf, size, "Unknown libbpf error %d", err);
+	buf[size - 1] = '\0';
+	if (ret >= size)
+		return libbpf_err(-ERANGE);
+	return libbpf_err(-ENOENT);
+}
+
+const char *libbpf_errstr(int err)
+{
+	static __thread char buf[12]; /* room for "-2147483648" plus NUL */
+
+	if (err > 0)
+		err = -err;
+	/* Known codes return string literals; anything else is formatted into buf. */
+	switch (err) {
+	case -E2BIG:		return "-E2BIG";
+	case -EACCES:		return "-EACCES";
+	case -EADDRINUSE:	return "-EADDRINUSE";
+	case -EADDRNOTAVAIL:	return "-EADDRNOTAVAIL";
+	case -EAGAIN:		return "-EAGAIN";
+	case -EALREADY:		return "-EALREADY";
+	case -EBADF:		return "-EBADF";
+	case -EBADFD:		return "-EBADFD";
+	case -EBUSY:		return "-EBUSY";
+	case -ECANCELED:	return "-ECANCELED";
+	case -ECHILD:		return "-ECHILD";
+	case -EDEADLK:		return "-EDEADLK";
+	case -EDOM:		return "-EDOM";
+	case -EEXIST:		return "-EEXIST";
+	case -EFAULT:		return "-EFAULT";
+	case -EFBIG:		return "-EFBIG";
+	case -EILSEQ:		return "-EILSEQ";
+	case -EINPROGRESS:	return "-EINPROGRESS";
+	case -EINTR:		return "-EINTR";
+	case -EINVAL:		return "-EINVAL";
+	case -EIO:		return "-EIO";
+	case -EISDIR:		return "-EISDIR";
+	case -ELOOP:		return "-ELOOP";
+	case -EMFILE:		return "-EMFILE";
+	case -EMLINK:		return "-EMLINK";
+	case -EMSGSIZE:		return "-EMSGSIZE";
+	case -ENAMETOOLONG:	return "-ENAMETOOLONG";
+	case -ENFILE:		return "-ENFILE";
+	case -ENODATA:		return "-ENODATA";
+	case -ENODEV:		return "-ENODEV";
+	case -ENOENT:		return "-ENOENT";
+	case -ENOEXEC:		return "-ENOEXEC";
+	case -ENOLINK:		return "-ENOLINK";
+	case -ENOMEM:		return "-ENOMEM";
+	case -ENOSPC:		return "-ENOSPC";
+	case -ENOTBLK:		return "-ENOTBLK";
+	case -ENOTDIR:		return "-ENOTDIR";
+	case -ENOTSUPP:		return "-ENOTSUPP";
+	case -ENOTTY:		return "-ENOTTY";
+	case -ENXIO:		return "-ENXIO";
+	case -EOPNOTSUPP:	return "-EOPNOTSUPP";
+	case -EOVERFLOW:	return "-EOVERFLOW";
+	case -EPERM:		return "-EPERM";
+	case -EPIPE:		return "-EPIPE";
+	case -EPROTO:		return "-EPROTO";
+	case -EPROTONOSUPPORT:	return "-EPROTONOSUPPORT";
+	case -ERANGE:		return "-ERANGE";
+	case -EROFS:		return "-EROFS";
+	case -ESPIPE:		return "-ESPIPE";
+	case -ESRCH:		return "-ESRCH";
+	case -ETXTBSY:		return "-ETXTBSY";
+	case -EUCLEAN:		return "-EUCLEAN";
+	case -EXDEV:		return "-EXDEV";
+	default:
+		snprintf(buf, sizeof(buf), "%d", err);
+		return buf;
+	}
+}
+
+static inline __u32 get_unaligned_be32(const void *p)
+{
+	__be32 val;
+
+	memcpy(&val, p, sizeof(val)); /* byte copy, so p need not be 4-byte aligned */
+	return be32_to_cpu(val); /* stored big-endian, returned in CPU byte order */
+}
+
+static inline void put_unaligned_be32(__u32 val, void *p)
+{
+	__be32 be_val = cpu_to_be32(val); /* convert to big-endian before storing */
+
+	memcpy(p, &be_val, sizeof(be_val)); /* byte copy, so p need not be 4-byte aligned */
+}
+
+#define SHA256_BLOCK_LENGTH 64
+#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
+#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define Sigma_0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22))
+#define Sigma_1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25))
+#define sigma_0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3))
+#define sigma_1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10))
+
+static const __u32 sha256_K[64] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+	0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+	0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
+	0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+	0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+	0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+	0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+	0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+	0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+
+#define SHA256_ROUND(i, a, b, c, d, e, f, g, h)                                \
+	{                                                                      \
+		__u32 tmp = h + Sigma_1(e) + Ch(e, f, g) + sha256_K[i] + w[i]; \
+		d += tmp;                                                      \
+		h = tmp + Sigma_0(a) + Maj(a, b, c);                           \
+	}
+
+static void sha256_blocks(__u32 state[8], const __u8 *data, size_t nblocks)
+{
+	while (nblocks--) { /* one SHA256_BLOCK_LENGTH-byte block per pass */
+		__u32 a = state[0];
+		__u32 b = state[1];
+		__u32 c = state[2];
+		__u32 d = state[3];
+		__u32 e = state[4];
+		__u32 f = state[5];
+		__u32 g = state[6];
+		__u32 h = state[7];
+		__u32 w[64]; /* per-block word schedule consumed by SHA256_ROUND */
+		int i;
+
+		for (i = 0; i < 16; i++) /* first 16 words: big-endian loads from data */
+			w[i] = get_unaligned_be32(&data[4 * i]);
+		for (; i < ARRAY_SIZE(w); i++) /* remaining words derive from earlier ones */
+			w[i] = sigma_1(w[i - 2]) + w[i - 7] +
+			       sigma_0(w[i - 15]) + w[i - 16];
+		for (i = 0; i < ARRAY_SIZE(w); i += 8) { /* 8 rounds per pass, variable roles rotated */
+			SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h);
+			SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g);
+			SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f);
+			SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e);
+			SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d);
+			SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c);
+			SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b);
+			SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a);
+		}
+		state[0] += a; /* fold the working variables back into the running state */
+		state[1] += b;
+		state[2] += c;
+		state[3] += d;
+		state[4] += e;
+		state[5] += f;
+		state[6] += g;
+		state[7] += h;
+		data += SHA256_BLOCK_LENGTH;
+	}
+}
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH])
+{
+	__u32 state[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+			   0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
+	const __be64 bitcount = cpu_to_be64((__u64)len * 8);
+	__u8 final_data[2 * SHA256_BLOCK_LENGTH] = { 0 };
+	size_t final_len = len % SHA256_BLOCK_LENGTH;
+	int i;
+
+	sha256_blocks(state, data, len / SHA256_BLOCK_LENGTH);
+	/* Padded tail: leftover bytes, a 0x80 byte, zeros, then the big-endian bit count. */
+	memcpy(final_data, data + len - final_len, final_len);
+	final_data[final_len] = 0x80;
+	final_len = roundup(final_len + 9, SHA256_BLOCK_LENGTH);
+	memcpy(&final_data[final_len - 8], &bitcount, 8);
+	/* The tail occupies one or two blocks, depending on the leftover size. */
+	sha256_blocks(state, final_data, final_len / SHA256_BLOCK_LENGTH);
+	/* Write the state words to out in big-endian byte order. */
+	for (i = 0; i < ARRAY_SIZE(state); i++)
+		put_unaligned_be32(state[i], &out[4 * i]);
+}
diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h
index 28c58fb17250..c446c0cd8cf9 100644
--- a/tools/lib/bpf/libbpf_version.h
+++ b/tools/lib/bpf/libbpf_version.h
@@ -4,6 +4,6 @@
 #define __LIBBPF_VERSION_H
 
 #define LIBBPF_MAJOR_VERSION 1
-#define LIBBPF_MINOR_VERSION 6
+#define LIBBPF_MINOR_VERSION 8
 
 #endif /* __LIBBPF_VERSION_H */
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index b52f71c59616..78f92c39290a 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -25,7 +25,6 @@
 #include "btf.h"
 #include "libbpf_internal.h"
 #include "strset.h"
-#include "str_error.h"
 
 #define BTF_EXTERN_SEC ".extern"
 
@@ -573,7 +572,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,
 
 	snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz);
 
-	fd = memfd_create(filename, 0);
+	fd = sys_memfd_create(filename, 0);
 	if (fd < 0) {
 		ret = -errno;
 		pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret));
@@ -582,7 +581,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,
 
 	written = 0;
 	while (written < buf_sz) {
-		ret = write(fd, buf, buf_sz);
+		ret = write(fd, buf + written, buf_sz - written);
 		if (ret < 0) {
 			ret = -errno;
 			pr_warn("failed to write '%s': %s\n", filename, errstr(ret));
@@ -1376,7 +1375,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj
 		} else {
 			if (!secs_match(dst_sec, src_sec)) {
 				pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name);
-				return -1;
+				return -EINVAL;
 			}
 
 			/* "license" and "version" sections are deduped */
@@ -2026,6 +2025,9 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
 			obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
 			return 0;
 		}
+
+		if (strcmp(src_sec->sec_name, JUMPTABLES_SEC) == 0)
+			goto add_sym;
 	}
 
 	if (sym_bind == STB_LOCAL)
@@ -2163,7 +2165,7 @@ add_sym:
 
 	obj->sym_map[src_sym_idx] = dst_sym_idx;
 
-	if (sym_type == STT_SECTION && dst_sym) {
+	if (sym_type == STT_SECTION && dst_sec) {
 		dst_sec->sec_sym_idx = dst_sym_idx;
 		dst_sym->st_value = 0;
 	}
@@ -2223,7 +2225,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob
 			}
 		} else if (!secs_match(dst_sec, src_sec)) {
 			pr_warn("sections %s are not compatible\n", src_sec->sec_name);
-			return -1;
+			return -EINVAL;
 		}
 
 		/* shdr->sh_link points to SYMTAB */
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 68a2def17175..c9a78fb16f11 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -143,7 +143,7 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
 	struct nlmsghdr *nh;
 	int len, ret;
 
-	ret = alloc_iov(&iov, 4096);
+	ret = alloc_iov(&iov, 8192);
 	if (ret)
 		goto done;
 
@@ -212,6 +212,8 @@ start:
 				}
 			}
 		}
+		if (len)
+			pr_warn("Invalid message or trailing data in Netlink response: %d bytes left\n", len);
 	}
 	ret = 0;
 done:
@@ -529,9 +531,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
 }
 
 
-typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook);
 
-static int clsact_config(struct libbpf_nla_req *req)
+static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
 {
 	req->tc.tcm_parent = TC_H_CLSACT;
 	req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
@@ -539,6 +541,16 @@ static int clsact_config(struct libbpf_nla_req *req)
 	return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
 }
 
+static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
+{
+	const char *qdisc = OPTS_GET(hook, qdisc, NULL);
+
+	req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT);
+	req->tc.tcm_handle = OPTS_GET(hook, handle, 0);
+	/* TCA_KIND needs a qdisc name: fail with -EINVAL rather than strlen(NULL) */
+	return qdisc ? nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1) : -EINVAL;
+}
+
 static int attach_point_to_config(struct bpf_tc_hook *hook,
 				  qdisc_config_t *config)
 {
@@ -552,6 +564,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook,
 		return 0;
 	case BPF_TC_CUSTOM:
 		return -EOPNOTSUPP;
+	case BPF_TC_QDISC:
+		*config = &qdisc_config;
+		return 0;
 	default:
 		return -EINVAL;
 	}
@@ -596,7 +611,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
 	req.tc.tcm_family  = AF_UNSPEC;
 	req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
 
-	ret = config(&req);
+	ret = config(&req, hook);
 	if (ret < 0)
 		return ret;
 
@@ -639,6 +654,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
 	case BPF_TC_INGRESS:
 	case BPF_TC_EGRESS:
 		return libbpf_err(__bpf_tc_detach(hook, NULL, true));
+	case BPF_TC_QDISC:
 	case BPF_TC_INGRESS | BPF_TC_EGRESS:
 		return libbpf_err(tc_qdisc_delete(hook));
 	case BPF_TC_CUSTOM:
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index 975e265eab3b..06663f9ea581 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype,
 		minlen = nla_attr_minlen[pt->type];
 
 	if (libbpf_nla_len(nla) < minlen)
-		return -1;
+		return -EINVAL;
 
 	if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen)
-		return -1;
+		return -EINVAL;
 
 	if (pt->type == LIBBPF_NLA_STRING) {
 		char *data = libbpf_nla_data(nla);
 
 		if (data[libbpf_nla_len(nla) - 1] != '\0')
-			return -1;
+			return -EINVAL;
 	}
 
 	return 0;
@@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
 		if (policy) {
 			err = validate_nla(nla, maxtype, policy);
 			if (err < 0)
-				goto errout;
+				return err;
 		}
 
-		if (tb[type])
+		if (tb[type]) {
 			pr_warn("Attribute of type %#x found multiple times in message, "
 				"previous attribute is being ignored.\n", type);
+		}
 
 		tb[type] = nla;
 	}
 
-	err = 0;
-errout:
-	return err;
+	return 0;
 }
 
 /**
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 7632e9d41827..0ccc8f548cba 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -64,7 +64,6 @@ enum libbpf_print_level {
 #include "libbpf.h"
 #include "bpf.h"
 #include "btf.h"
-#include "str_error.h"
 #include "libbpf_internal.h"
 #endif
 
@@ -293,6 +292,8 @@ int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,
 			++spec_str;
 		if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
 			return -EINVAL;
+		if (access_idx < 0)
+			return -EINVAL;
 		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
 			return -E2BIG;
 		spec_str += parsed_len;
@@ -683,7 +684,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,
 {
 	const struct bpf_core_accessor *acc;
 	const struct btf_type *t;
-	__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+	__u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id, elem_id;
 	const struct btf_member *m;
 	const struct btf_type *mt;
 	bool bitfield;
@@ -706,8 +707,14 @@ static int bpf_core_calc_field_relo(const char *prog_name,
 	if (!acc->name) {
 		if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {
 			*val = spec->bit_offset / 8;
-			/* remember field size for load/store mem size */
-			sz = btf__resolve_size(spec->btf, acc->type_id);
+			/* remember field size for load/store mem size;
+			 * note, for arrays we care about individual element
+			 * sizes, not the overall array size
+			 */
+			t = skip_mods_and_typedefs(spec->btf, acc->type_id, &elem_id);
+			while (btf_is_array(t))
+				t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
+			sz = btf__resolve_size(spec->btf, elem_id);
 			if (sz < 0)
 				return -EINVAL;
 			*field_sz = sz;
@@ -767,7 +774,17 @@ static int bpf_core_calc_field_relo(const char *prog_name,
 	case BPF_CORE_FIELD_BYTE_OFFSET:
 		*val = byte_off;
 		if (!bitfield) {
-			*field_sz = byte_sz;
+			/* remember field size for load/store mem size;
+			 * note, for arrays we care about individual element
+			 * sizes, not the overall array size
+			 */
+			t = skip_mods_and_typedefs(spec->btf, field_type_id, &elem_id);
+			while (btf_is_array(t))
+				t = skip_mods_and_typedefs(spec->btf, btf_array(t)->type, &elem_id);
+			sz = btf__resolve_size(spec->btf, elem_id);
+			if (sz < 0)
+				return -EINVAL;
+			*field_sz = sz;
 			*type_id = field_type_id;
 		}
 		break;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 9702b70da444..00ec4837a06d 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -21,7 +21,6 @@
 #include "libbpf.h"
 #include "libbpf_internal.h"
 #include "bpf.h"
-#include "str_error.h"
 
 struct ring {
 	ring_buffer_sample_fn sample_cb;
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 4d5fa079b5d6..6a8f5c7a02eb 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -13,10 +13,15 @@
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <sys/mman.h>
+#include <linux/keyctl.h>
 #include <stdlib.h>
 #include "bpf.h"
 #endif
 
+#ifndef SHA256_DIGEST_LENGTH
+#define SHA256_DIGEST_LENGTH 32
+#endif
+
 #ifndef __NR_bpf
 # if defined(__mips__) && defined(_ABIO32)
 #  define __NR_bpf 4355
@@ -64,6 +69,11 @@ struct bpf_load_and_run_opts {
 	__u32 data_sz;
 	__u32 insns_sz;
 	const char *errstr;
+	void *signature;
+	__u32 signature_sz;
+	__s32 keyring_id;
+	void *excl_prog_hash;
+	__u32 excl_prog_hash_sz;
 };
 
 long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
@@ -220,14 +230,19 @@ static inline int skel_map_create(enum bpf_map_type map_type,
 				  const char *map_name,
 				  __u32 key_size,
 				  __u32 value_size,
-				  __u32 max_entries)
+				  __u32 max_entries,
+				  const void *excl_prog_hash,
+				  __u32 excl_prog_hash_sz)
 {
-	const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+	const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
 	union bpf_attr attr;
 
 	memset(&attr, 0, attr_sz);
 
 	attr.map_type = map_type;
+	attr.excl_prog_hash = (unsigned long) excl_prog_hash;
+	attr.excl_prog_hash_size = excl_prog_hash_sz;
+
 	strncpy(attr.map_name, map_name, sizeof(attr.map_name));
 	attr.key_size = key_size;
 	attr.value_size = value_size;
@@ -300,6 +315,35 @@ static inline int skel_link_create(int prog_fd, int target_fd,
 	return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
 }
 
+static inline int skel_obj_get_info_by_fd(int fd)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, info);
+	__u8 sha[SHA256_DIGEST_LENGTH];
+	struct bpf_map_info info;
+	__u32 info_len = sizeof(info);
+	union bpf_attr attr;
+
+	memset(&info, 0, sizeof(info));
+	info.hash = (long) &sha;
+	info.hash_size = SHA256_DIGEST_LENGTH;
+
+	memset(&attr, 0, attr_sz);
+	attr.info.bpf_fd = fd;
+	attr.info.info = (long) &info;
+	attr.info.info_len = info_len;
+	return skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
+}
+
+static inline int skel_map_freeze(int fd)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_sz);
+	attr.map_fd = fd;
+
+	return skel_sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
+}
 #ifdef __KERNEL__
 #define set_err
 #else
@@ -308,12 +352,13 @@ static inline int skel_link_create(int prog_fd, int target_fd,
 
 static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 {
-	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, keyring_id);
 	const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
 	int map_fd = -1, prog_fd = -1, key = 0, err;
 	union bpf_attr attr;
 
-	err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
+	err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1,
+				       opts->excl_prog_hash, opts->excl_prog_hash_sz);
 	if (map_fd < 0) {
 		opts->errstr = "failed to create loader map";
 		set_err;
@@ -327,11 +372,34 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 		goto out;
 	}
 
+#ifndef __KERNEL__
+	err = skel_map_freeze(map_fd);
+	if (err < 0) {
+		opts->errstr = "failed to freeze map";
+		set_err;
+		goto out;
+	}
+	err = skel_obj_get_info_by_fd(map_fd);
+	if (err < 0) {
+		opts->errstr = "failed to fetch obj info";
+		set_err;
+		goto out;
+	}
+#endif
+
 	memset(&attr, 0, prog_load_attr_sz);
 	attr.prog_type = BPF_PROG_TYPE_SYSCALL;
 	attr.insns = (long) opts->insns;
 	attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
 	attr.license = (long) "Dual BSD/GPL";
+#ifndef __KERNEL__
+	attr.signature = (long) opts->signature;
+	attr.signature_size = opts->signature_sz;
+#else
+	if (opts->signature || opts->signature_sz)
+		pr_warn("signatures are not supported from bpf_preload\n");
+#endif
+	attr.keyring_id = opts->keyring_id;
 	memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
 	attr.fd_array = (long) &map_fd;
 	attr.log_level = opts->ctx->log_level;
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
deleted file mode 100644
index 8743049e32b7..000000000000
--- a/tools/lib/bpf/str_error.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-#undef _GNU_SOURCE
-#include <string.h>
-#include <stdio.h>
-#include <errno.h>
-#include "str_error.h"
-
-#ifndef ENOTSUPP
-#define ENOTSUPP	524
-#endif
-
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
-/*
- * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
- * libc, while checking strerror_r() return to avoid having to check this in
- * all places calling it.
- */
-char *libbpf_strerror_r(int err, char *dst, int len)
-{
-	int ret = strerror_r(err < 0 ? -err : err, dst, len);
-	/* on glibc <2.13, ret == -1 and errno is set, if strerror_r() can't
-	 * handle the error, on glibc >=2.13 *positive* (errno-like) error
-	 * code is returned directly
-	 */
-	if (ret == -1)
-		ret = errno;
-	if (ret) {
-		if (ret == EINVAL)
-			/* strerror_r() doesn't recognize this specific error */
-			snprintf(dst, len, "unknown error (%d)", err < 0 ? err : -err);
-		else
-			snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
-	}
-	return dst;
-}
-
-const char *errstr(int err)
-{
-	static __thread char buf[12];
-
-	if (err > 0)
-		err = -err;
-
-	switch (err) {
-	case -E2BIG:		return "-E2BIG";
-	case -EACCES:		return "-EACCES";
-	case -EADDRINUSE:	return "-EADDRINUSE";
-	case -EADDRNOTAVAIL:	return "-EADDRNOTAVAIL";
-	case -EAGAIN:		return "-EAGAIN";
-	case -EALREADY:		return "-EALREADY";
-	case -EBADF:		return "-EBADF";
-	case -EBADFD:		return "-EBADFD";
-	case -EBUSY:		return "-EBUSY";
-	case -ECANCELED:	return "-ECANCELED";
-	case -ECHILD:		return "-ECHILD";
-	case -EDEADLK:		return "-EDEADLK";
-	case -EDOM:		return "-EDOM";
-	case -EEXIST:		return "-EEXIST";
-	case -EFAULT:		return "-EFAULT";
-	case -EFBIG:		return "-EFBIG";
-	case -EILSEQ:		return "-EILSEQ";
-	case -EINPROGRESS:	return "-EINPROGRESS";
-	case -EINTR:		return "-EINTR";
-	case -EINVAL:		return "-EINVAL";
-	case -EIO:		return "-EIO";
-	case -EISDIR:		return "-EISDIR";
-	case -ELOOP:		return "-ELOOP";
-	case -EMFILE:		return "-EMFILE";
-	case -EMLINK:		return "-EMLINK";
-	case -EMSGSIZE:		return "-EMSGSIZE";
-	case -ENAMETOOLONG:	return "-ENAMETOOLONG";
-	case -ENFILE:		return "-ENFILE";
-	case -ENODATA:		return "-ENODATA";
-	case -ENODEV:		return "-ENODEV";
-	case -ENOENT:		return "-ENOENT";
-	case -ENOEXEC:		return "-ENOEXEC";
-	case -ENOLINK:		return "-ENOLINK";
-	case -ENOMEM:		return "-ENOMEM";
-	case -ENOSPC:		return "-ENOSPC";
-	case -ENOTBLK:		return "-ENOTBLK";
-	case -ENOTDIR:		return "-ENOTDIR";
-	case -ENOTSUPP:		return "-ENOTSUPP";
-	case -ENOTTY:		return "-ENOTTY";
-	case -ENXIO:		return "-ENXIO";
-	case -EOPNOTSUPP:	return "-EOPNOTSUPP";
-	case -EOVERFLOW:	return "-EOVERFLOW";
-	case -EPERM:		return "-EPERM";
-	case -EPIPE:		return "-EPIPE";
-	case -EPROTO:		return "-EPROTO";
-	case -EPROTONOSUPPORT:	return "-EPROTONOSUPPORT";
-	case -ERANGE:		return "-ERANGE";
-	case -EROFS:		return "-EROFS";
-	case -ESPIPE:		return "-ESPIPE";
-	case -ESRCH:		return "-ESRCH";
-	case -ETXTBSY:		return "-ETXTBSY";
-	case -EUCLEAN:		return "-EUCLEAN";
-	case -EXDEV:		return "-EXDEV";
-	default:
-		snprintf(buf, sizeof(buf), "%d", err);
-		return buf;
-	}
-}
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
deleted file mode 100644
index 66ffebde0684..000000000000
--- a/tools/lib/bpf/str_error.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __LIBBPF_STR_ERROR_H
-#define __LIBBPF_STR_ERROR_H
-
-#define STRERR_BUFSIZE  128
-
-char *libbpf_strerror_r(int err, char *dst, int len);
-
-/**
- * @brief **errstr()** returns string corresponding to numeric errno
- * @param err negative numeric errno
- * @return pointer to string representation of the errno, that is invalidated
- * upon the next call.
- */
-const char *errstr(int err);
-#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index b811f754939f..43deb05a5197 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type {
 	BPF_USDT_ARG_CONST,
 	BPF_USDT_ARG_REG,
 	BPF_USDT_ARG_REG_DEREF,
+	BPF_USDT_ARG_SIB,
 };
 
+/*
+ * This struct layout is designed specifically to be backwards/forward
+ * compatible between libbpf versions for ARG_CONST, ARG_REG, and
+ * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
+ */
 struct __bpf_usdt_arg_spec {
 	/* u64 scalar interpreted depending on arg_type, see below */
 	__u64 val_off;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	/* arg location case, see bpf_usdt_arg() for details */
-	enum __bpf_usdt_arg_type arg_type;
+	enum __bpf_usdt_arg_type arg_type: 8;
+	/* index register offset within struct pt_regs */
+	__u16 idx_reg_off: 12;
+	/* scale factor for index register (1, 2, 4, or 8) */
+	__u16 scale_bitshift: 4;
+	/* reserved for future use, keeps reg_off offset stable */
+	__u8 __reserved: 8;
+#else
+	__u8 __reserved: 8;
+	__u16 idx_reg_off: 12;
+	__u16 scale_bitshift: 4;
+	enum __bpf_usdt_arg_type arg_type: 8;
+#endif
 	/* offset of referenced register within struct pt_regs */
 	short reg_off;
 	/* whether arg should be interpreted as signed value */
@@ -108,6 +127,38 @@ int bpf_usdt_arg_cnt(struct pt_regs *ctx)
 	return spec->arg_cnt;
 }
 
+/* Returns the size in bytes of the #*arg_num* (zero-indexed) USDT argument.
+ * Returns negative error if argument is not found or arg_num is invalid.
+ */
+static __always_inline
+int bpf_usdt_arg_size(struct pt_regs *ctx, __u64 arg_num)
+{
+	struct __bpf_usdt_arg_spec *arg_spec;
+	struct __bpf_usdt_spec *spec;
+	int spec_id;
+
+	spec_id = __bpf_usdt_spec_id(ctx);
+	if (spec_id < 0)
+		return -ESRCH;
+
+	spec = bpf_map_lookup_elem(&__bpf_usdt_specs, &spec_id);
+	if (!spec)
+		return -ESRCH;
+
+	if (arg_num >= BPF_USDT_MAX_ARG_CNT)
+		return -ENOENT;
+	barrier_var(arg_num);
+	if (arg_num >= spec->arg_cnt)
+		return -ENOENT;
+
+	arg_spec = &spec->args[arg_num];
+
+	/* arg_spec->arg_bitshift = 64 - arg_sz * 8
+	 * so: arg_sz = (64 - arg_spec->arg_bitshift) / 8
+	 */
+	return (unsigned int)(64 - arg_spec->arg_bitshift) / 8;
+}
+
 /* Fetch USDT argument #*arg_num* (zero-indexed) and put its value into *res.
  * Returns 0 on success; negative error, otherwise.
  * On error *res is guaranteed to be set to zero.
@@ -117,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
 {
 	struct __bpf_usdt_spec *spec;
 	struct __bpf_usdt_arg_spec *arg_spec;
-	unsigned long val;
+	unsigned long val, idx;
 	int err, spec_id;
 
 	*res = 0;
@@ -172,6 +223,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
 		val >>= arg_spec->arg_bitshift;
 #endif
 		break;
+	case BPF_USDT_ARG_SIB:
+		/* Arg is in memory addressed by SIB (Scale-Index-Base) mode
+		 * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
+		 * fetch the base register contents and the index register
+		 * contents from pt_regs. Then we calculate the final address
+		 * as base + (index * scale) + offset, and do a user-space
+		 * probe read to fetch the argument value.
+		 */
+		err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+		if (err)
+			return err;
+		err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
+		if (err)
+			return err;
+		err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
+		if (err)
+			return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+		val >>= arg_spec->arg_bitshift;
+#endif
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 4e4a52742b01..e3710933fd52 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -20,7 +20,6 @@
 #include "libbpf_common.h"
 #include "libbpf_internal.h"
 #include "hashmap.h"
-#include "str_error.h"
 
 /* libbpf's USDT support consists of BPF-side state/code and user-space
  * state/code working together in concert. BPF-side parts are defined in
@@ -59,7 +58,7 @@
  *
  * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y);
  *
- * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each
+ * USDT is identified by its <provider-name>:<probe-name> pair of names. Each
  * individual USDT has a fixed number of arguments (3 in the above example)
  * and specifies values of each argument as if it was a function call.
  *
@@ -81,7 +80,7 @@
  * NOP instruction that kernel can replace with an interrupt instruction to
  * trigger instrumentation code (BPF program for all that we care about).
  *
- * Semaphore above is and optional feature. It records an address of a 2-byte
+ * Semaphore above is an optional feature. It records an address of a 2-byte
  * refcount variable (normally in '.probes' ELF section) used for signaling if
  * there is anything that is attached to USDT. This is useful for user
  * applications if, for example, they need to prepare some arguments that are
@@ -121,7 +120,7 @@
  * a uprobe BPF program (which for kernel, at least currently, is just a kprobe
  * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference
  * that uprobe is usually attached at the function entry, while USDT will
- * normally will be somewhere inside the function. But it should always be
+ * normally be somewhere inside the function. But it should always be
  * pointing to NOP instruction, which makes such uprobes the fastest uprobe
  * kind.
  *
@@ -151,7 +150,7 @@
  * libbpf sets to spec ID during attach time, or, if kernel is too old to
  * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such
  * case. The latter means that some modes of operation can't be supported
- * without BPF cookie. Such mode is attaching to shared library "generically",
+ * without BPF cookie. Such a mode is attaching to shared library "generically",
  * without specifying target process. In such case, it's impossible to
  * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode
  * is not supported without BPF cookie support.
@@ -185,7 +184,7 @@
  * as even if USDT spec string is the same, USDT cookie value can be
  * different. It was deemed excessive to try to deduplicate across independent
  * USDT attachments by taking into account USDT spec string *and* USDT cookie
- * value, which would complicated spec ID accounting significantly for little
+ * value, which would complicate spec ID accounting significantly for little
  * gain.
  */
 
@@ -200,12 +199,23 @@ enum usdt_arg_type {
 	USDT_ARG_CONST,
 	USDT_ARG_REG,
 	USDT_ARG_REG_DEREF,
+	USDT_ARG_SIB,
 };
 
 /* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
 struct usdt_arg_spec {
 	__u64 val_off;
-	enum usdt_arg_type arg_type;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	enum usdt_arg_type arg_type: 8;
+	__u16	idx_reg_off: 12;
+	__u16	scale_bitshift: 4;
+	__u8 __reserved: 8;     /* keep reg_off offset stable */
+#else
+	__u8 __reserved: 8;     /* keep reg_off offset stable */
+	__u16	idx_reg_off: 12;
+	__u16	scale_bitshift: 4;
+	enum usdt_arg_type arg_type: 8;
+#endif
 	short reg_off;
 	bool arg_signed;
 	char arg_bitshift;
@@ -252,6 +262,7 @@ struct usdt_manager {
 	bool has_bpf_cookie;
 	bool has_sema_refcnt;
 	bool has_uprobe_multi;
+	bool has_uprobe_syscall;
 };
 
 struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
@@ -291,6 +302,13 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
 	 * usdt probes.
 	 */
 	man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK);
+
+	/*
+	 * Detect kernel support for uprobe() syscall; its presence means we can
+	 * take advantage of faster nop5 uprobe handling.
+	 * Added in: 56101b69c919 ("uprobes/x86: Add uprobe syscall to speed up uprobe")
+	 */
+	man->has_uprobe_syscall = kernel_supports(obj, FEAT_UPROBE_SYSCALL);
 	return man;
 }
 
@@ -570,19 +588,39 @@ static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long o
 	return NULL;
 }
 
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
-			   const char *data, size_t name_off, size_t desc_off,
-			   struct usdt_note *usdt_note);
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off,
+			   size_t desc_off, struct usdt_note *usdt_note);
 
 static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
 
-static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *path, pid_t pid,
-				const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie,
-				struct usdt_target **out_targets, size_t *out_target_cnt)
+#if defined(__x86_64__)
+static bool has_nop_combo(int fd, long off)
+{
+	unsigned char nop_combo[6] = {
+		0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 /* nop,nop5 */
+	};
+	unsigned char buf[6];
+
+	if (pread(fd, buf, 6, off) != 6)
+		return false;
+	return memcmp(buf, nop_combo, 6) == 0;
+}
+#else
+static bool has_nop_combo(int fd, long off)
+{
+	return false;
+}
+#endif
+
+static int collect_usdt_targets(struct usdt_manager *man, struct elf_fd *elf_fd, const char *path,
+				pid_t pid, const char *usdt_provider, const char *usdt_name,
+				__u64 usdt_cookie, struct usdt_target **out_targets,
+				size_t *out_target_cnt)
 {
 	size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0;
 	struct elf_seg *segs = NULL, *vma_segs = NULL;
 	struct usdt_target *targets = NULL, *target;
+	Elf *elf = elf_fd->elf;
 	long base_addr = 0;
 	Elf_Scn *notes_scn, *base_scn;
 	GElf_Shdr base_shdr, notes_shdr;
@@ -626,7 +664,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
 		struct elf_seg *seg = NULL;
 		void *tmp;
 
-		err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
+		err = parse_usdt_note(&nhdr, data->d_buf, name_off, desc_off, &note);
 		if (err)
 			goto err_out;
 
@@ -775,6 +813,16 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
 		target = &targets[target_cnt];
 		memset(target, 0, sizeof(*target));
 
+		/*
+		 * The kernel has the uprobe syscall and this USDT site is a nop,nop5
+		 * instruction combo, so we can place the uprobe directly on the nop5
+		 * (+1) and get this probe optimized.
+		 */
+		if (man->has_uprobe_syscall && has_nop_combo(elf_fd->fd, usdt_rel_ip)) {
+			usdt_abs_ip++;
+			usdt_rel_ip++;
+		}
+
 		target->abs_ip = usdt_abs_ip;
 		target->rel_ip = usdt_rel_ip;
 		target->sema_off = usdt_sema_off;
@@ -989,7 +1037,7 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct
 	/* discover USDT in given binary, optionally limiting
 	 * activations to a given PID, if pid > 0
 	 */
-	err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name,
+	err = collect_usdt_targets(man, &elf_fd, path, pid, usdt_provider, usdt_name,
 				   usdt_cookie, &targets, &target_cnt);
 	if (err <= 0) {
 		err = (err == 0) ? -ENOENT : err;
@@ -1132,8 +1180,7 @@ err_out:
 /* Parse out USDT ELF note from '.note.stapsdt' section.
  * Logic inspired by perf's code.
  */
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
-			   const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
 			   struct usdt_note *note)
 {
 	const char *provider, *name, *args;
@@ -1283,11 +1330,51 @@ static int calc_pt_regs_off(const char *reg_name)
 
 static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
 {
-	char reg_name[16];
-	int len, reg_off;
-	long off;
+	char reg_name[16] = {0}, idx_reg_name[16] = {0};
+	int len, reg_off, idx_reg_off, scale = 1;
+	long off = 0;
+
+	if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
+		   arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 ||
+		sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n",
+		       arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 ||
+		sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n",
+		       arg_sz, &off, reg_name, idx_reg_name, &len) == 4 ||
+		sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n",
+		       arg_sz, reg_name, idx_reg_name, &len) == 3
+		) {
+		/*
+		 * Scale Index Base case:
+		 * 1@-96(%rbp,%rax,8)
+		 * 1@(%rbp,%rax,8)
+		 * 1@-96(%rbp,%rax)
+		 * 1@(%rbp,%rax)
+		 */
+		arg->arg_type = USDT_ARG_SIB;
+		arg->val_off = off;
+
+		reg_off = calc_pt_regs_off(reg_name);
+		if (reg_off < 0)
+			return reg_off;
+		arg->reg_off = reg_off;
 
-	if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
+		idx_reg_off = calc_pt_regs_off(idx_reg_name);
+		if (idx_reg_off < 0)
+			return idx_reg_off;
+		arg->idx_reg_off = idx_reg_off;
+
+		/* validate scale factor and set fields directly */
+		switch (scale) {
+		case 1: arg->scale_bitshift = 0; break;
+		case 2: arg->scale_bitshift = 1; break;
+		case 4: arg->scale_bitshift = 2; break;
+		case 8: arg->scale_bitshift = 3; break;
+		default:
+			pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale);
+			return -EINVAL;
+		}
+	} else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
+				arg_sz, &off, reg_name, &len) == 3) {
 		/* Memory dereference case, e.g., -4@-20(%rbp) */
 		arg->arg_type = USDT_ARG_REG_DEREF;
 		arg->val_off = off;
@@ -1306,6 +1393,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
 	} else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
 		/* Register read case, e.g., -4@%eax */
 		arg->arg_type = USDT_ARG_REG;
+		/* register read has no memory offset */
 		arg->val_off = 0;
 
 		reg_off = calc_pt_regs_off(reg_name);
@@ -1327,8 +1415,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
 
 #elif defined(__s390x__)
 
-/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */
-
 static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
 {
 	unsigned int reg;
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 59aabdd3cabf..576ecc5fc312 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -210,6 +210,9 @@ SYNOPSIS
   struct perf_record_time_conv;
   struct perf_record_header_feature;
   struct perf_record_compressed;
+  struct perf_record_compressed2;
+  struct perf_record_schedstat_cpu;
+  struct perf_record_schedstat_domain;
 --
 
 DESCRIPTION
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index e9a7ac2c062e..32301a1d8f0c 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -41,13 +41,6 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
 
 TEST_ARGS := $(if $(V),-v)
 
-# Set compile option CFLAGS
-ifdef EXTRA_CFLAGS
-  CFLAGS := $(EXTRA_CFLAGS)
-else
-  CFLAGS := -g -Wall
-endif
-
 INCLUDES = \
 -I$(srctree)/tools/lib/perf/include \
 -I$(srctree)/tools/lib/ \
@@ -57,11 +50,12 @@ INCLUDES = \
 -I$(srctree)/tools/include/uapi
 
 # Append required CFLAGS
-override CFLAGS += $(EXTRA_WARNINGS)
-override CFLAGS += -Werror -Wall
+override CFLAGS := $(INCLUDES) $(CFLAGS)
+override CFLAGS += -g -Werror -Wall
 override CFLAGS += -fPIC
-override CFLAGS += $(INCLUDES)
 override CFLAGS += -fvisibility=hidden
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += $(EXTRA_CFLAGS)
 
 all:
 
@@ -175,6 +169,7 @@ install_lib: libs
 		cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
 
 HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
+HDRS += schedstat-v15.h schedstat-v16.h schedstat-v17.h
 INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h
 
 INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index fcc47214062a..e51b0490ad57 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -15,12 +15,12 @@
 
 #define MAX_NR_CPUS 4096
 
-void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
+void perf_cpu_map__set_nr(struct perf_cpu_map *map, unsigned int nr_cpus)
 {
 	RC_CHK_ACCESS(map)->nr = nr_cpus;
 }
 
-struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
+struct perf_cpu_map *perf_cpu_map__alloc(unsigned int nr_cpus)
 {
 	RC_STRUCT(perf_cpu_map) *cpus;
 	struct perf_cpu_map *result;
@@ -78,7 +78,7 @@ void perf_cpu_map__put(struct perf_cpu_map *map)
 static struct perf_cpu_map *cpu_map__new_sysconf(void)
 {
 	struct perf_cpu_map *cpus;
-	int nr_cpus, nr_cpus_conf;
+	long nr_cpus, nr_cpus_conf;
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	if (nr_cpus < 0)
@@ -86,15 +86,13 @@ static struct perf_cpu_map *cpu_map__new_sysconf(void)
 
 	nr_cpus_conf = sysconf(_SC_NPROCESSORS_CONF);
 	if (nr_cpus != nr_cpus_conf) {
-		pr_warning("Number of online CPUs (%d) differs from the number configured (%d) the CPU map will only cover the first %d CPUs.",
+		pr_warning("Number of online CPUs (%ld) differs from the number configured (%ld) the CPU map will only cover the first %ld CPUs.",
 			nr_cpus, nr_cpus_conf, nr_cpus);
 	}
 
 	cpus = perf_cpu_map__alloc(nr_cpus);
 	if (cpus != NULL) {
-		int i;
-
-		for (i = 0; i < nr_cpus; ++i)
+		for (long i = 0; i < nr_cpus; ++i)
 			RC_CHK_ACCESS(cpus)->map[i].cpu = i;
 	}
 
@@ -132,23 +130,23 @@ static int cmp_cpu(const void *a, const void *b)
 	return cpu_a->cpu - cpu_b->cpu;
 }
 
-static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+static struct perf_cpu __perf_cpu_map__cpu(const struct perf_cpu_map *cpus, unsigned int idx)
 {
 	return RC_CHK_ACCESS(cpus)->map[idx];
 }
 
-static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
+static struct perf_cpu_map *cpu_map__trim_new(unsigned int nr_cpus, const struct perf_cpu *tmp_cpus)
 {
 	size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
 	struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus);
-	int i, j;
 
 	if (cpus != NULL) {
+		unsigned int j = 0;
+
 		memcpy(RC_CHK_ACCESS(cpus)->map, tmp_cpus, payload_size);
 		qsort(RC_CHK_ACCESS(cpus)->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
 		/* Remove dups */
-		j = 0;
-		for (i = 0; i < nr_cpus; i++) {
+		for (unsigned int i = 0; i < nr_cpus; i++) {
 			if (i == 0 ||
 			    __perf_cpu_map__cpu(cpus, i).cpu !=
 			    __perf_cpu_map__cpu(cpus, i - 1).cpu) {
@@ -167,9 +165,8 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 	struct perf_cpu_map *cpus = NULL;
 	unsigned long start_cpu, end_cpu = 0;
 	char *p = NULL;
-	int i, nr_cpus = 0;
+	unsigned int nr_cpus = 0, max_entries = 0;
 	struct perf_cpu *tmp_cpus = NULL, *tmp;
-	int max_entries = 0;
 
 	if (!cpu_list)
 		return perf_cpu_map__new_online_cpus();
@@ -185,7 +182,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 	while (isdigit(*cpu_list)) {
 		p = NULL;
 		start_cpu = strtoul(cpu_list, &p, 0);
-		if (start_cpu >= INT_MAX
+		if (start_cpu >= INT16_MAX
 		    || (*p != '\0' && *p != ',' && *p != '-' && *p != '\n'))
 			goto invalid;
 
@@ -194,7 +191,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 			p = NULL;
 			end_cpu = strtoul(cpu_list, &p, 0);
 
-			if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
+			if (end_cpu >= INT16_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
 				goto invalid;
 
 			if (end_cpu < start_cpu)
@@ -208,9 +205,10 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 
 		for (; start_cpu <= end_cpu; start_cpu++) {
 			/* check for duplicates */
-			for (i = 0; i < nr_cpus; i++)
-				if (tmp_cpus[i].cpu == (int)start_cpu)
+			for (unsigned int i = 0; i < nr_cpus; i++) {
+				if (tmp_cpus[i].cpu == (int16_t)start_cpu)
 					goto invalid;
+			}
 
 			if (nr_cpus == max_entries) {
 				max_entries += max(end_cpu - start_cpu + 1, 16UL);
@@ -219,7 +217,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 					goto invalid;
 				tmp_cpus = tmp;
 			}
-			tmp_cpus[nr_cpus++].cpu = (int)start_cpu;
+			tmp_cpus[nr_cpus++].cpu = (int16_t)start_cpu;
 		}
 		if (*p)
 			++p;
@@ -242,12 +240,22 @@ out:
 	return cpus;
 }
 
-static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus)
+struct perf_cpu_map *perf_cpu_map__new_int(int cpu)
+{
+	struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
+
+	if (cpus)
+		RC_CHK_ACCESS(cpus)->map[0].cpu = cpu;
+
+	return cpus;
+}
+
+static unsigned int __perf_cpu_map__nr(const struct perf_cpu_map *cpus)
 {
 	return RC_CHK_ACCESS(cpus)->nr;
 }
 
-struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
+struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, unsigned int idx)
 {
 	struct perf_cpu result = {
 		.cpu = -1
@@ -259,7 +267,7 @@ struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
 	return result;
 }
 
-int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
+unsigned int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
 {
 	return cpus ? __perf_cpu_map__nr(cpus) : 1;
 }
@@ -284,7 +292,7 @@ bool perf_cpu_map__is_empty(const struct perf_cpu_map *map)
 
 int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
 {
-	int low, high;
+	unsigned int low, high;
 
 	if (!cpus)
 		return -1;
@@ -314,7 +322,7 @@ bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
 
 bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_map *rhs)
 {
-	int nr;
+	unsigned int nr;
 
 	if (lhs == rhs)
 		return true;
@@ -326,7 +334,7 @@ bool perf_cpu_map__equal(const struct perf_cpu_map *lhs, const struct perf_cpu_m
 	if (nr != __perf_cpu_map__nr(rhs))
 		return false;
 
-	for (int idx = 0; idx < nr; idx++) {
+	for (unsigned int idx = 0; idx < nr; idx++) {
 		if (__perf_cpu_map__cpu(lhs, idx).cpu != __perf_cpu_map__cpu(rhs, idx).cpu)
 			return false;
 	}
@@ -343,7 +351,7 @@ struct perf_cpu perf_cpu_map__min(const struct perf_cpu_map *map)
 	struct perf_cpu cpu, result = {
 		.cpu = -1
 	};
-	int idx;
+	unsigned int idx;
 
 	perf_cpu_map__for_each_cpu_skip_any(cpu, idx, map) {
 		result = cpu;
@@ -358,10 +366,12 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
 		.cpu = -1
 	};
 
-	// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
-	return __perf_cpu_map__nr(map) > 0
-		? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1)
-		: result;
+	if (!map)
+		return result;
+
+	// The CPUs are always sorted and nr is always > 0 as a 0 length map is
+	// encoded as NULL.
+	return __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1);
 }
 
 /** Is 'b' a subset of 'a'. */
@@ -372,7 +382,7 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
 	if (!a || __perf_cpu_map__nr(b) > __perf_cpu_map__nr(a))
 		return false;
 
-	for (int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) {
+	for (unsigned int i = 0, j = 0; i < __perf_cpu_map__nr(a); i++) {
 		if (__perf_cpu_map__cpu(a, i).cpu > __perf_cpu_map__cpu(b, j).cpu)
 			return false;
 		if (__perf_cpu_map__cpu(a, i).cpu == __perf_cpu_map__cpu(b, j).cpu) {
@@ -398,8 +408,7 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
 int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other)
 {
 	struct perf_cpu *tmp_cpus;
-	int tmp_len;
-	int i, j, k;
+	unsigned int tmp_len, i, j, k;
 	struct perf_cpu_map *merged;
 
 	if (perf_cpu_map__is_subset(*orig, other))
@@ -443,21 +452,33 @@ int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other)
 struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
 					     struct perf_cpu_map *other)
 {
-	struct perf_cpu *tmp_cpus;
-	int tmp_len;
-	int i, j, k;
-	struct perf_cpu_map *merged = NULL;
+	unsigned int i, j, k;
+	struct perf_cpu_map *merged;
 
 	if (perf_cpu_map__is_subset(other, orig))
 		return perf_cpu_map__get(orig);
 	if (perf_cpu_map__is_subset(orig, other))
 		return perf_cpu_map__get(other);
 
-	tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other));
-	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
-	if (!tmp_cpus)
+	i = j = k = 0;
+	while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+		if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
+			i++;
+		else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
+			j++;
+		else { /* CPUs match. */
+			i++;
+			j++;
+			k++;
+		}
+	}
+	if (k == 0) /* Maps are completely disjoint. */
 		return NULL;
 
+	merged = perf_cpu_map__alloc(k);
+	if (!merged)
+		return NULL;
+	/* Entries are added to merged in sorted order, so no need to sort again. */
 	i = j = k = 0;
 	while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
 		if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
@@ -466,11 +487,8 @@ struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
 			j++;
 		else {
 			j++;
-			tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+			RC_CHK_ACCESS(merged)->map[k++] = __perf_cpu_map__cpu(orig, i++);
 		}
 	}
-	if (k)
-		merged = cpu_map__trim_new(k, tmp_cpus);
-	free(tmp_cpus);
 	return merged;
 }
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index b1f4c8176b32..1f210dadd666 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -36,49 +36,110 @@ void perf_evlist__init(struct perf_evlist *evlist)
 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
 					  struct perf_evsel *evsel)
 {
-	if (evsel->system_wide) {
-		/* System wide: set the cpu map of the evsel to all online CPUs. */
-		perf_cpu_map__put(evsel->cpus);
-		evsel->cpus = perf_cpu_map__new_online_cpus();
-	} else if (evlist->has_user_cpus && evsel->is_pmu_core) {
-		/*
-		 * User requested CPUs on a core PMU, ensure the requested CPUs
-		 * are valid by intersecting with those of the PMU.
-		 */
+	if (perf_cpu_map__is_empty(evsel->cpus)) {
+		if (perf_cpu_map__is_empty(evsel->pmu_cpus)) {
+			/*
+			 * Assume the unset PMU cpus were for a system-wide
+			 * event, like a software or tracepoint.
+			 */
+			evsel->pmu_cpus = perf_cpu_map__new_online_cpus();
+		}
+		if (evlist->has_user_cpus && !evsel->system_wide) {
+			/*
+			 * Use the user CPUs unless the evsel is set to be
+			 * system wide, such as the dummy event.
+			 */
+			evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
+		} else {
+			/*
+			 * System wide and other modes, assume the cpu map
+			 * should be set to all PMU CPUs.
+			 */
+			evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus);
+		}
+	}
+	/*
+	 * Avoid "any CPU"(-1) for uncore and PMUs that require a CPU, even if
+	 * requested.
+	 */
+	if (evsel->requires_cpu && perf_cpu_map__has_any_cpu(evsel->cpus)) {
 		perf_cpu_map__put(evsel->cpus);
-		evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus);
+		evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus);
+	}
 
-		/*
-		 * Empty cpu lists would eventually get opened as "any" so remove
-		 * genuinely empty ones before they're opened in the wrong place.
-		 */
-		if (perf_cpu_map__is_empty(evsel->cpus)) {
-			struct perf_evsel *next = perf_evlist__next(evlist, evsel);
-
-			perf_evlist__remove(evlist, evsel);
-			/* Keep idx contiguous */
-			if (next)
-				list_for_each_entry_from(next, &evlist->entries, node)
-					next->idx--;
+	/*
+	 * Globally requested CPUs replace user requested unless the evsel is
+	 * set to be system wide.
+	 */
+	if (evlist->has_user_cpus && !evsel->system_wide) {
+		assert(!perf_cpu_map__has_any_cpu(evlist->user_requested_cpus));
+		if (!perf_cpu_map__equal(evsel->cpus, evlist->user_requested_cpus)) {
+			perf_cpu_map__put(evsel->cpus);
+			evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
 		}
-	} else if (!evsel->own_cpus || evlist->has_user_cpus ||
-		(!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) {
-		/*
-		 * The PMU didn't specify a default cpu map, this isn't a core
-		 * event and the user requested CPUs or the evlist user
-		 * requested CPUs have the "any CPU" (aka dummy) CPU value. In
-		 * which case use the user requested CPUs rather than the PMU
-		 * ones.
-		 */
+	}
+
+	/* Ensure cpus only references valid PMU CPUs. */
+	if (!perf_cpu_map__has_any_cpu(evsel->cpus) &&
+	    !perf_cpu_map__is_subset(evsel->pmu_cpus, evsel->cpus)) {
+		struct perf_cpu_map *tmp = perf_cpu_map__intersect(evsel->pmu_cpus, evsel->cpus);
+
 		perf_cpu_map__put(evsel->cpus);
-		evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
-	} else if (evsel->cpus != evsel->own_cpus) {
-		/*
-		 * No user requested cpu map but the PMU cpu map doesn't match
-		 * the evsel's. Reset it back to the PMU cpu map.
-		 */
+		evsel->cpus = tmp;
+	}
+
+	/*
+	 * Was the event requested on all of the PMU's CPUs while the user
+	 * requested any CPU (-1)? If so, switch to using any CPU (-1) to reduce
+	 * the number of events.
+	 */
+	if (!evsel->system_wide &&
+	    !evsel->requires_cpu &&
+	    perf_cpu_map__equal(evsel->cpus, evsel->pmu_cpus) &&
+	    perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)) {
 		perf_cpu_map__put(evsel->cpus);
-		evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
+		evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
+	}
+
+	/*
+	 * Tool events may only read on the first CPU index to avoid double
+	 * counting things like duration_time. Make the evsel->cpus contain just
+	 * that single entry otherwise we may spend time changing affinity to
+	 * CPUs that just have tool events, etc.
+	 */
+	if (evsel->reads_only_on_cpu_idx0 && perf_cpu_map__nr(evsel->cpus) > 0) {
+		struct perf_cpu_map *srcs[3] = {
+			evlist->all_cpus,
+			evlist->user_requested_cpus,
+			evsel->pmu_cpus,
+		};
+		for (size_t i = 0; i < ARRAY_SIZE(srcs); i++) {
+			if (!srcs[i])
+				continue;
+
+			perf_cpu_map__put(evsel->cpus);
+			evsel->cpus = perf_cpu_map__new_int(perf_cpu_map__cpu(srcs[i], 0).cpu);
+			break;
+		}
+	}
+
+	/* Sanity check assert before the evsel is potentially removed. */
+	assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus));
+
+	/*
+	 * Empty cpu lists would eventually get opened as "any" so remove
+	 * genuinely empty ones before they're opened in the wrong place.
+	 */
+	if (perf_cpu_map__is_empty(evsel->cpus)) {
+		struct perf_evsel *next = perf_evlist__next(evlist, evsel);
+
+		perf_evlist__remove(evlist, evsel);
+		/* Keep idx contiguous */
+		if (next)
+			list_for_each_entry_from(next, &evlist->entries, node)
+				next->idx--;
+
+		return;
 	}
 
 	if (evsel->system_wide) {
@@ -94,12 +155,22 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
 
 static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
 {
-	struct perf_evsel *evsel, *n;
-
 	evlist->needs_map_propagation = true;
 
-	list_for_each_entry_safe(evsel, n, &evlist->entries, node)
-		__perf_evlist__propagate_maps(evlist, evsel);
+	/* Clear the all_cpus set which will be merged into during propagation. */
+	perf_cpu_map__put(evlist->all_cpus);
+	evlist->all_cpus = NULL;
+
+	/* 2 rounds so reads_only_on_cpu_idx0 evsels benefit from knowing the other CPU maps. */
+	for (int round = 0; round < 2; round++) {
+		struct perf_evsel *evsel, *n;
+
+		list_for_each_entry_safe(evsel, n, &evlist->entries, node) {
+			if ((!evsel->reads_only_on_cpu_idx0 && round == 0) ||
+			    (evsel->reads_only_on_cpu_idx0 && round == 1))
+				__perf_evlist__propagate_maps(evlist, evsel);
+		}
+	}
 }
 
 void perf_evlist__add(struct perf_evlist *evlist,
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c
index c475319e2e41..f747c0bc692d 100644
--- a/tools/lib/perf/evsel.c
+++ b/tools/lib/perf/evsel.c
@@ -40,8 +40,19 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
 	return evsel;
 }
 
+void perf_evsel__exit(struct perf_evsel *evsel)
+{
+	assert(evsel->fd == NULL);  /* If not, fds were not closed. */
+	assert(evsel->mmap == NULL); /* If not, munmap wasn't called. */
+	assert(evsel->sample_id == NULL); /* If not, free_id wasn't called. */
+	perf_cpu_map__put(evsel->cpus);
+	perf_cpu_map__put(evsel->pmu_cpus);
+	perf_thread_map__put(evsel->threads);
+}
+
 void perf_evsel__delete(struct perf_evsel *evsel)
 {
+	perf_evsel__exit(evsel);
 	free(evsel);
 }
 
@@ -116,7 +127,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
 		     struct perf_thread_map *threads)
 {
 	struct perf_cpu cpu;
-	int idx, thread, err = 0;
+	unsigned int idx;
+	int thread, err = 0;
 
 	if (cpus == NULL) {
 		static struct perf_cpu_map *empty_cpu_map;
@@ -449,7 +461,7 @@ int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
 int perf_evsel__enable_thread(struct perf_evsel *evsel, int thread)
 {
 	struct perf_cpu cpu __maybe_unused;
-	int idx;
+	unsigned int idx;
 	int err;
 
 	perf_cpu_map__for_each_cpu(cpu, idx, evsel->cpus) {
@@ -488,12 +500,13 @@ int perf_evsel__disable(struct perf_evsel *evsel)
 
 int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
 {
-	int err = 0, i;
+	int err = 0;
 
-	for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++)
+	for (unsigned int i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++) {
 		err = perf_evsel__run_ioctl(evsel,
 				     PERF_EVENT_IOC_SET_FILTER,
 				     (void *)filter, i);
+	}
 	return err;
 }
 
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index e2be2d17c32b..c19678188b17 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -16,16 +16,16 @@
 DECLARE_RC_STRUCT(perf_cpu_map) {
 	refcount_t	refcnt;
 	/** Length of the map array. */
-	int		nr;
+	unsigned int	nr;
 	/** The CPU values. */
 	struct perf_cpu	map[];
 };
 
-struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus);
+struct perf_cpu_map *perf_cpu_map__alloc(unsigned int nr_cpus);
 int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
 bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);
 
-void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus);
+void perf_cpu_map__set_nr(struct perf_cpu_map *map, unsigned int nr_cpus);
 
 static inline refcount_t *perf_cpu_map__refcnt(struct perf_cpu_map *map)
 {
diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h
index ea78defa77d0..b988034f1371 100644
--- a/tools/lib/perf/include/internal/evsel.h
+++ b/tools/lib/perf/include/internal/evsel.h
@@ -99,7 +99,7 @@ struct perf_evsel {
 	 * cpu map for opening the event on, for example, the first CPU on a
 	 * socket for an uncore event.
 	 */
-	struct perf_cpu_map	*own_cpus;
+	struct perf_cpu_map	*pmu_cpus;
 	struct perf_thread_map	*threads;
 	struct xyarray		*fd;
 	struct xyarray		*mmap;
@@ -128,11 +128,14 @@ struct perf_evsel {
 	bool			 requires_cpu;
 	/** Is the PMU for the event a core one? Effects the handling of own_cpus. */
 	bool			 is_pmu_core;
+	/** Does the evsel only read on the first CPU index, such as tool time events? */
+	bool			 reads_only_on_cpu_idx0;
 	int			 idx;
 };
 
 void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr,
 		      int idx);
+void perf_evsel__exit(struct perf_evsel *evsel);
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 void perf_evsel__close_fd(struct perf_evsel *evsel);
 void perf_evsel__free_fd(struct perf_evsel *evsel);
diff --git a/tools/lib/perf/include/perf/core.h b/tools/lib/perf/include/perf/core.h
index a3f6d68edad7..06cc132d88cf 100644
--- a/tools/lib/perf/include/perf/core.h
+++ b/tools/lib/perf/include/perf/core.h
@@ -5,7 +5,7 @@
 #include <stdarg.h>
 
 #ifndef LIBPERF_API
-#define LIBPERF_API __attribute__((visibility("default")))
+#define LIBPERF_API extern __attribute__((visibility("default")))
 #endif
 
 enum libperf_print_level {
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 188a667babc6..a1dd25db65b6 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -4,10 +4,11 @@
 
 #include <perf/core.h>
 #include <stdbool.h>
+#include <stdint.h>
 
 /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
 struct perf_cpu {
-	int cpu;
+	int16_t cpu;
 };
 
 struct perf_cache {
@@ -36,6 +37,8 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);
  *                     perf_cpu_map__new_online_cpus is returned.
  */
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
+/** perf_cpu_map__new_int - create a map with the one given cpu. */
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_int(int cpu);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
 LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig,
 				    struct perf_cpu_map *other);
@@ -46,7 +49,7 @@ LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
  * perf_cpu_map__cpu - get the CPU value at the given index. Returns -1 if index
  *                     is invalid.
  */
-LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
+LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, unsigned int idx);
 /**
  * perf_cpu_map__nr - for an empty map returns 1, as perf_cpu_map__cpu returns a
  *                    cpu of -1 for an invalid index, this makes an empty map
@@ -54,7 +57,7 @@ LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, i
  *                    the result is the number CPUs in the map plus one if the
  *                    "any CPU"/dummy value is present.
  */
-LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
+LIBPERF_API unsigned int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
 /**
  * perf_cpu_map__has_any_cpu_or_is_empty - is map either empty or has the "any CPU"/dummy value.
  */
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 37bb7771d914..9043dc72b5d6 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -151,6 +151,18 @@ struct perf_record_switch {
 	__u32			 next_prev_tid;
 };
 
+struct perf_record_callchain_deferred {
+	struct perf_event_header header;
+	/*
+	 * This is to match kernel and (deferred) user stacks together.
+	 * The kernel part will be in the sample callchain array after
+	 * the PERF_CONTEXT_USER_DEFERRED entry.
+	 */
+	__u64			 cookie;
+	__u64			 nr;
+	__u64			 ips[];
+};
+
 struct perf_record_header_attr {
 	struct perf_event_header header;
 	struct perf_event_attr	 attr;
@@ -291,6 +303,7 @@ struct perf_record_header_event_type {
 struct perf_record_header_tracing_data {
 	struct perf_event_header header;
 	__u32			 size;
+	__u32			 pad;
 };
 
 #define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15)
@@ -457,6 +470,97 @@ struct perf_record_compressed {
 	char			 data[];
 };
 
+/*
+ * `header.size` includes the padding we are going to add while writing the record.
+ * `data_size` only includes the size of `data[]` itself.
+ */
+struct perf_record_compressed2 {
+	struct perf_event_header header;
+	__u64			 data_size;
+	char			 data[];
+};
+
+#define BPF_METADATA_KEY_LEN   64
+#define BPF_METADATA_VALUE_LEN 256
+#define BPF_PROG_NAME_LEN      KSYM_NAME_LEN
+
+struct perf_record_bpf_metadata_entry {
+	char key[BPF_METADATA_KEY_LEN];
+	char value[BPF_METADATA_VALUE_LEN];
+};
+
+struct perf_record_bpf_metadata {
+	struct perf_event_header	      header;
+	char				      prog_name[BPF_PROG_NAME_LEN];
+	__u64				      nr_entries;
+	struct perf_record_bpf_metadata_entry entries[];
+};
+
+struct perf_record_schedstat_cpu_v15 {
+#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver)		_type _name
+#include "schedstat-v15.h"
+#undef CPU_FIELD
+};
+
+struct perf_record_schedstat_cpu_v16 {
+#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver)		_type _name
+#include "schedstat-v16.h"
+#undef CPU_FIELD
+};
+
+struct perf_record_schedstat_cpu_v17 {
+#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver)		_type _name
+#include "schedstat-v17.h"
+#undef CPU_FIELD
+};
+
+struct perf_record_schedstat_cpu {
+	struct perf_event_header header;
+	__u64			 timestamp;
+	__u32			 cpu;
+	__u16			 version;
+	/* Padding */
+	char			 __pad[2];
+	union {
+		struct perf_record_schedstat_cpu_v15 v15;
+		struct perf_record_schedstat_cpu_v16 v16;
+		struct perf_record_schedstat_cpu_v17 v17;
+	};
+};
+
+struct perf_record_schedstat_domain_v15 {
+#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver)		_type _name
+#include "schedstat-v15.h"
+#undef DOMAIN_FIELD
+};
+
+struct perf_record_schedstat_domain_v16 {
+#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver)		_type _name
+#include "schedstat-v16.h"
+#undef DOMAIN_FIELD
+};
+
+struct perf_record_schedstat_domain_v17 {
+#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver)		_type _name
+#include "schedstat-v17.h"
+#undef DOMAIN_FIELD
+};
+
+#define DOMAIN_NAME_LEN		16
+
+struct perf_record_schedstat_domain {
+	struct perf_event_header header;
+	__u64			 timestamp;
+	__u32			 cpu;
+	__u16			 version;
+	__u16			 domain;
+	union {
+		struct perf_record_schedstat_domain_v15 v15;
+		struct perf_record_schedstat_domain_v16 v16;
+		struct perf_record_schedstat_domain_v17 v17;
+	};
+};
+
 enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_USER_TYPE_START		= 64,
 	PERF_RECORD_HEADER_ATTR			= 64,
@@ -478,6 +582,10 @@ enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_HEADER_FEATURE		= 80,
 	PERF_RECORD_COMPRESSED			= 81,
 	PERF_RECORD_FINISHED_INIT		= 82,
+	PERF_RECORD_COMPRESSED2			= 83,
+	PERF_RECORD_BPF_METADATA		= 84,
+	PERF_RECORD_SCHEDSTAT_CPU		= 85,
+	PERF_RECORD_SCHEDSTAT_DOMAIN		= 86,
 	PERF_RECORD_HEADER_MAX
 };
 
@@ -494,6 +602,7 @@ union perf_event {
 	struct perf_record_read			read;
 	struct perf_record_throttle		throttle;
 	struct perf_record_sample		sample;
+	struct perf_record_callchain_deferred	callchain_deferred;
 	struct perf_record_bpf_event		bpf;
 	struct perf_record_ksymbol		ksymbol;
 	struct perf_record_text_poke_event	text_poke;
@@ -518,6 +627,10 @@ union perf_event {
 	struct perf_record_time_conv		time_conv;
 	struct perf_record_header_feature	feat;
 	struct perf_record_compressed		pack;
+	struct perf_record_compressed2		pack2;
+	struct perf_record_bpf_metadata		bpf_metadata;
+	struct perf_record_schedstat_cpu	schedstat_cpu;
+	struct perf_record_schedstat_domain	schedstat_domain;
 };
 
 #endif /* __LIBPERF_EVENT_H */
diff --git a/tools/lib/perf/include/perf/schedstat-v15.h b/tools/lib/perf/include/perf/schedstat-v15.h
new file mode 100644
index 000000000000..639458df05f8
--- /dev/null
+++ b/tools/lib/perf/include/perf/schedstat-v15.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CPU_FIELD
+CPU_FIELD(__u32, yld_count, "sched_yield() count",
+	  "%11u", false, yld_count, v15);
+CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored",
+	  "%11u", false, array_exp, v15);
+CPU_FIELD(__u32, sched_count, "schedule() called",
+	  "%11u", false, sched_count, v15);
+CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle",
+	  "%11u", true, sched_count, v15);
+CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called",
+	  "%11u", false, ttwu_count, v15);
+CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu",
+	  "%11u", true, ttwu_count, v15);
+CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)",
+	  "%11llu", false, rq_cpu_time, v15);
+CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)",
+	  "%11llu", true, rq_cpu_time, v15);
+CPU_FIELD(__u64, pcount, "total timeslices run on this cpu",
+	  "%11llu", false, pcount, v15);
+#endif
+
+#ifdef DOMAIN_FIELD
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category idle> ");
+#endif
+DOMAIN_FIELD(__u32, idle_lb_count,
+	     "load_balance() count on cpu idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, idle_lb_balanced,
+	     "load_balance() found balanced on cpu idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, idle_lb_failed,
+	     "load_balance() move task failed on cpu idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, idle_lb_imbalance,
+	     "imbalance sum on cpu idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, idle_lb_gained,
+	     "pull_task() count on cpu idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, idle_lb_hot_gained,
+	     "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, idle_lb_nobusyq,
+	     "load_balance() failed to find busier queue on cpu idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, idle_lb_nobusyg,
+	     "load_balance() failed to find busier group on cpu idle", "%11u", true, v15);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(idle_lb_success_count, "load_balance() success count on cpu idle", "%11u",
+		  idle_lb_count, idle_lb_balanced, idle_lb_failed, v15);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(idle_lb_avg_pulled,
+		  "avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf",
+		  idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v15);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category busy> ");
+#endif
+DOMAIN_FIELD(__u32, busy_lb_count,
+	     "load_balance() count on cpu busy", "%11u", true, v15);
+DOMAIN_FIELD(__u32, busy_lb_balanced,
+	     "load_balance() found balanced on cpu busy", "%11u", true, v15);
+DOMAIN_FIELD(__u32, busy_lb_failed,
+	     "load_balance() move task failed on cpu busy", "%11u", true, v15);
+DOMAIN_FIELD(__u32, busy_lb_imbalance,
+	     "imbalance sum on cpu busy", "%11u", false, v15);
+DOMAIN_FIELD(__u32, busy_lb_gained,
+	     "pull_task() count on cpu busy", "%11u", false, v15);
+DOMAIN_FIELD(__u32, busy_lb_hot_gained,
+	     "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v15);
+DOMAIN_FIELD(__u32, busy_lb_nobusyq,
+	     "load_balance() failed to find busier queue on cpu busy", "%11u", true, v15);
+DOMAIN_FIELD(__u32, busy_lb_nobusyg,
+	     "load_balance() failed to find busier group on cpu busy", "%11u", true, v15);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(busy_lb_success_count, "load_balance() success count on cpu busy", "%11u",
+		  busy_lb_count, busy_lb_balanced, busy_lb_failed, v15);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(busy_lb_avg_pulled,
+		  "avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf",
+		  busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v15);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category newidle> ");
+#endif
+DOMAIN_FIELD(__u32, newidle_lb_count,
+	     "load_balance() count on cpu newly idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, newidle_lb_balanced,
+	     "load_balance() found balanced on cpu newly idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, newidle_lb_failed,
+	     "load_balance() move task failed on cpu newly idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, newidle_lb_imbalance,
+	     "imbalance sum on cpu newly idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, newidle_lb_gained,
+	     "pull_task() count on cpu newly idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, newidle_lb_hot_gained,
+	     "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v15);
+DOMAIN_FIELD(__u32, newidle_lb_nobusyq,
+	     "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v15);
+DOMAIN_FIELD(__u32, newidle_lb_nobusyg,
+	     "load_balance() failed to find busier group on cpu newly idle", "%11u", true, v15);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(newidle_lb_success_count,
+		  "load_balance() success count on cpu newly idle", "%11u",
+		  newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v15);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(newidle_lb_avg_pulled,
+		  "avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf",
+		  newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v15);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category active_load_balance()> ");
+#endif
+DOMAIN_FIELD(__u32, alb_count,
+	     "active_load_balance() count", "%11u", false, v15);
+DOMAIN_FIELD(__u32, alb_failed,
+	     "active_load_balance() move task failed", "%11u", false, v15);
+DOMAIN_FIELD(__u32, alb_pushed,
+	     "active_load_balance() successfully moved a task", "%11u", false, v15);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category sched_balance_exec()> ");
+#endif
+DOMAIN_FIELD(__u32, sbe_count,
+	     "sbe_count is not used", "%11u", false, v15);
+DOMAIN_FIELD(__u32, sbe_balanced,
+	     "sbe_balanced is not used", "%11u", false, v15);
+DOMAIN_FIELD(__u32, sbe_pushed,
+	     "sbe_pushed is not used", "%11u", false, v15);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category sched_balance_fork()> ");
+#endif
+DOMAIN_FIELD(__u32, sbf_count,
+	     "sbf_count is not used", "%11u", false, v15);
+DOMAIN_FIELD(__u32, sbf_balanced,
+	     "sbf_balanced is not used", "%11u", false, v15);
+DOMAIN_FIELD(__u32, sbf_pushed,
+	     "sbf_pushed is not used", "%11u", false, v15);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Wakeup Info> ");
+#endif
+DOMAIN_FIELD(__u32, ttwu_wake_remote,
+	     "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v15);
+DOMAIN_FIELD(__u32, ttwu_move_affine,
+	     "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v15);
+DOMAIN_FIELD(__u32, ttwu_move_balance,
+	     "try_to_wake_up() started passive balancing", "%11u", false, v15);
+#endif /* DOMAIN_FIELD */
diff --git a/tools/lib/perf/include/perf/schedstat-v16.h b/tools/lib/perf/include/perf/schedstat-v16.h
new file mode 100644
index 000000000000..3462b79c29af
--- /dev/null
+++ b/tools/lib/perf/include/perf/schedstat-v16.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CPU_FIELD
+CPU_FIELD(__u32, yld_count, "sched_yield() count",
+	  "%11u", false, yld_count, v16);
+CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored",
+	  "%11u", false, array_exp, v16);
+CPU_FIELD(__u32, sched_count, "schedule() called",
+	  "%11u", false, sched_count, v16);
+CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle",
+	  "%11u", true, sched_count, v16);
+CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called",
+	  "%11u", false, ttwu_count, v16);
+CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu",
+	  "%11u", true, ttwu_count, v16);
+CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)",
+	  "%11llu", false, rq_cpu_time, v16);
+CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)",
+	  "%11llu", true, rq_cpu_time, v16);
+CPU_FIELD(__u64, pcount, "total timeslices run on this cpu",
+	  "%11llu", false, pcount, v16);
+#endif /* CPU_FIELD */
+
+#ifdef DOMAIN_FIELD
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category busy> ");
+#endif
+DOMAIN_FIELD(__u32, busy_lb_count,
+	     "load_balance() count on cpu busy", "%11u", true, v16);
+DOMAIN_FIELD(__u32, busy_lb_balanced,
+	     "load_balance() found balanced on cpu busy", "%11u", true, v16);
+DOMAIN_FIELD(__u32, busy_lb_failed,
+	     "load_balance() move task failed on cpu busy", "%11u", true, v16);
+DOMAIN_FIELD(__u32, busy_lb_imbalance,
+	     "imbalance sum on cpu busy", "%11u", false, v16);
+DOMAIN_FIELD(__u32, busy_lb_gained,
+	     "pull_task() count on cpu busy", "%11u", false, v16);
+DOMAIN_FIELD(__u32, busy_lb_hot_gained,
+	     "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v16);
+DOMAIN_FIELD(__u32, busy_lb_nobusyq,
+	     "load_balance() failed to find busier queue on cpu busy", "%11u", true, v16);
+DOMAIN_FIELD(__u32, busy_lb_nobusyg,
+	     "load_balance() failed to find busier group on cpu busy", "%11u", true, v16);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(busy_lb_success_count, "load_balance() success count on cpu busy", "%11u",
+		  busy_lb_count, busy_lb_balanced, busy_lb_failed, v16);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(busy_lb_avg_pulled,
+		  "avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf",
+		  busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v16);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category idle> ");
+#endif
+DOMAIN_FIELD(__u32, idle_lb_count,
+	     "load_balance() count on cpu idle", "%11u", true, v16);
+DOMAIN_FIELD(__u32, idle_lb_balanced,
+	     "load_balance() found balanced on cpu idle", "%11u", true, v16);
+DOMAIN_FIELD(__u32, idle_lb_failed,
+	     "load_balance() move task failed on cpu idle", "%11u", true, v16);
+DOMAIN_FIELD(__u32, idle_lb_imbalance,
+	     "imbalance sum on cpu idle", "%11u", false, v16);
+DOMAIN_FIELD(__u32, idle_lb_gained,
+	     "pull_task() count on cpu idle", "%11u", false, v16);
+DOMAIN_FIELD(__u32, idle_lb_hot_gained,
+	     "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v16);
+DOMAIN_FIELD(__u32, idle_lb_nobusyq,
+	     "load_balance() failed to find busier queue on cpu idle", "%11u", true, v16);
+DOMAIN_FIELD(__u32, idle_lb_nobusyg,
+	     "load_balance() failed to find busier group on cpu idle", "%11u", true, v16);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(idle_lb_success_count, "load_balance() success count on cpu idle", "%11u",
+		  idle_lb_count, idle_lb_balanced, idle_lb_failed, v16);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(idle_lb_avg_pulled,
+		  "avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf",
+		  idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v16);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category newidle> ");
+#endif
+DOMAIN_FIELD(__u32, newidle_lb_count,
+	     "load_balance() count on cpu newly idle", "%11u", true, v16);
+DOMAIN_FIELD(__u32, newidle_lb_balanced,
+	     "load_balance() found balanced on cpu newly idle", "%11u", true, v16);
+DOMAIN_FIELD(__u32, newidle_lb_failed,
+	     "load_balance() move task failed on cpu newly idle", "%11u", true, v16);
+DOMAIN_FIELD(__u32, newidle_lb_imbalance,
+	     "imbalance sum on cpu newly idle", "%11u", false, v16);
+DOMAIN_FIELD(__u32, newidle_lb_gained,
+	     "pull_task() count on cpu newly idle", "%11u", false, v16);
+DOMAIN_FIELD(__u32, newidle_lb_hot_gained,
+	     "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v16);
+DOMAIN_FIELD(__u32, newidle_lb_nobusyq,
+	     "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v16);
+DOMAIN_FIELD(__u32, newidle_lb_nobusyg,
+	     "load_balance() failed to find busier group on cpu newly idle", "%11u", true, v16);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(newidle_lb_success_count,
+		  "load_balance() success count on cpu newly idle", "%11u",
+		  newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v16);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(newidle_lb_avg_count,
+		  "avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf",
+		  newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v16);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category active_load_balance()> ");
+#endif
+DOMAIN_FIELD(__u32, alb_count,
+	     "active_load_balance() count", "%11u", false, v16);
+DOMAIN_FIELD(__u32, alb_failed,
+	     "active_load_balance() move task failed", "%11u", false, v16);
+DOMAIN_FIELD(__u32, alb_pushed,
+	     "active_load_balance() successfully moved a task", "%11u", false, v16);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category sched_balance_exec()> ");
+#endif
+DOMAIN_FIELD(__u32, sbe_count,
+	     "sbe_count is not used", "%11u", false, v16);
+DOMAIN_FIELD(__u32, sbe_balanced,
+	     "sbe_balanced is not used", "%11u", false, v16);
+DOMAIN_FIELD(__u32, sbe_pushed,
+	     "sbe_pushed is not used", "%11u", false, v16);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category sched_balance_fork()> ");
+#endif
+DOMAIN_FIELD(__u32, sbf_count,
+	     "sbf_count is not used", "%11u", false, v16);
+DOMAIN_FIELD(__u32, sbf_balanced,
+	     "sbf_balanced is not used", "%11u", false, v16);
+DOMAIN_FIELD(__u32, sbf_pushed,
+	     "sbf_pushed is not used", "%11u", false, v16);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Wakeup Info> ");
+#endif
+DOMAIN_FIELD(__u32, ttwu_wake_remote,
+	     "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v16);
+DOMAIN_FIELD(__u32, ttwu_move_affine,
+	     "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v16);
+DOMAIN_FIELD(__u32, ttwu_move_balance,
+	     "try_to_wake_up() started passive balancing", "%11u", false, v16);
+#endif /* DOMAIN_FIELD */
diff --git a/tools/lib/perf/include/perf/schedstat-v17.h b/tools/lib/perf/include/perf/schedstat-v17.h
new file mode 100644
index 000000000000..865dc7c1039c
--- /dev/null
+++ b/tools/lib/perf/include/perf/schedstat-v17.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* CPU_FIELD args: C type, name, description, printf format, bool flag, related field, version. */
+#ifdef CPU_FIELD
+CPU_FIELD(__u32, yld_count, "sched_yield() count",
+	  "%11u", false, yld_count, v17);
+CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored",
+	  "%11u", false, array_exp, v17);
+CPU_FIELD(__u32, sched_count, "schedule() called",
+	  "%11u", false, sched_count, v17);
+CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle",
+	  "%11u", true, sched_count, v17);
+CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called",
+	  "%11u", false, ttwu_count, v17);
+CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu",
+	  "%11u", true, ttwu_count, v17);
+CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)",
+	  "%11llu", false, rq_cpu_time, v17);
+CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)",
+	  "%11llu", true, rq_cpu_time, v17);
+CPU_FIELD(__u64, pcount, "total timeslices run on this cpu",
+	  "%11llu", false, pcount, v17);
+#endif /* CPU_FIELD */
+/* DOMAIN_FIELD args: C type, name, description, printf format, bool flag, version tag. */
+#ifdef DOMAIN_FIELD
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category busy> ");
+#endif
+DOMAIN_FIELD(__u32, busy_lb_count,
+	     "load_balance() count on cpu busy", "%11u", true, v17);
+DOMAIN_FIELD(__u32, busy_lb_balanced,
+	     "load_balance() found balanced on cpu busy", "%11u", true, v17);
+DOMAIN_FIELD(__u32, busy_lb_failed,
+	     "load_balance() move task failed on cpu busy", "%11u", true, v17);
+DOMAIN_FIELD(__u32, busy_lb_imbalance_load,
+	     "imbalance in load on cpu busy", "%11u", false, v17);
+DOMAIN_FIELD(__u32, busy_lb_imbalance_util,
+	     "imbalance in utilization on cpu busy", "%11u", false, v17);
+DOMAIN_FIELD(__u32, busy_lb_imbalance_task,
+	     "imbalance in number of tasks on cpu busy", "%11u", false, v17);
+DOMAIN_FIELD(__u32, busy_lb_imbalance_misfit,
+	     "imbalance in misfit tasks on cpu busy", "%11u", false, v17);
+DOMAIN_FIELD(__u32, busy_lb_gained,
+	     "pull_task() count on cpu busy", "%11u", false, v17);
+DOMAIN_FIELD(__u32, busy_lb_hot_gained,
+	     "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v17);
+DOMAIN_FIELD(__u32, busy_lb_nobusyq,
+	     "load_balance() failed to find busier queue on cpu busy", "%11u", true, v17);
+DOMAIN_FIELD(__u32, busy_lb_nobusyg,
+	     "load_balance() failed to find busier group on cpu busy", "%11u", true, v17);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(busy_lb_success_count, "load_balance() success count on cpu busy", "%11u",
+		  busy_lb_count, busy_lb_balanced, busy_lb_failed, v17);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(busy_lb_avg_pulled,
+		  "avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf",
+		  busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v17);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category idle> ");
+#endif
+DOMAIN_FIELD(__u32, idle_lb_count,
+	     "load_balance() count on cpu idle", "%11u", true, v17);
+DOMAIN_FIELD(__u32, idle_lb_balanced,
+	     "load_balance() found balanced on cpu idle", "%11u", true, v17);
+DOMAIN_FIELD(__u32, idle_lb_failed,
+	     "load_balance() move task failed on cpu idle", "%11u", true, v17);
+DOMAIN_FIELD(__u32, idle_lb_imbalance_load,
+	     "imbalance in load on cpu idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, idle_lb_imbalance_util,
+	     "imbalance in utilization on cpu idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, idle_lb_imbalance_task,
+	     "imbalance in number of tasks on cpu idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, idle_lb_imbalance_misfit,
+	     "imbalance in misfit tasks on cpu idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, idle_lb_gained,
+	     "pull_task() count on cpu idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, idle_lb_hot_gained,
+	     "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, idle_lb_nobusyq,
+	     "load_balance() failed to find busier queue on cpu idle", "%11u", true, v17);
+DOMAIN_FIELD(__u32, idle_lb_nobusyg,
+	     "load_balance() failed to find busier group on cpu idle", "%11u", true, v17);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(idle_lb_success_count, "load_balance() success count on cpu idle", "%11u",
+		  idle_lb_count, idle_lb_balanced, idle_lb_failed, v17);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(idle_lb_avg_pulled,
+		  "avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf",
+		  idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v17);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category newidle> ");
+#endif
+DOMAIN_FIELD(__u32, newidle_lb_count,
+	     "load_balance() count on cpu newly idle", "%11u", true, v17);
+DOMAIN_FIELD(__u32, newidle_lb_balanced,
+	     "load_balance() found balanced on cpu newly idle", "%11u", true, v17);
+DOMAIN_FIELD(__u32, newidle_lb_failed,
+	     "load_balance() move task failed on cpu newly idle", "%11u", true, v17);
+DOMAIN_FIELD(__u32, newidle_lb_imbalance_load,
+	     "imbalance in load on cpu newly idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, newidle_lb_imbalance_util,
+	     "imbalance in utilization on cpu newly idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, newidle_lb_imbalance_task,
+	     "imbalance in number of tasks on cpu newly idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, newidle_lb_imbalance_misfit,
+	     "imbalance in misfit tasks on cpu newly idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, newidle_lb_gained,
+	     "pull_task() count on cpu newly idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, newidle_lb_hot_gained,
+	     "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v17);
+DOMAIN_FIELD(__u32, newidle_lb_nobusyq,
+	     "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v17);
+DOMAIN_FIELD(__u32, newidle_lb_nobusyg,
+	     "load_balance() failed to find busier group on cpu newly idle", "%11u", true, v17);
+#ifdef DERIVED_CNT_FIELD
+DERIVED_CNT_FIELD(newidle_lb_success_count,
+		  "load_balance() success count on cpu newly idle", "%11u",
+		  newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v17);
+#endif
+#ifdef DERIVED_AVG_FIELD
+DERIVED_AVG_FIELD(newidle_lb_avg_pulled,
+		  "avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf",
+		  newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v17);
+#endif
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category active_load_balance()> ");
+#endif
+DOMAIN_FIELD(__u32, alb_count,
+	     "active_load_balance() count", "%11u", false, v17);
+DOMAIN_FIELD(__u32, alb_failed,
+	     "active_load_balance() move task failed", "%11u", false, v17);
+DOMAIN_FIELD(__u32, alb_pushed,
+	     "active_load_balance() successfully moved a task", "%11u", false, v17);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category sched_balance_exec()> ");
+#endif
+DOMAIN_FIELD(__u32, sbe_count,
+	     "sbe_count is not used", "%11u", false, v17);
+DOMAIN_FIELD(__u32, sbe_balanced,
+	     "sbe_balanced is not used", "%11u", false, v17);
+DOMAIN_FIELD(__u32, sbe_pushed,
+	     "sbe_pushed is not used", "%11u", false, v17);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Category sched_balance_fork()> ");
+#endif
+DOMAIN_FIELD(__u32, sbf_count,
+	     "sbf_count is not used", "%11u", false, v17);
+DOMAIN_FIELD(__u32, sbf_balanced,
+	     "sbf_balanced is not used", "%11u", false, v17);
+DOMAIN_FIELD(__u32, sbf_pushed,
+	     "sbf_pushed is not used", "%11u", false, v17);
+#ifdef DOMAIN_CATEGORY
+DOMAIN_CATEGORY(" <Wakeup Info> ");
+#endif
+DOMAIN_FIELD(__u32, ttwu_wake_remote,
+	     "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v17);
+DOMAIN_FIELD(__u32, ttwu_move_affine,
+	     "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v17);
+DOMAIN_FIELD(__u32, ttwu_move_balance,
+	     "try_to_wake_up() started passive balancing", "%11u", false, v17);
+#endif /* DOMAIN_FIELD */
diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h
index 8b40e7777cea..44deb815b817 100644
--- a/tools/lib/perf/include/perf/threadmap.h
+++ b/tools/lib/perf/include/perf/threadmap.h
@@ -14,6 +14,7 @@ LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx,
 LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx);
 LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);
 LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx);
+LIBPERF_API int perf_thread_map__idx(struct perf_thread_map *map, pid_t pid);
 
 LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);
 LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map);
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index c1a51d925e0e..ec124eb0ec0a 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -508,7 +508,7 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count
 		idx = READ_ONCE(pc->index);
 		cnt = READ_ONCE(pc->offset);
 		if (pc->cap_user_rdpmc && idx) {
-			s64 evcnt = read_perf_counter(idx - 1);
+			u64 evcnt = read_perf_counter(idx - 1);
 			u16 width = READ_ONCE(pc->pmc_width);
 
 			evcnt <<= 64 - width;
diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c
index 07968f3ea093..db431b036f57 100644
--- a/tools/lib/perf/threadmap.c
+++ b/tools/lib/perf/threadmap.c
@@ -97,5 +97,22 @@ int perf_thread_map__nr(struct perf_thread_map *threads)
 
 pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx)
 {
+	if (!map) {
+		assert(idx == 0);	/* NULL map: a single pid -1 entry */
+		return -1;
+	}
+
 	return map->map[idx].pid;
 }
+
+int perf_thread_map__idx(struct perf_thread_map *threads, pid_t pid)
+{
+	int i = 0;
+
+	/* A NULL map only knows the placeholder pid -1, found at index 0. */
+	if (!threads)
+		return pid == -1 ? 0 : -1;
+	while (i < threads->nr && threads->map[i].pid != pid)
+		i++;
+	return i < threads->nr ? i : -1;
+}
diff --git a/tools/lib/python/__init__.py b/tools/lib/python/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/__init__.py
diff --git a/tools/lib/python/abi/__init__.py b/tools/lib/python/abi/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/abi/__init__.py
diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py
new file mode 100644
index 000000000000..d7bb20ef3acc
--- /dev/null
+++ b/tools/lib/python/abi/abi_parser.py
@@ -0,0 +1,631 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+from argparse import Namespace
+import logging
+import os
+import re
+
+from pprint import pformat
+from random import randrange, seed
+
+# Import Python modules
+
+from abi.helpers import AbiDebug, ABI_DIR
+
+
+class AbiParser:
+    """Parse Documentation/ABI files into in-memory tables and render them."""
+
+    #: Tag names accepted in ABI files; "where" is only tolerated as a misspelled "what".
+    TAGS = r"(what|where|date|kernelversion|contact|description|users)"
+
+    #: Absolute /sys, /config, /proc, /dev or /kvd paths that auto-generate cross-references.
+    XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
+
+    def __init__(self, directory, logger=None,
+                 enable_lineno=False, show_warnings=True, debug=0):
+        """Keep the constructor arguments and start with empty parse tables."""
+
+        self.directory = directory
+        self.enable_lineno = enable_lineno
+        self.show_warnings = show_warnings
+        self.debug = debug
+
+        # Without a caller-supplied logger, log through "get_abi"
+        self.log = logger if logger else logging.getLogger("get_abi")
+
+        # Tables filled in by the parser: see add_symbol(), parse_file()
+        # and _parse_line()
+        self.data = {}            # entry key -> parsed fields
+        self.what_symbols = {}    # What: text -> files/lines where it shows up
+        self.file_refs = {}       # file name -> cross-reference label
+        self.what_refs = {}       # What: text -> entry key
+
+        # Ignore files that contain such suffixes
+        self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")
+
+        # Regular expressions used by the parser
+        self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
+        self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
+        self.re_valid = re.compile(self.TAGS)
+        self.re_start_spc = re.compile(r"(\s*)(\S.*)")
+        self.re_whitespace = re.compile(r"^\s+")
+
+        # Regular expressions used when printing
+        self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
+        self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
+        self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
+        self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
+        self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
+        self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
+        self.re_xref_node = re.compile(self.XREF)
+
+    def warn(self, fdata, msg, extra=None):
+        """
+        Log a parse warning prefixed with "<file>:<line>: ", unless
+        warnings were disabled; ``extra`` goes on a tab-indented second line.
+        """
+
+        if self.show_warnings:
+            text = f"{fdata.fname}:{fdata.ln}: {msg}"
+            if extra:
+                text = text + "\n\t\t" + extra
+            self.log.warning(text)
+
+    def add_symbol(self, what, fname, ln=None, xref=None):
+        """
+        Record in ``what_symbols`` that ``what`` appears in ``fname``.
+
+        A truthy ``ln`` is added once to that file's line list; a truthy
+        ``xref`` replaces the cross-reference stored for the symbol.
+        """
+
+        entry = self.what_symbols.setdefault(what, {"file": {}})
+        lines = entry["file"].setdefault(fname, [])
+        if ln and ln not in lines:
+            lines.append(ln)
+        if xref:
+            entry["xref"] = xref
+
+    def _parse_line(self, fdata, line):
+        """Feed one line of an ABI file to the per-file state machine kept in fdata."""
+
+        new_what = False
+        new_tag = False
+        content = None
+
+        match = self.re_tag.match(line)
+        if match:
+            new = match.group(1).lower()
+            sep = match.group(2)
+            content = match.group(3)
+
+            match = self.re_valid.search(new)
+            if match:
+                new_tag = match.group(1)
+            else:
+                if fdata.tag == "description":
+                    # New "tag" is actually part of description.
+                    # Don't consider it a tag
+                    new_tag = False
+                elif fdata.tag != "":
+                    self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)
+
+        if new_tag:
+            # "where" is Invalid, but was a common mistake. Warn if found
+            if new_tag == "where":
+                self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
+                new_tag = "what"
+
+            if new_tag == "what":
+                fdata.space = None
+
+                if content not in self.what_symbols:
+                    self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)
+
+                if fdata.tag == "what":
+                    fdata.what.append(content.strip("\n"))
+                else:
+                    if fdata.key:
+                        if "description" not in self.data.get(fdata.key, {}):
+                            self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+                        for w in fdata.what:
+                            self.add_symbol(what=w, fname=fdata.fname,
+                                            ln=fdata.what_ln, xref=fdata.key)
+
+                    fdata.label = content
+                    new_what = True
+
+                    key = "abi_" + content.lower()
+                    fdata.key = self.re_unprintable.sub("_", key).strip("_")
+
+                    # Avoid duplicated keys but using a defined seed, to make
+                    # the namespace identical if there aren't changes at the
+                    # ABI symbols
+                    seed(42)
+
+                    while fdata.key in self.data:
+                        char = randrange(0, 51) + ord("A")
+                        if char > ord("Z"):
+                            char += ord("a") - ord("Z") - 1
+
+                        fdata.key += chr(char)
+
+                    if fdata.key and fdata.key not in self.data:
+                        self.data[fdata.key] = {
+                            "what": [content],
+                            "file": [fdata.file_ref],
+                            "path": fdata.ftype,
+                            "line_no": fdata.ln,
+                        }
+
+                    fdata.what = self.data[fdata.key]["what"]
+
+                self.what_refs[content] = fdata.key
+                fdata.tag = new_tag
+                fdata.what_ln = fdata.ln
+
+                if fdata.nametag["what"]:
+                    t = (content, fdata.key)
+                    if t not in fdata.nametag["symbols"]:
+                        fdata.nametag["symbols"].append(t)
+
+                return
+
+            if fdata.tag and new_tag:
+                fdata.tag = new_tag
+                # NOTE(review): new_what is only set in the returning "what" branch: dead code?
+                if new_what:
+                    fdata.label = ""
+
+                    if "description" in self.data[fdata.key]:
+                        self.data[fdata.key]["description"] += "\n\n"
+
+                    if fdata.file_ref not in self.data[fdata.key]["file"]:
+                        self.data[fdata.key]["file"].append(fdata.file_ref)
+
+                    if self.debug == AbiDebug.WHAT_PARSING:
+                        self.log.debug("what: %s", fdata.what)
+
+                if not fdata.what:
+                    self.warn(fdata, "'What:' should come first:", line)
+                    return
+
+                if new_tag == "description":
+                    fdata.space = None
+
+                    if content:
+                        sep = sep.replace(":", " ")
+
+                        c = " " * len(new_tag) + sep + content
+                        c = c.expandtabs()
+
+                        match = self.re_start_spc.match(c)
+                        if match:
+                            # Preserve initial spaces for the first line
+                            fdata.space = match.group(1)
+                            content = match.group(2) + "\n"
+
+                self.data[fdata.key][fdata.tag] = content
+
+            return
+
+        # Store any contents before tags at the database
+        if not fdata.tag and "what" in fdata.nametag:
+            fdata.nametag["description"] += line
+            return
+
+        if fdata.tag == "description":
+            content = line.expandtabs()
+
+            if self.re_whitespace.sub("", content) == "":
+                self.data[fdata.key][fdata.tag] += "\n"
+                return
+
+            if fdata.space is None:
+                match = self.re_start_spc.match(content)
+                if match:
+                    # Preserve initial spaces for the first line
+                    fdata.space = match.group(1)
+
+                    content = match.group(2) + "\n"
+            else:
+                if content.startswith(fdata.space):
+                    content = content[len(fdata.space):]
+
+                else:
+                    fdata.space = ""
+            # NOTE(review): tag is "description" here, so the "what" arm below cannot run
+            if fdata.tag == "what":
+                w = content.strip("\n")
+                if w:
+                    self.data[fdata.key][fdata.tag].append(w)
+            else:
+                self.data[fdata.key][fdata.tag] += content
+            return
+
+        content = line.strip()
+        if fdata.tag:
+            if fdata.tag == "what":
+                w = content.strip("\n")
+                if w:
+                    self.data[fdata.key][fdata.tag].append(w)
+            else:
+                self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
+            return
+
+        # Everything else is error
+        if content:
+            self.warn(fdata, "Unexpected content", line)
+
+    def parse_readme(self, nametag, fname):
+        """
+        Load the ABI README into ``nametag["description"]``.
+
+        A line that starts with a valid ABI tag is stored after a newline
+        and a colon, so it is not taken for a tag later on.
+        """
+
+        nametag["what"] = ["Introduction"]
+        nametag["path"] = "README"
+
+        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
+            for text in fp:
+                tag = self.re_tag.match(text)
+                is_tag = tag and self.re_valid.search(tag.group(1).lower())
+                prefix = "\n:" if is_tag else ""
+                nametag["description"] += prefix + text
+
+    def parse_file(self, fname, path, basename):
+        """Parse one ABI file (or the README) and store its entries in self.data."""
+
+        ref = f"abi_file_{path}_{basename}"
+        ref = self.re_unprintable.sub("_", ref).strip("_")
+
+        # Per-file state for the per-line parser state machine and warn().
+        # It must be an instance: attributes set on the Namespace class leak.
+        fdata = Namespace()
+
+        fdata.fname = fname
+        fdata.name = basename
+
+        pos = fname.find(ABI_DIR)
+        if pos > 0:
+            f = fname[pos:]
+        else:
+            f = fname
+
+        fdata.file_ref = (f, ref)
+        self.file_refs[f] = ref
+
+        fdata.ln = 0
+        fdata.what_ln = 0
+        fdata.tag = ""
+        fdata.label = ""
+        fdata.what = []
+        fdata.key = None
+        fdata.xrefs = None
+        fdata.space = None
+        fdata.ftype = path.split("/")[0]
+
+        fdata.nametag = {}
+        fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
+        fdata.nametag["type"] = "File"
+        fdata.nametag["path"] = fdata.ftype
+        fdata.nametag["file"] = [fdata.file_ref]
+        fdata.nametag["line_no"] = 1
+        fdata.nametag["description"] = ""
+        fdata.nametag["symbols"] = []
+
+        self.data[ref] = fdata.nametag
+
+        if self.debug & AbiDebug.WHAT_OPEN:
+            self.log.debug("Opening file %s", fname)
+
+        if basename == "README":
+            self.parse_readme(fdata.nametag, fname)
+            return
+
+        with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
+            for line in fp:
+                fdata.ln += 1
+
+                self._parse_line(fdata, line)
+
+            if "description" in fdata.nametag:
+                fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
+
+            if fdata.key:
+                if "description" not in self.data.get(fdata.key, {}):
+                    self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+                for w in fdata.what:
+                    self.add_symbol(what=w, fname=fname, xref=fdata.key)
+
+    def _parse_abi(self, root=None):
+        """
+        Walk ``root`` (default: self.directory) recursively and hand every
+        regular file to parse_file().
+
+        Dot-files and names ending in one of self.ignore_suffixes are
+        skipped, as are entries that are neither directories nor files.
+        """
+
+        top = root if root else self.directory
+
+        with os.scandir(top) as entries:
+            for item in entries:
+                full = os.path.join(top, item.name)
+
+                # Recurse into sub-directories
+                if item.is_dir():
+                    self._parse_abi(full)
+                elif item.is_file():
+                    base = os.path.basename(full)
+                    hidden = base.startswith(".")
+
+                    if hidden or base.endswith(self.ignore_suffixes):
+                        continue
+
+                    # Drop everything up to and including ABI_DIR
+                    rel = self.re_abi_dir.sub("", os.path.dirname(full))
+                    self.parse_file(full, rel, base)
+
+    def parse_abi(self, root=None):
+        """Parse the ABI tree via _parse_abi(root); dump self.data if debug asks for it."""
+
+        self._parse_abi(root)
+        dump = self.debug & AbiDebug.DUMP_ABI_STRUCTS
+        if dump:
+            self.log.debug(pformat(self.data))
+
+    def desc_txt(self, desc):
+        """Return desc without surrounding spaces/tabs/newlines, ending in a blank line."""
+
+        trimmed = desc.strip(" \t\n")
+
+        return f"{trimmed}\n\n"
+
+    def xref(self, fname):
+        """
+        Look up the ReST cross-reference label stored for an ABI file name
+        (a Documentation/ABI path as kept in self.file_refs).
+
+        Returns None when there is no label, or an empty one, for it.
+        """
+
+        label = self.file_refs.get(fname)
+        return label if label else None
+
+    def desc_rst(self, desc):
+        """Enrich ReST output by creating cross-references."""
+
+        # Remove title markups from the description
+        # Having titles inside ABI files will only work if extra
+        # care would be taken in order to strictly follow the same
+        # level order for each markup.
+        desc = self.re_title_mark.sub("\n\n", "\n" + desc)
+        desc = desc.rstrip(" \t\n").lstrip("\n")
+
+        # Python's regex performance for non-compiled expressions is a lot
+        # worse than Perl's, as Perl automatically caches them at their
+        # first usage. Here, we need to do the same, as otherwise the
+        # performance penalty would be high
+
+        new_desc = ""
+        for d in desc.split("\n"):
+            if d == "":
+                new_desc += "\n"
+                continue
+
+            # Use cross-references for doc files where needed
+            d = self.re_doc.sub(r":doc:`/\1`", d)
+
+            # Use cross-references for ABI generated docs where needed
+            matches = self.re_abi.findall(d)
+            for m in matches:
+                abi = m[0] + m[1]
+
+                xref = self.file_refs.get(abi)
+                if not xref:
+                    # This may happen if ABI is on a separate directory,
+                    # like parsing ABI testing and symbol is at stable.
+                    # The proper solution is to move this part of the code
+                    # for it to be inside sphinx/kernel_abi.py
+                    self.log.info("Didn't find ABI reference for '%s'", abi)
+                else:
+                    new = self.re_escape.sub(r"\\\1", m[1])
+                    d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
+            # NOTE(review): matches start with "/", so the \b-anchored re.sub below may never hit
+            # Seek for cross reference symbols like /sys/...
+            # Need to be careful to avoid doing it on a code block
+            if d[0] not in [" ", "\t"]:
+                matches = self.re_xref_node.findall(d)
+                for m in matches:
+                    # Finding ABI here is more complex due to wildcards
+                    xref = self.what_refs.get(m)
+                    if xref:
+                        new = self.re_escape.sub(r"\\\1", m)
+                        d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
+
+            new_desc += d + "\n"
+
+        return new_desc + "\n\n"
+
+    def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
+            filter_path=None):
+        """
+        Yield a (message, file name, line number) tuple per ABI entry,
+        honouring show_symbols/show_file/filter_path.
+        """
+
+        part = None
+        for key, v in sorted(self.data.items(),
+                             key=lambda x: (x[1].get("type", ""),
+                                            x[1].get("what"))):
+            wtype = v.get("type", "Symbol")
+            file_ref = v.get("file")
+            names = v.get("what", [""])
+
+            if wtype == "File":
+                if not show_file:
+                    continue
+            else:
+                if not show_symbols:
+                    continue
+
+            if filter_path:
+                if v.get("path") != filter_path:
+                    continue
+
+            msg = ""
+
+            if wtype != "File":
+                cur_part = names[0]
+                if cur_part.find("/") >= 0:
+                    match = self.re_what.match(cur_part)
+                    if match:
+                        symbol = match.group(1).rstrip("/")
+                        cur_part = "Symbols under " + symbol
+
+                if cur_part and cur_part != part:
+                    part = cur_part
+                    msg += part + "\n"+ "-" * len(part) +"\n\n"
+
+                msg += f".. _{key}:\n\n"
+
+                # Escape into a fresh list: v["what"] belongs to self.data and
+                # must stay untouched so doc() gives the same output every call
+                names = ["**" + self.re_escape.sub(r"\\\1", n) + "**" for n in names]
+
+                max_len = max((len(n) for n in names), default=0)
+
+                msg += "+-" + "-" * max_len + "-+\n"
+                for name in names:
+                    msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
+                    msg += "+-" + "-" * max_len + "-+\n"
+                msg += "\n"
+
+            for ref in file_ref:
+                if wtype == "File":
+                    msg += f".. _{ref[1]}:\n\n"
+                else:
+                    base = os.path.basename(ref[0])
+                    msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
+
+            if wtype == "File":
+                msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"
+
+            desc = v.get("description")
+            if not desc and wtype != "File":
+                msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
+
+            if desc:
+                if output_in_txt:
+                    msg += self.desc_txt(desc)
+                else:
+                    msg += self.desc_rst(desc)
+
+            symbols = v.get("symbols")
+            if symbols:
+                msg += "Has the following ABI:\n\n"
+
+                for w, label in symbols:
+                    # Escape special chars from content
+                    content = self.re_escape.sub(r"\\\1", w)
+
+                    msg += f"- :ref:`{content} <{label}>`\n\n"
+
+            users = v.get("users")
+            if users and users.strip(" \t\n"):
+                users = users.strip("\n").replace('\n', '\n\t')
+                msg += f"Users:\n\t{users}\n\n"
+
+            ln = v.get("line_no", 1)
+            yield (msg, file_ref[0][0], ln)
+
+    def check_issues(self):
+        """Warn about 'what' symbols that are defined in more than one file."""
+
+        for what, v in self.what_symbols.items():
+            files = v.get("file")
+            if not files:
+                # Should never happen if the parser works properly
+                self.log.warning("%s doesn't have a file associated", what)
+                continue
+
+            if len(files) == 1:
+                continue
+
+            f = []
+            for fname, lines in sorted(files.items()):
+                if not lines:
+                    f.append(f"{fname}")
+                elif len(lines) == 1:
+                    f.append(f"{fname}:{lines[0]}")
+                else:
+                    m = fname + " lines "
+                    m += ", ".join(str(x) for x in lines)
+                    f.append(m)
+
+            self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
+
+    def search_symbols(self, expr):
+        """Print, sorted by key, each non-File entry whose 'what' matches expr (ignoring case)."""
+
+        regex = re.compile(expr, re.I)
+
+        found_keys = 0
+        for t in sorted(self.data.items(), key=lambda x: x[0]):
+            v = t[1]
+
+            wtype = v.get("type", "")
+            if wtype == "File":
+                continue
+
+            for what in v.get("what", [""]):
+                if regex.search(what):
+                    found_keys += 1
+
+                    kernelversion = v.get("kernelversion", "").strip(" \t\n")
+                    date = v.get("date", "").strip(" \t\n")
+                    contact = v.get("contact", "").strip(" \t\n")
+                    users = v.get("users", "").strip(" \t\n")
+                    desc = v.get("description", "").strip(" \t\n")
+
+                    files = []
+                    for f in v.get("file", ()):
+                        files.append(f[0])
+
+                    what = str(found_keys) + ". " + what
+                    title_tag = "-" * len(what)
+
+                    print(f"\n{what}\n{title_tag}\n")
+
+                    if kernelversion:
+                        print(f"Kernel version:\t\t{kernelversion}")
+
+                    if date:
+                        print(f"Date:\t\t\t{date}")
+
+                    if contact:
+                        print(f"Contact:\t\t{contact}")
+
+                    if users:
+                        print(f"Users:\t\t\t{users}")
+
+                    print("Defined on file(s):\t" + ", ".join(files))
+
+                    if desc:
+                        desc = desc.strip("\n")
+                        print(f"\n{desc}\n")
+
+        if not found_keys:
+            print(f"Regular expression /{expr}/ not found.")
diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py
new file mode 100644
index 000000000000..d0c5e3ede6b5
--- /dev/null
+++ b/tools/lib/python/abi/abi_regex.py
@@ -0,0 +1,248 @@
+#!/usr/bin/env python3
+# xxpylint: disable=R0903
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Convert ABI what into regular expressions
+"""
+
+import re
+import sys
+
+from pprint import pformat
+
+from abi.abi_parser import AbiParser
+from abi.helpers import AbiDebug
+
+class AbiRegex(AbiParser):
+    """
+    Extends AbiParser to search ABI nodes with regular expressions.
+
+    There are some optimizations here to allow a quick symbol search:
+    instead of trying to place all symbols together and doing a linear
+    search, which is very time consuming, create a tree with one depth,
+    grouping similar symbols together.
+
+    Yet, sometimes a full search will be needed, so we have a special branch
+    on such group tree where other symbols are placed.
+    """
+
+    #: Escape only ASCII visible characters.
+    escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"
+
+    #: Special group for other nodes.
+    leave_others = "others"
+
+    # Tuples with regular expressions to be compiled and replacement data
+    re_whats = [
+        # Drop escape characters that might exist
+        (re.compile("\\\\"), ""),
+
+        # Temporarily escape dot characters
+        (re.compile(r"\."),  "\xf6"),
+
+        # Temporarily change [0-9]+ type of patterns
+        (re.compile(r"\[0\-9\]\+"),  "\xff"),
+
+        # Temporarily change [\d+-\d+] type of patterns
+        (re.compile(r"\[0\-\d+\]"),  "\xff"),
+        (re.compile(r"\[0:\d+\]"),  "\xff"),
+        (re.compile(r"\[(\d+)\]"),  "\xf4\\\\d+\xf5"),
+
+        # Temporarily change [0-9] type of patterns, keeping both digits
+        (re.compile(r"\[(\d)\-(\d)\]"),  "\xf4\\1-\\2\xf5"),
+
+        # Handle multiple option patterns
+        (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),
+
+        # Handle wildcards
+        (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
+        (re.compile(r"/\*/"), "/.*/"),
+        (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
+        (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
+        (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
+        (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),
+
+        (re.compile(r"XX+"), "\\\\w\xf7"),
+        (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
+        (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
+        (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),
+
+        # Recover [0-9] type of patterns
+        (re.compile(r"\xf4"), "["),
+        (re.compile(r"\xf5"),  "]"),
+
+        # Remove duplicated spaces
+        (re.compile(r"\s+"), r" "),
+
+        # Special case: drop comparison as in:
+        # What: foo = <something>
+        # (this happens on a few IIO definitions)
+        (re.compile(r"\s*\=.*$"), ""),
+
+        # Escape all other symbols
+        (re.compile(escape_symbols), r"\\\1"),
+        (re.compile(r"\\\\"), r"\\"),
+        (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
+        (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),
+
+        (re.compile(r"\xff"), r"\\d+"),
+
+        # Special case: IIO ABI which has a parenthesis.
+        (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),
+
+        # Simplify regexes with multiple .*
+        (re.compile(r"(?:\.\*){2,}"),  ""),
+
+        # Recover dot characters
+        (re.compile(r"\xf6"), "\\."),
+        # Recover plus characters
+        (re.compile(r"\xf7"), "+"),
+    ]
+
+    #: Regex to check if the symbol name has a number on it.
+    re_has_num = re.compile(r"\\d")
+
+    #: Symbol name after escape_chars that are considered a devnode basename.
+    re_symbol_name =  re.compile(r"(\w|\\[\.\-\:])+$")
+
+    #: List of popular group names to be skipped to minimize regex group size
+    #: Use AbiDebug.SUBGROUP_SIZE to detect those.
+    skip_names = set(["devices", "hwmon"])
+
+    def regex_append(self, what, new):
+        """
+        Get a search group for a subset of regular expressions.
+
+        As ABI may have thousands of symbols, using a for to search all
+        regular expressions is at least O(n^2). When there are wildcards,
+        the complexity increases substantially, eventually becoming exponential.
+
+        To avoid spending too much time on them, use a logic to split
+        them into groups. The smaller the group, the better, as it would
+        mean that searches will be confined to a small number of regular
+        expressions.
+
+        The conversion to a regex subset is tricky, as we need something
+        that can be easily obtained from the sysfs symbol and from the
+        regular expression. So, we need to discard nodes that have
+        wildcards.
+
+        If it can't obtain a subgroup, place the regular expression inside
+        a special group (self.leave_others).
+        """
+
+        search_group = None
+        for search_group in reversed(new.split("/")):
+            if not search_group or search_group in self.skip_names:
+                continue
+            if self.re_symbol_name.match(search_group):
+                break
+
+        if not search_group:
+            search_group = self.leave_others
+
+        if self.debug & AbiDebug.SUBGROUP_MAP:
+            self.log.debug("%s: mapped as %s", what, search_group)
+
+        try:
+            if search_group not in self.regex_group:
+                self.regex_group[search_group] = []
+
+            self.regex_group[search_group].append(re.compile(new))
+            if self.search_string:
+                if what.find(self.search_string) >= 0:
+                    print(f"What: {what}")
+        except re.error:
+            # re.error is caught, as re.PatternError needs Python >= 3.13
+            self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
+                             "           '%s'", what, new)
+
+    def get_regexes(self, what):
+        """
+        Return the compiled regexes that are candidates to match an ABI devnode.
+
+        The sub-groups filled by regex_append() are looked up for each path
+        component of what, from the last to the first one, followed by the
+        catch-all group (self.leave_others).
+        """
+
+        candidates = []
+        groups = what.split("/")[::-1] + [self.leave_others]
+
+        for name in groups:
+            if name in self.regex_group:
+                candidates += self.regex_group[name]
+
+        return candidates
+
+    def __init__(self, *args, **kwargs):
+        """
+        Override init method to handle the optional search_string keyword.
+
+        search_string is removed from kwargs before calling the parent
+        constructor. When not empty, it is also compiled as self.re_string.
+        Raises ValueError if it is not a valid regular expression.
+        """
+
+        self.regex_group = None
+        self.re_string = None
+        self.search_string = kwargs.pop("search_string", None)
+
+        if self.search_string:
+            # re.error is caught, as re.PatternError needs Python >= 3.13
+            try:
+                self.re_string = re.compile(self.search_string)
+            except re.error as e:
+                msg = f"{self.search_string} is not a valid regular expression"
+                raise ValueError(msg) from e
+
+        super().__init__(*args, **kwargs)
+
+    def parse_abi(self, *args, **kwargs):
+        """Run the parent parse_abi() and convert /sys What fields into regexes."""
+
+        super().parse_abi(*args, **kwargs)
+
+        self.regex_group = {}
+
+        print("Converting ABI What fields into regexes...", file=sys.stderr)
+
+        for t in sorted(self.data.items(), key=lambda x: x[0]):
+            v = t[1]
+            if v.get("type") == "File":
+                continue
+
+            v["regex"] = []
+
+            for what in v.get("what", []):
+                if not what.startswith("/sys"):
+                    continue
+
+                new = what
+                for r, s in self.re_whats:
+                    try:
+                        new = r.sub(s, new)
+                    except re.error as e:
+                        # Help debugging troubles with new regexes. re.error
+                        # is used, as it also exists on Python < 3.13
+                        raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e
+
+                v["regex"].append(new)
+                if self.debug & AbiDebug.REGEX:
+                    self.log.debug("%-90s <== %s", new, what)
+
+                # Store regex into a subgroup to speedup searches
+                self.regex_append(what, new)
+
+        if self.debug & AbiDebug.SUBGROUP_DICT:
+            self.log.debug("%s", pformat(self.regex_group))
+
+        if self.debug & AbiDebug.SUBGROUP_SIZE:
+            biggestd_keys = sorted(self.regex_group.keys(), reverse=True,
+                                   key=lambda k: len(self.regex_group[k]))
+
+            print("Top regex subgroups:", file=sys.stderr)
+            for k in biggestd_keys[:10]:
+                print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
diff --git a/tools/lib/python/abi/helpers.py b/tools/lib/python/abi/helpers.py
new file mode 100644
index 000000000000..2a378d780d3c
--- /dev/null
+++ b/tools/lib/python/abi/helpers.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=R0903
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Helper classes for ABI parser
+"""
+
+ABI_DIR = "Documentation/ABI/"
+
+
+class AbiDebug:
+    """Debug levels, defined as bit flags that are tested with bitwise AND."""
+
+    WHAT_PARSING = 1        #: Enable debug parsing logic.
+    WHAT_OPEN = 2           #: Enable debug messages on file open.
+    DUMP_ABI_STRUCTS = 4    #: Enable debug for ABI parse data.
+    UNDEFINED = 8           #: Enable extra undefined symbol data.
+    REGEX = 16              #: Enable debug for what to regex conversion.
+    SUBGROUP_MAP = 32       #: Enable debug for symbol regex subgroups.
+    SUBGROUP_DICT = 64      #: Enable debug for sysfs graph tree variable.
+    SUBGROUP_SIZE = 128     #: Enable debug of search groups.
+    GRAPH = 256             #: Display ref tree graph for undefined symbols.
+
+#: Helper messages for each debug variable
+DEBUG_HELP = """
+1   - enable debug parsing logic
+2   - enable debug messages on file open
+4   - enable debug for ABI parse data
+8   - enable extra debug information to identify troubles
+      with ABI symbols found at the local machine that
+      weren't found on ABI documentation (used only for
+      undefined subcommand)
+16  - enable debug for what to regex conversion
+32  - enable debug for symbol regex subgroups
+64  - enable debug for sysfs graph tree variable
+128 - enable debug of search groups
+256 - enable displaying reference tree graphs for undefined symbols.
+"""
diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py
new file mode 100644
index 000000000000..7bbefd274ea2
--- /dev/null
+++ b/tools/lib/python/abi/system_symbols.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0912,R0914,R0915,R1702
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+import os
+import re
+import sys
+
+from concurrent import futures
+from datetime import datetime
+from random import shuffle
+
+from abi.helpers import AbiDebug
+
+class SystemSymbols:
+    """Stores arguments for the class and initialize class vars."""
+
+    def graph_add_file(self, path, link=None):
+        """
+        Insert path into the sysfs tree kept at self.root, one nested dict
+        per path component. If link is given, it is recorded as an extra
+        name of the final node. Paths already in self.files are ignored.
+        """
+
+        if path in self.files:
+            return
+
+        node = self.root
+        parts = path.split("/")
+        for pos, part in enumerate(parts, start=1):
+            if part not in node:
+                full_name = "/".join(parts[:pos]).rstrip("/")
+                node[part] = {"__name": [full_name]}
+            node = node[part]
+
+        if link and link not in node["__name"]:
+            node["__name"].append(link.rstrip("/"))
+        self.files.add(path)
+
+    def print_graph(self, root_prefix="", root=None, level=0):
+        """
+        Print the tree rooted at root (self.root when empty) using UTF-8
+        box-drawing characters.
+
+        root_prefix is the text printed before each entry of this level.
+        Levels above 5 are not displayed.
+        """
+
+        if not root:
+            root, level = self.root, 0
+
+        # Depth limit, to prevent an endless traverse
+        if level > 5:
+            return
+
+        branch, last_branch = ("├──", "└──") if level > 0 else ("", "")
+
+        entries = list(root.items())
+        # Names stored at this node, besides the first one
+        aliases = root.get("__name", [])[1:]
+
+        for key, child in entries:
+            if key == "__name":
+                continue
+
+            label = key or "/"
+            if aliases:
+                label += " links: " + ",".join(aliases)
+
+            # The last entry of the level closes the branch
+            is_last = child == entries[-1][1]
+            print(root_prefix + (last_branch if is_last else branch) + label)
+
+            if not is_last:
+                child_prefix = root_prefix + "│   "
+            elif level > 0:
+                child_prefix = root_prefix + "   "
+            else:
+                child_prefix = root_prefix
+            self.print_graph(child_prefix, child, level + 1)
+
+    def _walk(self, root):
+        """
+        Walk through sysfs to get all devnodes that aren't ignored.
+
+        By default, uses /sys as sysfs mounting point. If another
+        directory is used, it is replaced by /sys on the stored paths.
+        """
+
+        with os.scandir(root) as obj:
+            for entry in obj:
+                path = os.path.join(root, entry.name)
+                if self.sysfs:
+                    # count is positional, as keyword needs Python >= 3.13
+                    p = path.replace(self.sysfs, "/sys", 1)
+                else:
+                    p = path
+
+                # Skip only this entry: its siblings may still be wanted
+                if self.re_ignore.search(p):
+                    continue
+
+                # Handle link first to avoid directory recursion
+                if entry.is_symlink():
+                    real = os.path.realpath(path)
+                    if not self.sysfs:
+                        self.aliases[path] = real
+                    else:
+                        real = real.replace(self.sysfs, "/sys", 1)
+
+                    # Add absfile location to graph if it doesn't exist
+                    if not self.re_ignore.search(real):
+                        # Add link to the graph
+                        self.graph_add_file(real, p)
+                elif entry.is_file():
+                    self.graph_add_file(p)
+                elif entry.is_dir():
+                    self._walk(path)
+
+    def __init__(self, abi, sysfs="/sys", hints=False):
+        """
+        Initialize internal variables and get a list of all files inside
+        sysfs that can currently be parsed.
+
+        Please notice that there are several entries on sysfs that aren't
+        documented as ABI. Ignore those.
+
+        The real paths will be stored under self.files. Aliases will be
+        stored in separate, as self.aliases.
+        """
+
+        self.abi = abi
+        self.log = abi.log
+
+        if sysfs != "/sys":
+            self.sysfs = sysfs.rstrip("/")
+        else:
+            self.sysfs = None
+
+        self.hints = hints
+        self.root = {}
+        self.aliases = {}
+        self.files = set()
+
+        # _walk() converts paths to /sys before testing them, so anchor
+        # the patterns at /sys, even when a custom sysfs directory is used
+        dont_walk = [
+            # Those require root access and aren't documented at ABI
+            "^/sys/kernel/debug",
+            "^/sys/kernel/tracing",
+            "^/sys/fs/pstore",
+            "^/sys/fs/bpf",
+            "^/sys/fs/fuse",
+
+            # This is not documented at ABI
+            "^/sys/module",
+
+            "^/sys/fs/cgroup",      # this is big and has zero docs under ABI
+            "^/sys/firmware",       # documented elsewhere: ACPI, DT bindings
+            "sections|notes",       # aren't actually part of ABI
+
+            # kernel-parameters.txt - not easy to parse
+            "parameters",
+        ]
+
+        self.re_ignore = re.compile("|".join(dont_walk))
+        print(f"Reading {sysfs} directory contents...", file=sys.stderr)
+        self._walk(sysfs)
+
+    def check_file(self, refs, found):
+        """
+        Match a chunk of sysfs names against the ABI regexes. refs is a
+        list of name lists: the first name of each one selects the regexes
+        to be tested and identifies the result. Returns one dict per list,
+        with "found", "fname" and "msg" keys.
+        """
+
+        results = []
+
+        try:
+            for names in refs:
+                fname = names[0]
+                entry = {"found": False, "fname": fname, "msg": ""}
+                results.append(entry)
+
+                regexes = self.abi.get_regexes(fname)
+                if not regexes:
+                    self.abi.log.warning(f"missing rules for {fname}")
+                    continue
+
+                for name in names:
+                    for regex in regexes:
+                        if self.abi.debug & AbiDebug.UNDEFINED:
+                            self.log.debug("check if %s matches '%s'", name, regex.pattern)
+                        if not regex.match(name):
+                            continue
+                        entry["found"] = True
+                        if found:
+                            entry["msg"] += f"  {fname}: regex:\n\t"
+
+                if self.hints and not entry["found"]:
+                    tested = "".join("    " + r.pattern + "\n" for r in regexes)
+                    entry["msg"] += f"  {fname} not found. Tested regexes:\n" + tested
+
+        except KeyboardInterrupt:
+            # Return what was collected up to now
+            pass
+
+        return results
+
+    def _ref_interactor(self, root):
+        """
+        Recursively yield the tree nodes placed below the top level. When
+        self.abi.re_string is set, only nodes whose first name matches it.
+        """
+
+        for key, node in root.items():
+            if isinstance(node, dict):
+                yield from self._ref_interactor(node)
+
+            if key == "__name" or root == self.root:
+                continue
+
+            pattern = self.abi.re_string
+            if not pattern or pattern.search(node["__name"][0]):
+                yield node
+
+
+    def get_fileref(self, all_refs, chunk_size):
+        """
+        Generator that groups refs into lists of up to chunk_size elements.
+
+        The last list yielded has the remaining elements, and may be empty.
+        """
+
+        chunk = []
+
+        for ref in all_refs:
+            chunk.append(ref)
+            if len(chunk) >= chunk_size:
+                yield chunk
+                chunk = []
+
+        yield chunk
+
+    def check_undefined_symbols(self, max_workers=None, chunk_size=50,
+                                found=None, dry_run=None):
+        """Search ABI for sysfs symbols missing documentation."""
+
+        self.abi.parse_abi()
+
+        if self.abi.debug & AbiDebug.GRAPH:
+            self.print_graph()
+
+        all_refs = []
+        for ref in self._ref_interactor(self.root):
+            all_refs.append(ref["__name"])
+
+        if dry_run:
+            print("Would check", file=sys.stderr)
+            for ref in all_refs:
+                print(", ".join(ref))
+
+            return
+
+        print("Starting to search symbols (it may take several minutes):",
+              file=sys.stderr)
+        start = datetime.now()
+        old_elapsed = None
+
+        # Python doesn't support multithreading due to limitations on its
+        # global lock (GIL). While Python 3.13 finally made GIL optional,
+        # there are still issues related to it. Also, we want to have
+        # backward compatibility with older versions of Python.
+        #
+        # So, use instead multiprocess. However, Python is very slow passing
+        # data from/to multiple processes. Also, it may consume lots of memory
+        # if the data to be shared is not small.  So, we need to group workload
+        # in chunks that are big enough to generate performance gains while
+        # not being so big that would cause out-of-memory.
+
+        num_refs = len(all_refs)
+        print(f"Number of references to parse: {num_refs}", file=sys.stderr)
+
+        # os.cpu_count() returns None when the CPU count is undetermined
+        cpu_count = os.cpu_count() or 1
+        if not max_workers or max_workers > cpu_count:
+            max_workers = cpu_count
+
+        max_workers = max(max_workers, 1)
+
+        max_chunk_size = int((num_refs + max_workers - 1) / max_workers)
+        chunk_size = min(chunk_size, max_chunk_size)
+        chunk_size = max(1, chunk_size)
+
+        if max_workers > 1:
+            executor = futures.ProcessPoolExecutor
+
+            # Place references in a random order. This may help improving
+            # performance, by mixing complex/simple expressions when creating
+            # chunks
+            shuffle(all_refs)
+        else:
+            # Python has a high overhead with processes. When there's just
+            # one worker, it is faster to not create a new process.
+            # Yet, User still deserves to have a progress print. So, use
+            # python's "thread", which is actually a single process, using
+            # an internal schedule to switch between tasks. No performance
+            # gains for non-IO tasks, but still it can be quickly interrupted
+            # from time to time to display progress.
+            executor = futures.ThreadPoolExecutor
+
+        not_found = []
+        f_list = []
+        with executor(max_workers=max_workers) as exe:
+            for refs in self.get_fileref(all_refs, chunk_size):
+                if refs:
+                    try:
+                        f_list.append(exe.submit(self.check_file, refs, found))
+
+                    except KeyboardInterrupt:
+                        return
+
+            total = len(f_list)
+
+            if not total:
+                if self.abi.re_string:
+                    print(f"No ABI symbol matches {self.abi.search_string}")
+                else:
+                    self.abi.log.warning("No ABI symbols found")
+                return
+
+            print(f"{len(f_list):6d} jobs queued on {max_workers} workers",
+                  file=sys.stderr)
+
+            while f_list:
+                try:
+                    t = futures.wait(f_list, timeout=1,
+                                     return_when=futures.FIRST_COMPLETED)
+
+                    done = t[0]
+
+                    for fut in done:
+                        res_list = fut.result()
+
+                        for res in res_list:
+                            if not res["found"]:
+                                not_found.append(res["fname"])
+                            if res["msg"]:
+                                print(res["msg"])
+
+                        f_list.remove(fut)
+                except KeyboardInterrupt:
+                    return
+
+                except RuntimeError as e:
+                    self.abi.log.warning(f"Future: {e}")
+                    break
+
+                if sys.stderr.isatty():
+                    elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+                    if len(f_list) < total:
+                        elapsed += f" ({total - len(f_list)}/{total} jobs completed).  "
+                    if elapsed != old_elapsed:
+                        print(elapsed + "\r", end="", flush=True,
+                              file=sys.stderr)
+                        old_elapsed = elapsed
+
+        elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+        print(elapsed, file=sys.stderr)
+
+        for f in sorted(not_found):
+            print(f"{f} not found.")
diff --git a/tools/lib/python/feat/parse_features.py b/tools/lib/python/feat/parse_features.py
new file mode 100755
index 000000000000..41a51d9d6f62
--- /dev/null
+++ b/tools/lib/python/feat/parse_features.py
@@ -0,0 +1,507 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0911,R0912,R0914,R0915
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+
+"""
+Library to parse the Linux Feature files and produce a ReST book.
+"""
+
+import os
+import re
+import sys
+
+from glob import iglob
+
+
+class ParseFeature:
+    """
+    Parses Documentation/features, allowing to generate ReST documentation
+    from it.
+    """
+
+    #: feature header string.
+    h_name = "Feature"
+
+    #: Kernel config header string.
+    h_kconfig = "Kconfig"
+
+    #: description header string.
+    h_description = "Description"
+
+    #: subsystem header string.
+    h_subsys = "Subsystem"
+
+    #: status header string.
+    h_status = "Status"
+
+    #: architecture header string.
+    h_arch = "Architecture"
+
+    #: Sort order for status. Others will be mapped at the end.
+    status_map = {
+        "ok":   0,
+        "TODO": 1,
+        "N/A":  2,
+        # The only missing status is "..", which was mapped as "---",
+        # as this is an special ReST cell value. Let it get the
+        # default order (99).
+    }
+
+    def __init__(self, prefix, debug=0, enable_fname=False):
+        """
+        Store the parser settings (prefix directory, debug level and the
+        enable_fname flag) and reset the parsing state.
+        """
+
+        self.prefix = prefix
+        self.debug = debug
+        self.enable_fname = enable_fname
+
+        self.data = {}
+        self.msg = ""
+
+        # Column widths start with the size of each header
+        self.max_size_name = len(self.h_name)
+        self.max_size_kconfig = len(self.h_kconfig)
+        self.max_size_description = len(self.h_description)
+        self.max_size_desc_word = 0
+        self.max_size_subsys = len(self.h_subsys)
+        self.max_size_status = len(self.h_status)
+        self.max_size_arch = len(self.h_arch)
+        self.max_size_arch_with_header = self.max_size_arch + len(self.h_arch)
+        self.description_size = 1
+
+    def emit(self, msg="", end="\n"):
+        """Append msg, followed by end, to the self.msg output buffer."""
+
+        self.msg = self.msg + (msg + end)
+
+    def parse_error(self, fname, ln, msg, data=None):
+        """
+        Print a warning at stderr, telling the file name and, when ln is
+        not zero, the line number. If data is given, it is shown at the
+        next line, without trailing whitespaces.
+        """
+
+        where = f"{fname}#{ln}" if ln else fname
+        text = f"Warning: file {where}: {msg}"
+
+        if data:
+            text += f":\n\t{data.rstrip()}"
+
+        # The whole message, ending with a new line
+        print(text, file=sys.stderr)
+
+    def parse_feat_file(self, fname):
+        """Parses a single arch-support.txt feature file."""
+
+        if os.path.isdir(fname):
+            return
+
+        base = os.path.basename(fname)
+
+        if base != "arch-support.txt":
+            if self.debug:
+                print(f"ignoring {fname}", file=sys.stderr)
+            return
+
+        subsys = os.path.dirname(fname).split("/")[-2]
+        self.max_size_subsys = max(self.max_size_subsys, len(subsys))
+
+        feature_name = ""
+        kconfig = ""
+        description = ""
+        comments = ""
+        arch_table = {}
+
+        if self.debug > 1:
+            print(f"Opening {fname}", file=sys.stderr)
+
+        if self.enable_fname:
+            full_fname = os.path.abspath(fname)
+            self.emit(f".. FILE {full_fname}")
+
+        with open(fname, encoding="utf-8") as f:
+            for ln, line in enumerate(f, start=1):
+                line = line.strip()
+
+                match = re.match(r"^\#\s+Feature\s+name:\s*(.*\S)", line)
+                if match:
+                    feature_name = match.group(1)
+
+                    self.max_size_name = max(self.max_size_name,
+                                             len(feature_name))
+                    continue
+
+                match = re.match(r"^\#\s+Kconfig:\s*(.*\S)", line)
+                if match:
+                    kconfig = match.group(1)
+
+                    self.max_size_kconfig = max(self.max_size_kconfig,
+                                                len(kconfig))
+                    continue
+
+                match = re.match(r"^\#\s+description:\s*(.*\S)", line)
+                if match:
+                    description = match.group(1)
+
+                    self.max_size_description = max(self.max_size_description,
+                                                    len(description))
+
+                    words = re.split(r"\s+", line)[1:]
+                    for word in words:
+                        self.max_size_desc_word = max(self.max_size_desc_word,
+                                                        len(word))
+                    continue
+
+                # Skip blank lines
+                if re.search(r"^\s*$", line):
+                    continue
+
+                if re.match(r"^\s*\-+\s*$", line):
+                    continue
+
+                if re.search(r"^\s*\|\s*arch\s*\|\s*status\s*\|\s*$", line):
+                    continue
+
+                match = re.match(r"^\#\s*(.*)$", line)
+                if match:
+                    comments += match.group(1)
+                    continue
+
+                match = re.match(r"^\s*\|\s*(\S+):\s*\|\s*(\S+)\s*\|\s*$", line)
+                if match:
+                    arch = match.group(1)
+                    status = match.group(2)
+
+                    self.max_size_status = max(self.max_size_status,
+                                               len(status))
+                    self.max_size_arch = max(self.max_size_arch, len(arch))
+
+                    if status == "..":
+                        status = "---"
+
+                    arch_table[arch] = status
+
+                    continue
+
+                self.parse_error(fname, ln, "Line is invalid", line)
+
+        if not feature_name:
+            self.parse_error(fname, 0, "Feature name not found")
+            return
+        if not subsys:
+            self.parse_error(fname, 0, "Subsystem not found")
+            return
+        if not kconfig:
+            self.parse_error(fname, 0, "Kconfig not found")
+            return
+        if not description:
+            self.parse_error(fname, 0, "Description not found")
+            return
+        if not arch_table:
+            self.parse_error(fname, 0, "Architecture table not found")
+            return
+
+        self.data[feature_name] = {
+            "where": fname,
+            "subsys": subsys,
+            "kconfig": kconfig,
+            "description": description,
+            "comments": comments,
+            "table": arch_table,
+        }
+
+        self.max_size_arch_with_header = self.max_size_arch + len(self.h_arch)
+
+    def parse(self):
+        """
+        Run parse_feat_file() for everything found under self.prefix, except
+        for the top-level arch-support.txt. Returns self.data.
+        """
+
+        base_dir = os.path.expanduser(self.prefix)
+        skipped = os.path.join(base_dir, "arch-support.txt")
+        if self.debug > 2:
+            print(f"Running parser for {base_dir}")
+
+        for entry in iglob(os.path.join(base_dir, "**"), recursive=True):
+            if entry != skipped:
+                self.parse_feat_file(entry)
+        return self.data
+
+    def output_arch_table(self, arch, feat=None):
+        """
+        Append to the output a table with the status of the features on
+        the arch architecture, sorted by subsystem and then by
+        case-insensitive feature name.
+
+        If feat is given, only that feature is placed at the table.
+        Features without an entry for arch are not listed.
+
+        Returns self.msg, i.e. everything emitted so far.
+        """
+
+        # Width of each column, except for the last one (description)
+        widths = (self.max_size_subsys, self.max_size_name,
+                  self.max_size_kconfig, self.max_size_status)
+
+        def row(cells, last):
+            """Pad cells to the column widths, then add the last column."""
+            padded = [f"{cell:<{size}}" for cell, size in zip(cells, widths)]
+            return "  ".join(padded + [last])
+
+        # The same border is used on top, below the header and on bottom
+        border = row(["=" * size for size in widths],
+                     "=" * self.max_size_description)
+        header = row([self.h_subsys, self.h_name, self.h_kconfig,
+                      self.h_status],
+                     f"{self.h_description:<{self.max_size_description}}")
+
+        # Title, with over and underlines
+        title = f"Feature status on {arch} architecture"
+        title_line = "=" * len(title)
+
+        self.emit(title_line)
+        self.emit(title)
+        self.emit(title_line)
+        self.emit()
+
+        self.emit(border)
+        self.emit(header)
+        self.emit(border)
+
+        ordered = sorted(self.data.keys(),
+                         key=lambda x: (self.data[x]["subsys"],
+                                        x.lower()))
+
+        for name in ordered:
+            if feat and name != feat:
+                continue
+
+            info = self.data[name]
+            if arch not in info["table"]:
+                continue
+
+            self.emit(row([info["subsys"], name, info["kconfig"],
+                           info["table"][arch]], info["description"]))
+
+        self.emit(border)
+
+        return self.msg
+
+    def output_feature(self, feat):
+        """
+        Emit title, metadata and the per-architecture status table of
+        ``feat``. Returns self.msg, or None when ``feat`` is unknown.
+        """
+        title = f"Feature {feat}"
+
+        self.emit("=" * len(title))
+        self.emit(title)
+        self.emit("=" * len(title))
+        self.emit()
+
+        if feat not in self.data:
+            return
+
+        if self.data[feat]["subsys"]:
+            self.emit(f":Subsystem: {self.data[feat]['subsys']}")
+        if self.data[feat]["kconfig"]:
+            self.emit(f":Kconfig: {self.data[feat]['kconfig']}")
+
+        # Slice instead of indexing: an empty description must not raise.
+        desc = self.data[feat]["description"].rstrip(". \t")
+        desc = desc[:1].upper() + desc[1:]
+        self.emit(f"\n{desc}.\n")
+
+        com = self.data[feat]["comments"].strip()
+        if com:
+            self.emit("Comments")
+            self.emit("--------")
+            self.emit(f"\n{com}\n")
+
+        self.emit("=" * self.max_size_arch + "  ", end="")
+        self.emit("=" * self.max_size_status)
+
+        self.emit(f"{self.h_arch:<{self.max_size_arch}}  ", end="")
+        self.emit(f"{self.h_status:<{self.max_size_status}}")
+
+        self.emit("=" * self.max_size_arch + "  ", end="")
+        self.emit("=" * self.max_size_status)
+
+        arch_table = self.data[feat]["table"]
+        for arch in sorted(arch_table.keys()):
+            self.emit(f"{arch:<{self.max_size_arch}}  ", end="")
+            self.emit(f"{arch_table[arch]:<{self.max_size_status}}")
+
+        self.emit("=" * self.max_size_arch + "  ", end="")
+        self.emit("=" * self.max_size_status)
+
+        return self.msg
+
+    def matrix_lines(self, desc_size, max_size_status, header):
+        """
+        Emit one horizontal border line of the output matrix table.
+
+        The line is drawn with ``=`` when ``header`` is true and with
+        ``-`` otherwise; ``+`` marks the column boundaries.
+        """
+        ln_marker = "=" if header else "-"
+        name_rule = ln_marker * self.max_size_name
+
+        self.emit("+" + name_rule + "+", end="")
+        self.emit(ln_marker * desc_size, end="")
+        self.emit("+" + ln_marker * max_size_status + "+")
+
+    def output_matrix(self):
+        """
+        Generates a set of tables, grouped by subsystem, containing
+        what's the feature state on each architecture.
+        """
+
+        title = "Feature status on all architectures"
+
+        self.emit("=" * len(title))
+        self.emit(title)
+        self.emit("=" * len(title))
+        self.emit()
+
+        desc_title = f"{self.h_kconfig} / {self.h_description}"
+
+        desc_size = self.max_size_kconfig + 4
+        if not self.description_size:
+            desc_size = max(self.max_size_description, desc_size)
+        else:
+            desc_size = max(self.description_size, desc_size)
+
+        desc_size = max(self.max_size_desc_word, desc_size, len(desc_title))
+
+        notcompat = "Not compatible"
+        self.max_size_status = max(self.max_size_status, len(notcompat))
+
+        min_status_size = self.max_size_status + self.max_size_arch + 4
+        max_size_status = max(min_status_size, self.max_size_status)
+
+        h_status_per_arch = "Status per architecture"
+        max_size_status = max(max_size_status, len(h_status_per_arch))
+
+        cur_subsys = None
+        for name in sorted(self.data.keys(),
+                           key=lambda x: (self.data[x]["subsys"], x.lower())):
+            if not cur_subsys or cur_subsys != self.data[name]["subsys"]:
+                if cur_subsys:
+                    self.emit()
+
+                cur_subsys = self.data[name]["subsys"]
+
+                title = f"Subsystem: {cur_subsys}"
+                self.emit(title)
+                self.emit("=" * len(title))
+                self.emit()
+
+                self.matrix_lines(desc_size, max_size_status, 0)
+
+                self.emit(f"|{self.h_name:<{self.max_size_name}}", end="")
+                self.emit(f"|{desc_title:<{desc_size}}", end="")
+                self.emit(f"|{h_status_per_arch:<{max_size_status}}|")
+
+                self.matrix_lines(desc_size, max_size_status, 1)
+
+            lines = []
+            descs = []
+            cur_status = ""
+            line = ""
+
+            arch_table = sorted(self.data[name]["table"].items(),
+                                key=lambda x: (self.status_map.get(x[1], 99),
+                                               x[0].lower()))
+
+            for arch, status in arch_table:
+                if status == "---":
+                    status = notcompat
+
+                if status != cur_status:
+                    if line != "":
+                        lines.append(line)
+                        line = ""
+                    line = f"- **{status}**: {arch}"
+                elif len(line) + len(arch) + 2 < max_size_status:
+                    line += f", {arch}"
+                else:
+                    lines.append(line)
+                    line = f"  {arch}"
+                cur_status = status
+
+            if line != "":
+                lines.append(line)
+
+            description = self.data[name]["description"]
+            while len(description) > desc_size:
+                desc_line = description[:desc_size]
+
+                last_space = desc_line.rfind(" ")
+                if last_space != -1:
+                    desc_line = desc_line[:last_space]
+                    descs.append(desc_line)
+                    description = description[last_space + 1:]
+                else:
+                    desc_line = desc_line[:-1]
+                    descs.append(desc_line + "\\")
+                    description = description[len(desc_line):]
+
+            if description:
+                descs.append(description)
+
+            while len(lines) < 2 + len(descs):
+                lines.append("")
+
+            for ln, line in enumerate(lines):
+                col = ["", ""]
+
+                if not ln:
+                    col[0] = name
+                    col[1] = f"``{self.data[name]['kconfig']}``"
+                else:
+                    if ln >= 2 and descs:
+                        col[1] = descs.pop(0)
+
+                self.emit(f"|{col[0]:<{self.max_size_name}}", end="")
+                self.emit(f"|{col[1]:<{desc_size}}", end="")
+                self.emit(f"|{line:<{max_size_status}}|")
+
+            self.matrix_lines(desc_size, max_size_status, 0)
+
+        return self.msg
+
+    def list_arch_features(self, arch, feat):
+        """
+        Print a matrix of kernel feature support for the chosen architecture.
+        """
+        self.emit("#")
+        self.emit(f"# Kernel feature support matrix of the '{arch}' architecture:")
+        self.emit("#")
+
+        # Sort by subsystem, then by feature name (case‑insensitive)
+        for name in sorted(self.data.keys(),
+                           key=lambda n: (self.data[n]["subsys"].lower(),
+                                          n.lower())):
+            if feat and name != feat:
+                continue
+
+            feature = self.data[name]
+            arch_table = feature["table"]
+            status = arch_table.get(arch, "")
+            status = " " * ((4 - len(status)) // 2) + status
+
+            self.emit(f"{feature['subsys']:>{self.max_size_subsys + 1}}/ ",
+                      end="")
+            self.emit(f"{name:<{self.max_size_name}}: ", end="")
+            self.emit(f"{status:<5}|   ", end="")
+            self.emit(f"{feature['kconfig']:>{self.max_size_kconfig}} ",
+                      end="")
+            self.emit(f"#  {feature['description']}")
+
+        return self.msg
diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py
new file mode 100755
index 000000000000..0b1ffdf9f7a3
--- /dev/null
+++ b/tools/lib/python/jobserver.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0+
+#
+# pylint: disable=C0103,C0209
+#
+#
+
+"""
+Interacts with the POSIX jobserver during the Kernel build time.
+
+A "normal" jobserver task, like the one initiated by a make subprocess would do:
+
+    - open read/write file descriptors to communicate with the job server;
+    - ask for one slot by calling::
+
+        claim = os.read(reader, 1)
+
+    - when the job finishes, call::
+
+        os.write(writer, b"+")  # os.write(writer, claim)
+
+Here, the goal is different: This script aims to get the remaining number
+of slots available, using all of them to run a command which handles tasks in
+parallel. To do that, it has a loop that ends only after there are no
+slots left. It then increments the number by one, in order to allow a
+call equivalent to ``make -j$((claim+1))``, e.g. having a parent make creating
+$claim child to do the actual work.
+
+The end goal here is to keep the total number of build tasks under the
+limit established by the initial ``make -j$n_proc`` call.
+
+See:
+    https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver
+"""
+
+import errno
+import os
+import subprocess
+import sys
+
+def warn(text, *args):
+    print(f'WARNING: {text}', *args, file = sys.stderr)
+
+class JobserverExec:
+    """
+    Claim all slots from make using POSIX Jobserver.
+
+    The main methods here are:
+
+    - open(): reserves all slots;
+    - close(): method returns all used slots back to make;
+    - run(): executes a command setting PARALLELISM=<available slots jobs + 1>.
+    """
+
+    def __init__(self):
+        """Initialize internal vars."""
+        self.claim = 0
+        self.jobs = b""
+        self.reader = None
+        self.writer = None
+        self.is_open = False
+
+    def open(self):
+        """Grab every free jobserver slot; self.claim stays None on failure."""
+
+        if self.is_open:
+            return
+        self.is_open = True  # We only try once
+        self.claim = None
+        #
+        # Check the make flags for "--jobserver=R,W"
+        # Note that GNU Make has used --jobserver-fds and --jobserver-auth
+        # so this handles all of them.
+        #
+        flags = os.environ.get('MAKEFLAGS', '')
+        opts = [x for x in flags.split(" ") if x.startswith("--jobserver")]
+        if not opts:
+            return
+        #
+        # Separate out the provided file descriptors
+        #
+        split_opt = opts[-1].split('=', 1)
+        if len(split_opt) != 2:
+            warn('unparseable option:', opts[-1])
+            return
+        fds = split_opt[1]
+        #
+        # As of GNU Make 4.4, we'll be looking for a named pipe
+        # identified as fifo:path
+        #
+        if fds.startswith('fifo:'):
+            path = fds[len('fifo:'):]
+            try:
+                self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
+                self.writer = os.open(path, os.O_WRONLY)
+            except (OSError, IOError):
+                warn('unable to open jobserver pipe', path)
+                return
+        #
+        # Otherwise look for integer file-descriptor numbers.
+        #
+        else:
+            split_fds = fds.split(',')
+            if len(split_fds) != 2:
+                warn('malformed jobserver file descriptors:', fds)
+                return
+            try:
+                self.reader = int(split_fds[0])
+                self.writer = int(split_fds[1])
+            except ValueError:
+                warn('non-integer jobserver file-descriptors:', fds)
+                return
+            try:
+                #
+                # Open a private copy of reader to avoid setting nonblocking
+                # on an unexpecting process with the same reader fd.
+                #
+                self.reader = os.open(f"/proc/self/fd/{self.reader}",
+                                      os.O_RDONLY | os.O_NONBLOCK)
+            except (IOError, OSError) as e:
+                warn('Unable to reopen jobserver read-side pipe:', repr(e))
+                return
+        #
+        # OK, we have the channel to the job server; read out as many jobserver
+        # slots as possible.
+        #
+        while True:
+            try:
+                slot = os.read(self.reader, 8)
+                if not slot:
+                    #
+                    # Something went wrong.  Clear self.jobs to avoid writing
+                    # weirdness back to the jobserver and give up.
+                    self.jobs = b""
+                    warn("unexpected empty token from jobserver;"
+                         " possible invalid '--jobserver-auth=' setting")
+                    self.claim = None
+                    return
+            except (OSError, IOError) as e:
+                #
+                # If there is nothing more to read then we are done.
+                #
+                if e.errno == errno.EWOULDBLOCK:
+                    break
+                #
+                # Anything else says that something went weird; give back
+                # the jobs (only once: close() must not repeat it) and give up.
+                #
+                if self.jobs:
+                    os.write(self.writer, self.jobs)
+                self.jobs, self.claim = b"", None
+                warn('error reading from jobserver pipe', repr(e))
+                return
+            self.jobs += slot
+        #
+        # Add a bump for our caller's reservation, since we're just going
+        # to sit here blocked on our child.
+        #
+        self.claim = len(self.jobs) + 1
+
+    def close(self):
+        """Return all reserved slots to Jobserver."""
+
+        if not self.is_open:
+            return
+
+        # Return all the reserved slots.
+        if len(self.jobs):
+            os.write(self.writer, self.jobs)
+
+        self.is_open = False
+
+    def __enter__(self):
+        self.open()
+        return self
+
+    def __exit__(self, exc_type, exc_value, exc_traceback):
+        self.close()
+
+    def run(self, cmd, *args, **pwargs):
+        """
+        Run a command setting PARALLELISM env variable to the number of
+        available job slots (claim) + 1, e.g. it will reserve claim slots
+        to do the actual build work, plus one to monitor its children.
+        """
+        self.open()             # Ensure that self.claim is set
+
+        # We can only claim parallelism if there was a jobserver (i.e. a
+        # top-level "-jN" argument) and there were no other failures. Otherwise
+        # leave out the environment variable and let the child figure out what
+        # is best.
+        if self.claim:
+            os.environ["PARALLELISM"] = str(self.claim)
+
+        return subprocess.call(cmd, *args, **pwargs)
diff --git a/tools/lib/python/kdoc/__init__.py b/tools/lib/python/kdoc/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/kdoc/__init__.py
diff --git a/tools/lib/python/kdoc/c_lex.py b/tools/lib/python/kdoc/c_lex.py
new file mode 100644
index 000000000000..cb95f5172448
--- /dev/null
+++ b/tools/lib/python/kdoc/c_lex.py
@@ -0,0 +1,662 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Regular expression ancillary classes.
+
+Those help caching regular expressions and do matching for kernel-doc.
+
+Please notice that the code here may raise exceptions to indicate bad
+usage inside kdoc to indicate problems at the replace pattern.
+
+Other errors are logged via log instance.
+"""
+
+import logging
+import re
+
+from copy import copy
+
+from .kdoc_re import KernRe
+
+log = logging.getLogger(__name__)
+
+def tokenizer_set_log(logger, prefix = ""):
+    """
+    Replace the module‑level logger with a LoggerAdapter that
+    prepends *prefix* to every message.
+    """
+    global log
+
+    class PrefixAdapter(logging.LoggerAdapter):
+        """
+        Ancillary class to set prefix on all message logs.
+        """
+        def process(self, msg, kwargs):
+            return f"{prefix}{msg}", kwargs
+
+    # Wrap the provided logger in our adapter
+    log = PrefixAdapter(logger, {"prefix": prefix})
+
+class CToken():
+    """
+    Data class to define a C token.
+    """
+
+    # Tokens that can be used by the parser. Works like a C enum.
+
+    COMMENT = 0     #: A standard C or C99 comment, including delimiter.
+    STRING = 1      #: A string, including quotation marks.
+    CHAR = 2        #: A character, including apostrophes.
+    NUMBER = 3      #: A number.
+    PUNC = 4        #: A punctuation mark: ``,`` / ``.``.
+    BEGIN = 5       #: A begin character: ``{`` / ``[`` / ``(``.
+    END = 6         #: An end character: ``}`` / ``]`` / ``)``.
+    CPP = 7         #: A preprocessor macro.
+    HASH = 8        #: The hash character - useful to handle other macros.
+    OP = 9          #: A C operator (add, subtract, ...).
+    STRUCT = 10     #: A ``struct`` keyword.
+    UNION = 11      #: A ``union`` keyword.
+    ENUM = 12       #: An ``enum`` keyword.
+    TYPEDEF = 13    #: A ``typedef`` keyword.
+    NAME = 14       #: A name. Can be an ID or a type.
+    SPACE = 15      #: Any space characters, including new lines.
+    ENDSTMT = 16    #: End of a statement (``;``).
+
+    BACKREF = 17    #: Not a valid C sequence, but used at sub regex patterns.
+
+    MISMATCH = 255  #: An error indicator: should never happen in practice.
+
+    # Dict to convert from an enum integer into a string.
+    _name_by_val = {v: k for k, v in dict(vars()).items() if isinstance(v, int)}
+
+    # Dict to convert from string to an enum-like integer value.
+    _name_to_val = {k: v for v, k in _name_by_val.items()}
+
+    @staticmethod
+    def to_name(val):
+        """Convert a CToken enum integer into its name, or UNKNOWN(val)"""
+
+        return CToken._name_by_val.get(val, f"UNKNOWN({val})")
+
+    @staticmethod
+    def from_name(name):
+        """Convert a string into a CToken enum value (MISMATCH if unknown)"""
+        if name in CToken._name_to_val:
+            return CToken._name_to_val[name]
+
+        return CToken.MISMATCH
+
+    # Note the order used by self.level: (bracket, paren, brace).
+    def __init__(self, kind, value=None, pos=0,
+                 brace_level=0, paren_level=0, bracket_level=0):
+        self.kind = kind
+        self.value = value
+        self.pos = pos
+        self.level = (bracket_level, paren_level, brace_level)
+
+    def __repr__(self):
+        name = self.to_name(self.kind)
+        if isinstance(self.value, str):
+            value = '"' + self.value + '"'
+        else:
+            value = self.value
+
+        return f"CToken(CToken.{name}, {value}, {self.pos}, {self.level})"
+
+#: Regexes to parse C code, transforming it into tokens.
+RE_SCANNER_LIST = [
+    #
+    # Note that \s\S is different than .*, as it also catches \n
+    #
+    (CToken.COMMENT, r"//[^\n]*|/\*[\s\S]*?\*/"),
+
+    (CToken.STRING,  r'"(?:\\.|[^"\\])*"'),
+    (CToken.CHAR,    r"'(?:\\.|[^'\\])'"),
+
+    (CToken.NUMBER,  r"0[xX][\da-fA-F]+[uUlL]*|0[0-7]+[uUlL]*|"
+                     r"\d+(?:\.\d*)?(?:[eE][+-]?\d+)?[fFlL]*"),
+
+    (CToken.ENDSTMT, r"(?:\s+;|;)"),
+
+    (CToken.PUNC,    r"[,\.]"),
+
+    (CToken.BEGIN,   r"[\[\(\{]"),
+
+    (CToken.END,     r"[\]\)\}]"),
+
+    (CToken.CPP,     r"#\s*(?:define|include|ifdef|ifndef|if|else|elif|endif|undef|pragma)\b"),
+
+    (CToken.HASH,    r"#"),
+
+    (CToken.OP,      r"\+\+|\-\-|\->|==|\!=|<=|>=|&&|\|\||<<|>>|\+=|\-=|\*=|/=|%="
+                     r"|&=|\|=|\^=|[=\+\-\*/%<>&\|\^~!\?\:]"),
+
+    (CToken.STRUCT,  r"\bstruct\b"),
+    (CToken.UNION,   r"\bunion\b"),
+    (CToken.ENUM,    r"\benum\b"),
+    (CToken.TYPEDEF, r"\btypedef\b"),
+
+    (CToken.NAME,    r"[A-Za-z_]\w*"),
+
+    (CToken.SPACE,   r"\s+"),
+
+    (CToken.BACKREF, r"\\\d+"),
+
+    (CToken.MISMATCH,r"."),
+]
+
+def fill_re_scanner(token_list):
+    """Ancillary routine to convert RE_SCANNER_LIST into a finditer regex"""
+    re_tokens = []
+
+    for kind, pattern in token_list:
+        name = CToken.to_name(kind)
+        re_tokens.append(f"(?P<{name}>{pattern})")
+
+    return KernRe("|".join(re_tokens), re.MULTILINE | re.DOTALL)
+
+#: Handle C continuation lines.
+RE_CONT = KernRe(r"\\\n")
+
+RE_COMMENT_START = KernRe(r'/\*\s*')
+
+#: Tokenizer regex, built once at import time from RE_SCANNER_LIST.
+RE_SCANNER = fill_re_scanner(RE_SCANNER_LIST)
+
+
+class CTokenizer():
+    """
+    Scan C statements and definitions and produce tokens.
+
+    When converted to string, it drops comments and handle public/private
+    values, respecting depth.
+    """
+
+    # This class is inspired and follows the basic concepts of:
+    #   https://docs.python.org/3/library/re.html#writing-a-tokenizer
+
+    def __init__(self, source=None):
+        """
+        Build the token list from ``source``: nothing when it is empty,
+        the list itself (not a copy) when it is a list, otherwise the
+        result of running ``_tokenize()`` over it.
+
+        Matching relies on RE_SCANNER, built from RE_SCANNER_LIST with
+        one ``(?P<name>...)`` group per token kind, so the kind can be
+        picked from the name of the group that matched.
+        """
+
+        #
+        # Store logger to allow parser classes to re-use it
+        #
+        global log
+        self.log = log
+
+        self.tokens = []
+
+        if not source:
+            return
+
+        if isinstance(source, list):
+            self.tokens = source
+            return
+
+        #
+        # While we could just use _tokenize directly via iterator,
+        # as we'll need to use the tokenizer several times inside kernel-doc
+        # to handle macro transforms, cache the results on a list, as
+        # re-using it is cheaper than having to parse every time.
+        #
+        for tok in self._tokenize(source):
+            self.tokens.append(tok)
+
+    def _tokenize(self, source):
+        """
+        Generator that splits ``source`` into tokens, using the patterns
+        from the module-level ``RE_SCANNER_LIST`` (via ``RE_SCANNER``).
+
+        Each yielded CToken carries the nesting levels *after* the token.
+        """
+
+        # Handle continuation lines. Note that kdoc_parser already has a
+        # logic to do that. Still, let's keep it for completeness, as we might
+        # end re-using this tokenizer outside kernel-doc some day - or we may
+        # eventually remove from there as a future cleanup.
+        source = RE_CONT.sub("", source)
+
+        brace_level = 0
+        paren_level = 0
+        bracket_level = 0
+
+        for match in RE_SCANNER.finditer(source):
+            kind = CToken.from_name(match.lastgroup)
+            pos = match.start()
+            value = match.group()
+
+            if kind == CToken.MISMATCH:
+                log.error(f"Unexpected token '{value}' on pos {pos}:\n\t'{source}'")
+            elif kind == CToken.BEGIN:
+                if value == '(':
+                    paren_level += 1
+                elif value == '[':
+                    bracket_level += 1
+                else:  # value == '{'
+                    brace_level += 1
+            elif kind == CToken.END:
+                # An unpaired closer only clamps its own counter at zero.
+                if value == ')':
+                    paren_level = max(paren_level - 1, 0)
+                elif value == ']':
+                    bracket_level = max(bracket_level - 1, 0)
+                else:    # value == '}'
+                    brace_level = max(brace_level - 1, 0)
+
+            yield CToken(kind, value, pos,
+                         brace_level, paren_level, bracket_level)
+
+    def __str__(self):
+        """Rebuild the source text, without comments and private blocks."""
+        out = ""
+        show_stack = [True]     # one visibility flag per open nesting level
+
+        for i, tok in enumerate(self.tokens):
+            if tok.kind == CToken.BEGIN:
+                show_stack.append(show_stack[-1])
+
+            elif tok.kind == CToken.END:
+                prev = show_stack[-1]
+                if len(show_stack) > 1:
+                    show_stack.pop()
+
+                if not prev and show_stack[-1]:
+                    #
+                    # Try to preserve indent
+                    #
+                    out += "\t" * (len(show_stack) - 1)
+
+                    out += str(tok.value)
+                    continue
+
+            elif tok.kind == CToken.COMMENT:
+                comment = RE_COMMENT_START.sub("", tok.value)
+
+                if comment.startswith("private:"):
+                    show_stack[-1] = False
+                elif comment.startswith("public:"):
+                    show_stack[-1] = True
+
+                continue
+
+            if not show_stack[-1]:
+                continue
+
+            if i < len(self.tokens) - 1:
+                next_tok = self.tokens[i + 1]
+
+                # Do some cleanups before ";"
+
+                if tok.kind == CToken.SPACE and next_tok.kind == CToken.ENDSTMT:
+                    continue
+
+                if tok.kind == CToken.ENDSTMT and next_tok.kind == tok.kind:
+                    continue
+
+            out += str(tok.value)
+
+        return out
+
+
+class CTokenArgs:
+    """
+    Ancillary class to help using backrefs from sub matches.
+
+    If the highest backref contain a "+" at the last element,
+    the logic will be greedy, picking all other delims.
+
+    This is needed to parse struct_group macros with end with ``MEMBERS...``.
+    """
+    def __init__(self, sub_str):
+        self.sub_groups = set()
+        self.max_group = -1
+        self.greedy = None
+
+        for m in KernRe(r'\\(\d+)([+]?)').finditer(sub_str):
+            group = int(m.group(1))
+            if m.group(2) == "+":
+                if self.greedy and self.greedy != group:
+                    raise ValueError("There are multiple greedy patterns!")
+                self.greedy = group
+
+            self.sub_groups.add(group)
+            self.max_group = max(self.max_group, group)
+
+        if self.greedy:
+            if self.greedy != self.max_group:
+                raise ValueError("Greedy pattern is not the last one!")
+
+            sub_str = KernRe(r'(\\\d+)[+]').sub(r"\1", sub_str)
+
+        self.sub_str = sub_str
+        self.sub_tokeninzer = CTokenizer(sub_str)
+
+    def groups(self, new_tokenizer):
+        r"""
+        Create replacement arguments for backrefs like:
+
+        ``\0``, ``\1``, ``\2``, ... ``\{number}``
+
+        It also accepts a ``+`` character to the highest backref, like
+        ``\4+``. When used, the backref will be greedy, picking all other
+        arguments afterwards. Otherwise, arguments past it are ignored.
+
+        Returns ``(level, groups_list)``: the level of the first BEGIN
+        token ((0, 0, 0) when there is none or only ``\0`` is used) and
+        one token list per backref number.
+
+        A backref above the number of arguments is logged as an error,
+        and so is ``\1``..``\n`` on square brackets, which have no
+        separator on C.
+        """
+
+        level = (0, 0, 0)
+
+        if self.max_group < 0:
+            return level, []
+
+        tokens = new_tokenizer.tokens
+
+        #
+        # Fill \0 with the full token contents
+        #
+        groups_list = [ [] ]
+
+        if 0 in self.sub_groups:
+            inner_level = 0
+
+            for i in range(0, len(tokens)):
+                tok = tokens[i]
+
+                if tok.kind == CToken.BEGIN:
+                    inner_level += 1
+
+                    #
+                    # Discard first begin
+                    #
+                    if not groups_list[0]:
+                        continue
+                elif tok.kind == CToken.END:
+                    inner_level -= 1
+                    if inner_level < 0:
+                        break
+
+                if inner_level:
+                    groups_list[0].append(tok)
+
+        if not self.max_group:
+            return level, groups_list
+
+        delim = None
+        i = -1          # stays bound even when the token list is empty
+        #
+        # Ignore everything before BEGIN. The value of begin gives the
+        # delimiter to be used for the matches
+        #
+        for i in range(0, len(tokens)):
+            tok = tokens[i]
+            if tok.kind == CToken.BEGIN:
+                if tok.value == "{":
+                    delim = ";"
+                elif tok.value == "(":
+                    delim = ","
+                else:
+                    log.error(fr"Can't handle \1..\n on {self.sub_str}")
+
+                level = tok.level
+                break
+
+        pos = 1
+        groups_list.append([])
+
+        inner_level = 0
+        for i in range(i + 1, len(tokens)):
+            tok = tokens[i]
+
+            if tok.kind == CToken.BEGIN:
+                inner_level += 1
+            if tok.kind == CToken.END:
+                inner_level -= 1
+                if inner_level < 0:
+                    break
+
+            if tok.kind in [CToken.PUNC, CToken.ENDSTMT] and delim == tok.value:
+                pos += 1
+                if self.greedy and pos > self.max_group:
+                    pos -= 1
+                else:
+                    groups_list.append([])
+
+                    if pos > self.max_group:
+                        break
+
+                    continue
+
+            groups_list[pos].append(tok)
+
+        if pos < self.max_group:
+            log.error(fr"{self.sub_str} groups are up to {pos} instead of {self.max_group}")
+
+        return level, groups_list
+
+    def tokens(self, new_tokenizer):
+        """
+        Expand the substitution pattern into a list of tokens.
+
+        Every backref token of the pattern is replaced by a copy of the
+        tokens of the matching group taken from ``new_tokenizer``, with
+        the level returned by ``groups()`` added to each copy's level.
+        All other pattern tokens are passed through as they are.
+        """
+        level, groups = self.groups(new_tokenizer)
+        expanded = []
+
+        for tok in self.sub_tokeninzer.tokens:
+            if tok.kind != CToken.BACKREF:
+                expanded.append(tok)
+                continue
+
+            for group_tok in groups[int(tok.value[1:])]:
+                new_tok = copy(group_tok)
+                new_tok.level = tuple(cur + base for cur, base
+                                      in zip(new_tok.level, level))
+                expanded.append(new_tok)
+
+        return expanded
+
+
+class CMatch:
+    """
+    Finding nested delimiters is hard with regular expressions. It is
+    even harder on Python with its normal re module, as there are several
+    advanced regular expressions that are missing.
+
+    This is the case of this pattern::
+
+            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'
+
+    which is used to properly match open/close parentheses of the
+    string search STRUCT_GROUP(),
+
+    Add a class that counts pairs of delimiters, using it to match and
+    replace nested expressions.
+
+    The original approach was suggested by:
+
+        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+
+    Although I re-implemented it to make it more generic and match 3 types
+    of delimiters. The logic checks if delimiters are paired. If not, it
+    will ignore the search string.
+    """
+
+
+    def __init__(self, regex, delim="("):
+        self.regex = KernRe("^" + regex + r"\b")
+        self.start_delim = delim
+
+    def _search(self, tokenizer):
+        """
+        Finds paired blocks for a regex that ends with a delimiter.
+
+        The suggestion of using finditer to match pairs came from:
+        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex
+        but I ended using a different implementation to align all three types
+        of delimiters and seek for an initial regular expression.
+
+        The algorithm seeks for a NAME token matching the regex and records
+        its nesting level. If the next non-space token is the start
+        delimiter, the match ends at the END token carrying that same level.
+        Otherwise, it is a name-only match, ending right before such token.
+        Yields (start, end) token positions; callers slice the tokens with
+        tokens[start:end + 1].
+
+        The algorithm should work fine for properly paired lines, but will
+        silently ignore end delimiters that precede a start delimiter.
+        This should be OK for kernel-doc parser, as unaligned delimiters
+        would cause compilation errors. So, we don't need to raise exceptions
+        to cover such issues.
+        """
+
+        start = None
+        started = False
+        level = None
+
+        for i, tok in enumerate(tokenizer.tokens):
+            if start is not None and not started:
+                if tok.kind == CToken.SPACE:
+                    continue
+
+                if tok.kind == CToken.BEGIN and tok.value == self.start_delim:
+                    started = True
+                    continue
+
+                # Name only token without BEGIN/END: it stops at the
+                # previous token. Don't "continue" here, as the current
+                # token may be the start of a new match.
+                yield start, i - 1
+                start = None
+
+            if start is None:
+                if tok.kind == CToken.NAME and self.regex.match(tok.value):
+                    start = i
+                    level = tok.level
+                    started = False
+
+                continue
+
+            if tok.kind == CToken.END and tok.level == level:
+                yield start, i
+                start = None
+
+        #
+        # If an END zeroing levels is not there, return remaining stuff
+        # This is meant to solve cases where the caller logic might be
+        # picking an incomplete block.
+        #
+        # Check against None: a match at the first token has start == 0.
+        #
+        if start is not None:
+            if started:
+                s = str(tokenizer)
+                log.warning(f"can't find a final end at {s}")
+
+            yield start, len(tokenizer.tokens)
+
+    def search(self, source):
+        """
+        This is similar to re.search, but written as a generator that
+        reports all occurrences:
+
+        It looks for the regex, optionally followed by a block enclosed
+        by the delimiter, yielding each occurrence found by _search().
+
+        If source is a CTokenizer, occurrences are yielded as CTokenizer
+        objects. Otherwise, source is tokenized with CTokenizer(source)
+        and each occurrence is yielded converted with str().
+        """
+
+        # Tokenize the source, unless it already came tokenized
+        is_token = isinstance(source, CTokenizer)
+        tokenizer = source if is_token else CTokenizer(source)
+
+        for first, last in self._search(tokenizer):
+            found = CTokenizer(tokenizer.tokens[first:last + 1])
+
+            # Only tokenized input gets tokenized output
+            yield found if is_token else str(found)
+
+    def sub(self, sub_str, source, count=0):
+        r"""
+        This is similar to re.sub:
+
+        It matches a regex that it is followed by a delimiter,
+        replacing each occurrence found by _search() with sub_str.
+
+        if the sub argument contains::
+
+            r'\0'
+
+        it will work just like re: it places there the matched paired data
+        with the delimiter stripped.
+
+        If count is different than zero, it will replace at most count
+        items.
+
+        The result is a CTokenizer if source is a CTokenizer. Otherwise,
+        source is tokenized with CTokenizer(source) and the result is
+        converted with str().
+        """
+
+        # Tokenize the source, unless it already came tokenized
+        is_token = isinstance(source, CTokenizer)
+        tokenizer = source if is_token else CTokenizer(source)
+
+        # Detect if sub_str contains sub arguments
+        args_match = CTokenArgs(sub_str)
+
+        result = CTokenizer()
+        tokens = tokenizer.tokens
+        # Position of the first token not copied to the result yet
+        pos = 0
+
+        #
+        # NOTE: the code below doesn't consider overlays at sub.
+        # We may need to add some extra unit tests to check if those
+        # would cause problems. When replacing by "", this should not
+        # be a problem, but other transformations could be problematic
+        #
+        for n, (start, end) in enumerate(self._search(tokenizer), start=1):
+            # Keep what lies between the previous match and this one
+            result.tokens += tokens[pos:start]
+
+            # Then place the expanded substitution for the matched block
+            matched = CTokenizer(tokens[start:end + 1])
+            result.tokens += args_match.tokens(matched)
+
+            pos = end + 1
+
+            if count and n >= count:
+                break
+
+        # Copy everything after the last replaced block
+        result.tokens += tokens[pos:]
+
+        return result if is_token else str(result)
+
+    def __repr__(self):
+        """
+        Return a string that shows the regular expression in use.
+        """
+        pattern = self.regex.regex.pattern
+        return f'CMatch("{pattern}")'
diff --git a/tools/lib/python/kdoc/enrich_formatter.py b/tools/lib/python/kdoc/enrich_formatter.py
new file mode 100644
index 000000000000..d1be4e5e1962
--- /dev/null
+++ b/tools/lib/python/kdoc/enrich_formatter.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Ancillary argparse HelpFormatter class that works in a similar way to
+argparse.RawDescriptionHelpFormatter, i.e. description maintains line
+breaks, but it also implements transformations to the help text. The
+actual transformations are given by enrich_text(), if the output is tty.
+
+Currently, the following transformations are done:
+
+    - Positional arguments are shown in upper case;
+    - if output is TTY, ``var`` and positional arguments are shown prepended
+      by an ANSI SGR code. This is usually translated to bold. On some
+      terminals, like konsole, this is translated into a colored bold text.
+"""
+
+import argparse
+import re
+import sys
+
+class EnrichFormatter(argparse.HelpFormatter):
+    """
+    Better format the output, making it easier to identify the positional
+    args and how they're used at the __doc__ description.
+    """
+    def __init__(self, *args, **kwargs):
+        """Initialize class and check if stdout is a TTY."""
+        super().__init__(*args, **kwargs)
+        self._tty = sys.stdout.isatty()
+
+    def enrich_text(self, text):
+        r"""
+        Handle ReST markups (currently, only \`\`text\`\` markups).
+        """
+        if self._tty and text:
+            # Replace ``text`` with ANSI SGR (bold)
+            return re.sub(r'\`\`(.+?)\`\`',
+                          lambda m: f'\033[1m{m.group(1)}\033[0m', text)
+        return text
+
+    def _fill_text(self, text, width, indent):
+        """
+        Enrich descriptions with markups on it, keeping their line breaks.
+        """
+        enriched = self.enrich_text(text)
+        return "\n".join(indent + line for line in enriched.splitlines())
+
+    def _format_usage(self, usage, actions, groups, prefix):
+        """
+        Enrich positional arguments at usage: line. If the parser has an
+        explicit usage string, use it, expanding %(prog)s like argparse.
+        """
+        prefix = prefix or "usage: "
+        if usage is not None:
+            return f"{prefix}{usage % dict(prog=self._prog)}\n"
+
+        parts = [self._prog]
+        for action in actions:
+            if action.option_strings:
+                opt = action.option_strings[0]
+                if action.nargs != 0:
+                    opt += f" {action.dest.upper()}"
+                parts.append(f"[{opt}]")
+            else:
+                # Positional argument
+                parts.append(self.enrich_text(f"``{action.dest.upper()}``"))
+
+        # The prefix already carries its trailing space
+        return f"{prefix}{' '.join(parts)}\n"
+
+    def _format_action_invocation(self, action):
+        """
+        Enrich argument names.
+        """
+        if not action.option_strings:
+            return self.enrich_text(f"``{action.dest.upper()}``")
+
+        return ", ".join(action.option_strings)
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
new file mode 100644
index 000000000000..ed82b6e6ab25
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -0,0 +1,380 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=R0903,R0913,R0914,R0917
+
+"""
+Classes for navigating through the files that kernel-doc needs to handle
+to generate documentation.
+"""
+
+import logging
+import os
+import re
+
+from kdoc.kdoc_parser import KernelDoc
+from kdoc.xforms_lists import CTransforms
+from kdoc.kdoc_output import OutputFormat
+from kdoc.kdoc_yaml_file import KDocTestFile
+
+
+class GlobSourceFiles:
+    """
+    Parse C source code file names and directories via an iterator.
+    """
+
+    def __init__(self, srctree=None, valid_extensions=None):
+        """
+        Initialize valid extensions with a tuple. A single string or
+        another iterable of strings is accepted as well.
+
+        If not defined, assume default C extensions (.c and .h)
+
+        It would be possible to use python's glob function, but it is
+        very slow, and it is not lazy. So, it would wait to read all
+        directories before actually doing something.
+
+        So, let's use our own implementation.
+        """
+
+        if not valid_extensions:
+            self.extensions = (".c", ".h")
+        elif isinstance(valid_extensions, str):
+            self.extensions = (valid_extensions,)
+        else:
+            # str.endswith() only takes a str or a tuple of str
+            self.extensions = tuple(valid_extensions)
+
+        self.srctree = srctree
+
+    def _parse_dir(self, dirname):
+        """Internal function to parse files recursively."""
+
+        with os.scandir(dirname) as obj:
+            for entry in obj:
+                name = os.path.join(dirname, entry.name)
+
+                if entry.is_dir(follow_symlinks=False):
+                    yield from self._parse_dir(name)
+                    continue
+
+                if not entry.is_file():
+                    continue
+
+                if not entry.name.endswith(self.extensions):
+                    continue
+
+                yield name
+
+    def parse_files(self, file_list, file_not_found_cb):
+        """
+        Define an iterator to parse all source files from file_list,
+        handling directories if any.
+        """
+
+        if not file_list:
+            return
+
+        for fname in file_list:
+            f = os.path.join(self.srctree, fname) if self.srctree else fname
+
+            if os.path.isdir(f):
+                yield from self._parse_dir(f)
+            elif os.path.isfile(f):
+                yield f
+            elif file_not_found_cb:
+                file_not_found_cb(fname)
+
+
+class KdocConfig():
+    """
+    Holds the configuration shared with kdoc_parser and kdoc_output:
+    the verbosity and warning switches, plus the logger in use.
+    """
+    def __init__(self, verbose=False, werror=False, wreturn=False,
+                 wshort_desc=False, wcontents_before_sections=False,
+                 logger=None):
+
+        # Without a logger, pick one named after this source file
+        self.log = logger or logging.getLogger(__file__)
+
+        # Warnings go straight to the logger, unless this gets replaced
+        self.warning = self.log.warning
+
+        # Plain switches, stored as received
+        self.verbose = verbose
+        self.werror = werror
+        self.wreturn = wreturn
+        self.wshort_desc = wshort_desc
+        self.wcontents_before_sections = wcontents_before_sections
+
+class KernelFiles():
+    """
+    Parse kernel-doc tags on multiple kernel source files.
+
+    This is the main entry point to run kernel-doc. This class is initialized
+    using a series of optional arguments:
+
+    ``verbose``
+        If True, enables kernel-doc verbosity. Default: False.
+
+    ``out_style``
+        Class to be used to format output. If None (default),
+        only report errors.
+
+    ``xforms``
+        Transforms to be applied to C prototypes and data structs.
+        If not specified, defaults to xforms = CTransforms()
+
+    ``werror``
+        If True, treat warnings as errors, returning an error code on warnings.
+
+        Default: False.
+
+    ``wreturn``
+        If True, warns about the lack of a return markup on functions.
+
+        Default: False.
+    ``wshort_desc``
+        If True, warns if initial short description is missing.
+
+        Default: False.
+
+    ``wcontents_before_sections``
+        If True, warn if there are contents before sections (deprecated).
+        This option is kept just for backward-compatibility, but it does
+        nothing, neither here nor at the original Perl script.
+
+        Default: False.
+
+    ``logger``
+        Optional logger class instance.
+
+        If not specified, defaults to use: ``logging.getLogger("kernel-doc")``
+
+    ``yaml_file``
+        If defined, stores the output inside a YAML file.
+
+    ``yaml_content``
+        Defines what will be inside the YAML file.
+
+    Note:
+        There are two type of parsers defined here:
+
+        - self.parse_file(): parses both kernel-doc markups and
+          ``EXPORT_SYMBOL*`` macros;
+        - self.process_export_file(): parses only ``EXPORT_SYMBOL*`` macros.
+    """
+
+    def warning(self, msg):
+        """Ancillary routine to output a warning and increment error count."""
+
+        self.config.log.warning(msg)
+        self.errors += 1
+
+    def error(self, msg):
+        """Ancillary routine to output an error and increment error count."""
+
+        self.config.log.error(msg)
+        self.errors += 1
+
+    def parse_file(self, fname):
+        """
+        Parse a single Kernel source, storing its kernel-doc entries at
+        self.results and its export table at self.export_table.
+        """
+
+        # Prevent parsing the same file twice if results are cached
+        if fname in self.files:
+            return
+
+        # The source is stored only when a test file is in use
+        doc = KernelDoc(self.config, fname, self.xforms,
+                        store_src=bool(self.test_file))
+
+        export_table, entries = doc.parse_kdoc()
+
+        self.results[fname] = entries
+        self.export_table[fname] = export_table
+
+        # parse_kdoc() already checks exports, so the file is marked as
+        # done for both kinds of parsing
+        self.files.add(fname)
+        self.export_files.add(fname)
+
+    def process_export_file(self, fname):
+        """
+        Parses ``EXPORT_SYMBOL*`` macros from a single Kernel source file,
+        storing what was found at self.export_table.
+        """
+        # Files already checked for exports are not parsed again
+        if fname in self.export_files:
+            return
+
+        found = KernelDoc(self.config, fname).parse_export()
+
+        # A false-ish result is reported as an error and stored as a set
+        if not found:
+            self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}")
+            found = set()
+
+        self.export_files.add(fname)
+        self.export_table[fname] = found
+
+    def file_not_found_cb(self, fname):
+        """
+        Callback to warn if a file was not found.
+        """
+
+        self.error(f"Cannot find file {fname}")
+
+    def __init__(self, verbose=False, out_style=None, xforms=None,
+                 werror=False, wreturn=False, wshort_desc=False,
+                 wcontents_before_sections=False,
+                 yaml_file=None, yaml_content=None, logger=None):
+        """
+        Initialize startup variables. Files are parsed later on by parse().
+
+        Verbosity and werror, when not requested by the caller, may still
+        be enabled via environment: KBUILD_VERBOSE, KCFLAGS and KDOC_WERROR.
+        SRCTREE, if set, is stored as the source tree at the config.
+        """
+
+        if not verbose:
+            try:
+                verbose = bool(int(os.environ.get("KBUILD_VERBOSE", 0)))
+            except ValueError:
+                # Handles an eventual case where verbosity is not a number
+                # like KBUILD_VERBOSE=""
+                verbose = False
+
+        if out_style is None:
+            out_style = OutputFormat()
+
+        if not werror:
+            kcflags = os.environ.get("KCFLAGS", None)
+            if kcflags and re.search(r"(\s|^)-Werror(\s|$)", kcflags):
+                werror = True
+
+            # reading this variable is for backwards compat just in case
+            # someone was calling it with the variable from outside the
+            # kernel's build system. A value of "0" means disabled.
+            kdoc_werror = os.environ.get("KDOC_WERROR", None)
+            if kdoc_werror:
+                werror = kdoc_werror != "0"
+
+        if not logger:
+            logger = logging.getLogger("kernel-doc")
+
+        # Some variables are global to the parser logic as a whole as they are
+        # used to send control configuration to KernelDoc class. As such,
+        # those variables are read-only inside the KernelDoc.
+        self.config = KdocConfig(verbose, werror, wreturn, wshort_desc,
+                                 wcontents_before_sections, logger)
+
+        # Override log warning, as we want to count errors
+        self.config.warning = self.warning
+
+        if yaml_file:
+            self.test_file = KDocTestFile(self.config, yaml_file, yaml_content)
+        else:
+            self.test_file = None
+
+        if xforms:
+            self.xforms = xforms
+        else:
+            self.xforms = CTransforms()
+
+        self.config.src_tree = os.environ.get("SRCTREE", None)
+
+        # Initialize variables that are internal to KernelFiles
+
+        self.out_style = out_style
+        self.out_style.set_config(self.config)
+
+        self.errors = 0
+        self.results = {}
+
+        self.files = set()
+        self.export_files = set()
+        self.export_table = {}
+
+    def parse(self, file_list, export_file=None):
+        """
+        Parse all files.
+        """
+
+        glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+        for fname in glob.parse_files(file_list, self.file_not_found_cb):
+            self.parse_file(fname)
+
+        for fname in glob.parse_files(export_file, self.file_not_found_cb):
+            self.process_export_file(fname)
+
+    def out_msg(self, fname, name, arg):
+        """
+        Return output messages from a file name using the output style
+        filtering.
+
+        If output type was not handled by the styler, return None.
+        """
+
+        # NOTE: we can add rules here to filter out unwanted parts,
+        # although OutputFormat.msg already does that.
+
+        return self.out_style.msg(fname, name, arg)
+
+    def msg(self, enable_lineno=False, export=False, internal=False,
+            symbol=None, nosymbol=None, no_doc_sections=False,
+            filenames=None, export_file=None):
+        """
+        Iterates over the kernel-doc results and output messages,
+        yielding (fname, kernel-doc markups) on each iteration.
+        """
+
+        if not filenames:
+            filenames = sorted(self.results.keys())
+
+        glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+        for fname in filenames:
+            function_table = set()
+
+            if internal or export:
+                # Default to the exports of the file itself. Use a local
+                # var, as changing export_file would leak to the next files
+                export_list = export_file or [fname]
+                for f in glob.parse_files(export_list, self.file_not_found_cb):
+                    function_table |= self.export_table[f]
+
+            if symbol:
+                for s in symbol:
+                    function_table.add(s)
+
+            if fname not in self.results:
+                self.config.log.warning("No kernel-doc for file %s", fname)
+                continue
+
+            symbols = self.results[fname]
+
+            if self.test_file:
+                self.test_file.set_filter(export, internal, symbol, nosymbol,
+                                          function_table, enable_lineno,
+                                          no_doc_sections)
+
+                self.test_file.output_symbols(fname, symbols)
+
+                continue
+
+            self.out_style.set_filter(export, internal, symbol, nosymbol,
+                                      function_table, enable_lineno,
+                                      no_doc_sections)
+
+            msg = self.out_style.output_symbols(fname, symbols)
+            if msg:
+                yield fname, msg
+
+        if self.test_file:
+            self.test_file.write()
diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
new file mode 100644
index 000000000000..a7aa6e1e4c1c
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# A class that will, eventually, encapsulate all of the parsed data that we
+# then pass into the output modules.
+#
+
+"""
+Data class to store a kernel-doc Item.
+"""
+
+class KdocItem:
+    """
+    Holds the data parsed for one kernel-doc entry, in the form that is
+    handed over to the output modules.
+    """
+
+    def __init__(self, name, fname, type, start_line,
+                 **other_stuff):
+        # Identification of the entry
+        self.name, self.fname, self.type = name, fname, type
+        self.declaration_start_line = start_line
+
+        # Structured data: empty unless set by keyword or by set_*()
+        self.sections = {}
+        self.sections_start_lines = {}
+        self.parameterlist = []
+        self.parameterdesc_start_lines = {}
+        self.parameterdescs = {}
+        self.parametertypes = {}
+        self.warnings = []
+
+        # Keyword arguments with these names become real attributes
+        known_keys = frozenset((
+            'declaration_start_line', 'sections', 'sections_start_lines',
+            'parameterlist', 'parameterdesc_start_lines', 'parameterdescs',
+            'parametertypes', 'warnings',
+        ))
+
+        #
+        # Everything else is kept in a dict of our own, so that the output
+        # side can still fetch it with get() or item[key].
+        #
+        self.other_stuff = {}
+        for key, value in other_stuff.items():
+            if key not in known_keys:
+                self.other_stuff[key] = value
+                continue
+            setattr(self, key, value)
+
+    def get(self, key, default = None):
+        """
+        Look up ``key`` among the optional keys, returning ``default``
+        if it is not there.
+        """
+        return self.other_stuff.get(key, default)
+
+    def __getitem__(self, key):
+        # Same as get(key): a missing key gives None, not KeyError
+        return self.get(key)
+
+    def __repr__(self):
+        return "KdocItem({}, {}, {}, {})".format(
+            self.name, self.fname, self.type, self.declaration_start_line)
+
+    @classmethod
+    def from_dict(cls, d):
+        """
+        Create a KdocItem from a plain dict. The dict is not modified.
+        """
+        data = d.copy()
+
+        # Positional arguments of __init__, with their fallback values
+        args = [data.pop(key, fallback) for key, fallback in (
+            ('name', None), ('fname', None), ('type', None),
+            ('start_line', 1))]
+
+        extra = data.pop('other_stuff', {})
+
+        # Whatever remains is passed as keyword arguments
+        return cls(*args, **data, **extra)
+
+    #
+    # Tracking of section and parameter information.
+    #
+    def set_sections(self, sections, start_lines):
+        """
+        Replace the sections and their start lines.
+        """
+        self.sections, self.sections_start_lines = sections, start_lines
+
+    def set_params(self, names, descs, types, starts):
+        """
+        Replace the parameter data: names, descriptions, types and
+        start lines.
+        """
+        self.parameterlist, self.parameterdescs = names, descs
+        self.parametertypes = types
+        self.parameterdesc_start_lines = starts
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
new file mode 100644
index 000000000000..de107ab4a281
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -0,0 +1,1160 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917
+
+"""
+Classes to implement output filters to print kernel-doc documentation.
+
+The implementation uses a virtual base class ``OutputFormat``. It
+contains dispatches to virtual methods, and some code to filter
+out output messages.
+
+The actual implementation is done on one separate class per each type
+of output, e.g. ``RestFormat`` and ``ManFormat`` classes.
+
+Currently, there are output classes for ReST and man/troff.
+"""
+
+import os
+import re
+from datetime import datetime
+
+from kdoc.kdoc_parser import KernelDoc, type_param
+from kdoc.kdoc_re import KernRe
+
+
+function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False)
+
+# match expressions used to find embedded type information
+type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False)
+type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False)
+type_func = KernRe(r"(\w+)\(\)", cache=False)
+type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+# Special RST handling for func ptr params
+type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False)
+
+# Special RST handling for structs with func ptr params
+type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False)
+
+type_env = KernRe(r"(\$\w+)", cache=False)
+type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False)
+type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False)
+type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False)
+type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False)
+type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False)
+type_fallback = KernRe(r"\&([_\w]+)", cache=False)
+type_member_func = type_member + KernRe(r"\(\)", cache=False)
+
+
+class OutputFormat:
+    """
+    Base class for OutputFormat. If used as-is, it means that only
+    warnings will be displayed.
+    """
+
+    # output mode.
+    OUTPUT_ALL          = 0 #: Output all symbols and doc sections.
+    OUTPUT_INCLUDE      = 1 #: Output only specified symbols.
+    OUTPUT_EXPORTED     = 2 #: Output exported symbols.
+    OUTPUT_INTERNAL     = 3 #: Output non-exported symbols.
+
+    #: Highlights to be used in ReST format.
+    highlights = []
+
+    #: Blank line character.
+    blankline = ""
+
+    def __init__(self):
+        """Declare internal vars and set mode to ``OUTPUT_ALL``."""
+
+        self.out_mode = self.OUTPUT_ALL
+        self.enable_lineno = None
+        self.nosymbol = set()    # a set, just like set_filter() stores
+        self.symbol = None
+        self.function_table = None
+        self.config = None
+        self.no_doc_sections = False
+
+        self.data = ""
+
+    def set_config(self, config):
+        """
+        Setup global config variables used by both parser and output.
+        """
+
+        self.config = config
+
+    def set_filter(self, export, internal, symbol, nosymbol, function_table,
+                   enable_lineno, no_doc_sections):
+        """
+        Initialize filter variables according to the requested mode.
+
+        Only one choice is valid between export, internal and symbol. If
+        more than one is set, symbol wins, followed by export.
+
+        The nosymbol filter can be used on all modes.
+        """
+        self.enable_lineno = enable_lineno
+        self.no_doc_sections = no_doc_sections
+        self.function_table = function_table
+
+        if symbol:
+            self.out_mode = self.OUTPUT_INCLUDE
+        elif export:
+            self.out_mode = self.OUTPUT_EXPORTED
+        elif internal:
+            self.out_mode = self.OUTPUT_INTERNAL
+        else:
+            self.out_mode = self.OUTPUT_ALL
+
+        # Always reset it, to not inherit the filter from a previous call
+        self.nosymbol = set(nosymbol) if nosymbol else set()
+
+
+    def highlight_block(self, block):
+        """
+        Apply the RST highlights to a sub-block of text.
+        """
+
+        for r, sub in self.highlights:
+            block = r.sub(sub, block)
+
+        return block
+
+    def out_warnings(self, args):
+        """
+        Output warnings for identifiers that will be displayed.
+        """
+
+        for log_msg in args.warnings:
+            self.config.warning(log_msg)
+
+    def check_doc(self, name, args):
+        """
+        Tell if a DOC block should be output, sending its warnings if so:
+        it must pass the no_doc_sections/nosymbol filters and the mode must
+        be OUTPUT_ALL, or OUTPUT_INCLUDE with name at the function table.
+        """
+        if self.no_doc_sections or name in self.nosymbol:
+            return False
+
+        if self.out_mode == self.OUTPUT_ALL:
+            wanted = True
+        elif self.out_mode == self.OUTPUT_INCLUDE:
+            wanted = name in self.function_table
+        else:
+            wanted = False
+
+        if wanted:
+            self.out_warnings(args)
+        return wanted
+
+    def check_declaration(self, dtype, name, args):
+        """
+        Checks if a declaration should be output or not based on the
+        filtering criteria. The warnings stored at the declaration are
+        emitted when it is going to be output.
+        """
+
+        if name in self.nosymbol:
+            return False
+
+        if self.out_mode == self.OUTPUT_ALL:
+            self.out_warnings(args)
+            return True
+
+        if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]:
+            if name in self.function_table:
+                # Warn here too, as done on all other modes
+                self.out_warnings(args)
+                return True
+
+        if self.out_mode == self.OUTPUT_INTERNAL:
+            # Only functions found at the function table are dropped
+            if dtype != "function" or name not in self.function_table:
+                self.out_warnings(args)
+                return True
+
+        return False
+
+    def msg(self, fname, name, args):
+        """
+        Handles a single entry from kernel-doc parser.
+
+        Returns the text placed at self.data by the out_*() method that
+        handles the entry type (empty if the entry got filtered out by
+        check_declaration()), or None if there is no handler for such
+        type.
+        """
+
+        self.data = ""
+        dtype = args.type
+
+        # DOC blocks don't pass through the declaration filter
+        if dtype == "doc":
+            self.out_doc(fname, name, args)
+            return self.data
+
+        if not self.check_declaration(dtype, name, args):
+            return self.data
+
+        # Output method for each type of declaration
+        handlers = {
+            "function": self.out_function,
+            "enum": self.out_enum,
+            "var": self.out_var,
+            "typedef": self.out_typedef,
+            "struct": self.out_struct,
+            "union": self.out_struct,
+        }
+
+        handler = handlers.get(dtype)
+        if handler is None:
+            # Warn if some type requires an output logic
+            self.config.log.warning("doesn't know how to output '%s' block",
+                                    dtype)
+            return None
+
+        handler(fname, name, args)
+
+        return self.data
+
+    def output_symbols(self, fname, symbols):
+        """
+        Handles a set of KdocItem symbols, returning the concatenation of
+        the output produced by msg() for each of them.
+        """
+        self.set_symbols(symbols)
+
+        msg = ""
+        for arg in symbols:
+            m = self.msg(fname, arg.name, arg)
+
+            if m is None:
+                # type is a KdocItem attribute, not a key get() can return
+                ln = arg.get("ln", 0)
+                self.config.log.warning("%s:%d Can't handle %s",
+                                        fname, ln, arg.type)
+            else:
+                msg += m
+
+        return msg
+
+    # Virtual methods to be overridden by inherited classes
+    # At the base class, those do nothing.
+    def set_symbols(self, symbols):
+        """Get a list of all symbols from kernel_doc."""
+
+    def out_doc(self, fname, name, args):
+        """Outputs a DOC block."""
+
+    def out_function(self, fname, name, args):
+        """Outputs a function."""
+
+    def out_enum(self, fname, name, args):
+        """Outputs an enum."""
+
+    def out_var(self, fname, name, args):
+        """Outputs a variable."""
+
+    def out_typedef(self, fname, name, args):
+        """Outputs a typedef."""
+
+    def out_struct(self, fname, name, args):
+        """Outputs a struct."""
+
+
+class RestFormat(OutputFormat):
+    """
+    Consts and functions used by ReST output.
+
+    Output is accumulated in ``self.data``; ``self.lineprefix`` holds the
+    indentation that ``output_highlight()`` prepends to each emitted line.
+    """
+
+    #: Highlights to be used in ReST format.
+    #: Each entry is a ``(pattern, replacement template)`` pair.
+    highlights = [
+        (type_constant, r"``\1``"),
+        (type_constant2, r"``\1``"),
+
+        # Note: need to escape () to avoid func matching later
+        (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"),
+        (type_member, r":c:type:`\1\2\3 <\1>`"),
+        (type_fp_param, r"**\1\\(\\)**"),
+        (type_fp_param2, r"**\1\\(\\)**"),
+        (type_func, r"\1()"),
+        (type_enum, r":c:type:`\1 <\2>`"),
+        (type_struct, r":c:type:`\1 <\2>`"),
+        (type_typedef, r":c:type:`\1 <\2>`"),
+        (type_union, r":c:type:`\1 <\2>`"),
+
+        # in rst this can refer to any type
+        (type_fallback, r":c:type:`\1`"),
+        (type_param_ref, r"**\1\2**")
+    ]
+
+    #: Text that represents a blank line in this output format.
+    blankline = "\n"
+
+    #: Sphinx literal block regex: a line that does not start with a dot
+    #: and ends with ``::``.
+    sphinx_literal = KernRe(r'^[^.].*::$', cache=False)
+
+    #: Sphinx code block regex: a line starting with ``.. code-block::``.
+    sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False)
+
+    def __init__(self):
+        """
+        Creates instance variables.
+
+        Not really mandatory, but it is a good coding style and makes
+        pylint happy.
+        """
+
+        super().__init__()
+        # Indentation prepended to every line emitted by output_highlight()
+        self.lineprefix = ""
+
+    def print_lineno(self, ln):
+        """Outputs a line number."""
+
+        if self.enable_lineno and ln is not None:
+            ln += 1
+            self.data += f".. LINENO {ln}\n"
+
+    def output_highlight(self, args):
+        """
+        Outputs a C symbol that may require being converted to ReST using
+        the self.highlights variable.
+
+        ``args`` is the text to emit. Lines inside a literal block (opened
+        by a line matching ``sphinx_literal`` or ``sphinx_cblock``) are
+        copied unchanged; everything else is accumulated and passed through
+        ``self.highlight_block()``. The result is appended to ``self.data``
+        line by line, each line prefixed with ``self.lineprefix``.
+        """
+
+        input_text = args
+        output = ""
+        in_literal = False      # True while inside a literal block
+        litprefix = ""          # Anchored regex text for the block indent
+        block = ""              # Pending text not yet highlighted
+
+        for line in input_text.strip("\n").split("\n"):
+
+            # If we're in a literal block, see if we should drop out of it.
+            # Otherwise, pass the line straight through unmunged.
+            if in_literal:
+                if line.strip():  # If the line is not blank
+                    # If this is the first non-blank line in a literal block,
+                    # figure out the proper indent.
+                    if not litprefix:
+                        r = KernRe(r'^(\s*)')
+                        if r.match(line):
+                            litprefix = '^' + r.group(1)
+                        else:
+                            litprefix = ""
+
+                        output += line + "\n"
+                    elif not KernRe(litprefix).match(line):
+                        # Indentation no longer matches: the literal block
+                        # is over. The line is handled by the branch below.
+                        in_literal = False
+                    else:
+                        output += line + "\n"
+                else:
+                    # Blank lines never end a literal block
+                    output += line + "\n"
+
+            # Not in a literal block (or just dropped out)
+            if not in_literal:
+                block += line + "\n"
+                if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line):
+                    # This line opens a literal block: flush the pending
+                    # text (including this line) and reset the indent.
+                    in_literal = True
+                    litprefix = ""
+                    output += self.highlight_block(block)
+                    block = ""
+
+        # Handle any remaining block
+        if block:
+            output += self.highlight_block(block)
+
+        # Print the output with the line prefix
+        for line in output.strip("\n").split("\n"):
+            self.data += self.lineprefix + line + "\n"
+
+    def out_section(self, args, out_docblock=False):
+        """
+        Outputs a block section.
+
+        This could use some work; it's used to output the DOC: sections, and
+        starts by putting out the name of the doc section itself, but that
+        tends to duplicate a header already in the template file.
+        """
+        for section, text in args.sections.items():
+            # Skip sections that are in the nosymbol_table
+            if section in self.nosymbol:
+                continue
+
+            if out_docblock:
+                if not self.out_mode == self.OUTPUT_INCLUDE:
+                    self.data += f".. _{section}:\n\n"
+                    self.data += f'{self.lineprefix}**{section}**\n\n'
+            else:
+                self.data += f'{self.lineprefix}**{section}**\n\n'
+
+            self.print_lineno(args.sections_start_lines.get(section, 0))
+            self.output_highlight(text)
+            self.data += "\n"
+        self.data += "\n"
+
+    def out_doc(self, fname, name, args):
+        if not self.check_doc(name, args):
+            return
+        self.out_section(args, out_docblock=True)
+
+    def out_function(self, fname, name, args):
+
+        oldprefix = self.lineprefix
+        signature = ""
+
+        func_macro = args.get('func_macro', False)
+        if func_macro:
+            signature = name
+        else:
+            if args.get('functiontype'):
+                signature = args['functiontype'] + " "
+            signature += name + " ("
+
+        ln = args.declaration_start_line
+        count = 0
+        for parameter in args.parameterlist:
+            if count != 0:
+                signature += ", "
+            count += 1
+            dtype = args.parametertypes.get(parameter, "")
+
+            if function_pointer.search(dtype):
+                signature += function_pointer.group(1) + parameter + function_pointer.group(3)
+            else:
+                signature += dtype
+
+        if not func_macro:
+            signature += ")"
+
+        self.print_lineno(ln)
+        if args.get('typedef') or not args.get('functiontype'):
+            self.data += f".. c:macro:: {name}\n\n"
+
+            if args.get('typedef'):
+                self.data += "   **Typedef**: "
+                self.lineprefix = ""
+                self.output_highlight(args.get('purpose', ""))
+                self.data += "\n\n**Syntax**\n\n"
+                self.data += f"  ``{signature}``\n\n"
+            else:
+                self.data += f"``{signature}``\n\n"
+        else:
+            self.data += f".. c:function:: {signature}\n\n"
+
+        if not args.get('typedef'):
+            self.print_lineno(ln)
+            self.lineprefix = "   "
+            self.output_highlight(args.get('purpose', ""))
+            self.data += "\n"
+
+        # Put descriptive text into a container (HTML <div>) to help set
+        # function prototypes apart
+        self.lineprefix = "  "
+
+        if args.parameterlist:
+            self.data += ".. container:: kernelindent\n\n"
+            self.data += f"{self.lineprefix}**Parameters**\n\n"
+
+        for parameter in args.parameterlist:
+            parameter_name = KernRe(r'\[.*').sub('', parameter)
+            dtype = args.parametertypes.get(parameter, "")
+
+            if dtype:
+                self.data += f"{self.lineprefix}``{dtype}``\n"
+            else:
+                self.data += f"{self.lineprefix}``{parameter}``\n"
+
+            self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
+
+            self.lineprefix = "    "
+            if parameter_name in args.parameterdescs and \
+               args.parameterdescs[parameter_name] != KernelDoc.undescribed:
+
+                self.output_highlight(args.parameterdescs[parameter_name])
+                self.data += "\n"
+            else:
+                self.data += f"{self.lineprefix}*undescribed*\n\n"
+            self.lineprefix = "  "
+
+        self.out_section(args)
+        self.lineprefix = oldprefix
+
+    def out_enum(self, fname, name, args):
+
+        oldprefix = self.lineprefix
+        ln = args.declaration_start_line
+
+        self.data += f"\n\n.. c:enum:: {name}\n\n"
+
+        self.print_lineno(ln)
+        self.lineprefix = "  "
+        self.output_highlight(args.get('purpose', ''))
+        self.data += "\n"
+
+        self.data += ".. container:: kernelindent\n\n"
+        outer = self.lineprefix + "  "
+        self.lineprefix = outer + "  "
+        self.data += f"{outer}**Constants**\n\n"
+
+        for parameter in args.parameterlist:
+            self.data += f"{outer}``{parameter}``\n"
+
+            if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed:
+                self.output_highlight(args.parameterdescs[parameter])
+            else:
+                self.data += f"{self.lineprefix}*undescribed*\n\n"
+            self.data += "\n"
+
+        self.lineprefix = oldprefix
+        self.out_section(args)
+
+    def out_var(self, fname, name, args):
+        oldprefix = self.lineprefix
+        ln = args.declaration_start_line
+        full_proto = args.other_stuff.get("full_proto")
+        if not full_proto:
+            raise KeyError(f"Can't find full proto for {name} variable")
+
+        self.lineprefix = "  "
+
+        self.data += f"\n\n.. c:macro:: {name}\n\n{self.lineprefix}``{full_proto}``\n\n"
+
+        self.print_lineno(ln)
+        self.output_highlight(args.get('purpose', ''))
+        self.data += "\n"
+
+        if args.other_stuff["default_val"]:
+            self.data += f'{self.lineprefix}**Initialization**\n\n'
+            self.output_highlight(f'default: ``{args.other_stuff["default_val"]}``')
+
+        self.out_section(args)
+
+    def out_typedef(self, fname, name, args):
+
+        oldprefix = self.lineprefix
+        ln = args.declaration_start_line
+
+        self.data += f"\n\n.. c:type:: {name}\n\n"
+
+        self.print_lineno(ln)
+        self.lineprefix = "   "
+
+        self.output_highlight(args.get('purpose', ''))
+
+        self.data += "\n"
+
+        self.lineprefix = oldprefix
+        self.out_section(args)
+
+    def out_struct(self, fname, name, args):
+
+        purpose = args.get('purpose', "")
+        declaration = args.get('definition', "")
+        dtype = args.type
+        ln = args.declaration_start_line
+
+        self.data += f"\n\n.. c:{dtype}:: {name}\n\n"
+
+        self.print_lineno(ln)
+
+        oldprefix = self.lineprefix
+        self.lineprefix += "  "
+
+        self.output_highlight(purpose)
+        self.data += "\n"
+
+        self.data += ".. container:: kernelindent\n\n"
+        self.data += f"{self.lineprefix}**Definition**::\n\n"
+
+        self.lineprefix = self.lineprefix + "  "
+
+        declaration = declaration.replace("\t", self.lineprefix)
+
+        self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n"
+        self.data += f"{declaration}{self.lineprefix}" + "};\n\n"
+
+        self.lineprefix = "  "
+        self.data += f"{self.lineprefix}**Members**\n\n"
+        for parameter in args.parameterlist:
+            if not parameter or parameter.startswith("#"):
+                continue
+
+            parameter_name = parameter.split("[", maxsplit=1)[0]
+
+            if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+                continue
+
+            self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
+
+            self.data += f"{self.lineprefix}``{parameter}``\n"
+
+            self.lineprefix = "    "
+            self.output_highlight(args.parameterdescs[parameter_name])
+            self.lineprefix = "  "
+
+            self.data += "\n"
+
+        self.data += "\n"
+
+        self.lineprefix = oldprefix
+        self.out_section(args)
+
+
+class ManFormat(OutputFormat):
+    """
+    Consts and functions used by man pages output.
+
+    This class accepts the optional parameters below, which are needed to
+    define the title header contents:
+
+    ``modulename``
+        Defines the module name to be used at the troff ``.TH`` output.
+
+        This argument is optional. If not specified, it will be filled
+        with the directory which contains the documented file.
+
+    ``section``
+        Usually a numeric value from 0 to 9, but man pages also accept
+        some strings like "p".
+
+        Defaults to ``9``.
+
+    ``manual``
+        Defaults to ``Kernel API Manual``.
+
+    The above controls the output of the corresponding fields on troff
+    title headers, which will be filled like this::
+
+        .TH "{name}" {section} "{date}" "{modulename}" "{manual}"
+
+    where ``name`` will match the API symbol name, and ``date`` will be
+    either the date taken from the ``KBUILD_BUILD_TIMESTAMP`` environment
+    variable or the current date, formatted as month and year.
+    """
+
+    #: Highlights to be used in man format.
+    #: Each entry is a ``(pattern, replacement template)`` pair.
+    highlights = (
+        (type_constant, r"\1"),
+        (type_constant2, r"\1"),
+        (type_func, r"\\fB\1\\fP"),
+        (type_enum, r"\\fI\1\\fP"),
+        (type_struct, r"\\fI\1\\fP"),
+        (type_typedef, r"\\fI\1\\fP"),
+        (type_union, r"\\fI\1\\fP"),
+        (type_param, r"\\fI\1\\fP"),
+        (type_param_ref, r"\\fI\1\2\\fP"),
+        (type_member, r"\\fI\1\2\3\\fP"),
+        (type_fallback, r"\\fI\1\\fP")
+    )
+
+    #: Text that represents a blank line in this output format.
+    blankline = ""
+
+    #: Allowed timestamp formats, tried in this order by ``__init__()``.
+    date_formats = [
+        "%a %b %d %H:%M:%S %Z %Y",
+        "%a %b %d %H:%M:%S %Y",
+        "%Y-%m-%d",
+        "%b %d %Y",
+        "%B %d %Y",
+        "%m %d %Y",
+    ]
+
+    def modulename(self, args):
+        if self._modulename:
+            return self._modulename
+
+        return os.path.dirname(args.fname)
+
+    def emit_th(self, name, args):
+        """Emit a title header line."""
+        title = name.strip()
+        module = self.modulename(args)
+
+        self.data += f'.TH "{title}" {self.section} "{self.date}" '
+        self.data += f'"{module}" "{self.manual}"\n'
+
+    def __init__(self, modulename=None, section="9", manual="Kernel API Manual"):
+        """
+        Creates class variables.
+
+        Not really mandatory, but it is a good coding style and makes
+        pylint happy.
+        """
+
+        super().__init__()
+
+        self._modulename = modulename
+        self.section = section
+        self.manual = manual
+
+        self.symbols = []
+
+        dt = None
+        tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP")
+        if tstamp:
+            for fmt in self.date_formats:
+                try:
+                    dt = datetime.strptime(tstamp, fmt)
+                    break
+                except ValueError:
+                    pass
+
+        if not dt:
+            dt = datetime.now()
+
+        self.date = dt.strftime("%B %Y")
+
+    def arg_name(self, args, name):
+        """
+        Return the name that will be used for the man page.
+
+        As we may have the same name on different namespaces,
+        prepend the data type for all types except functions and typedefs.
+
+        The doc section is special: it uses the modulename.
+        """
+
+        dtype = args.type
+
+        if dtype == "doc":
+            return name
+#            return os.path.basename(self.modulename(args))
+
+        if dtype in ["function", "typedef"]:
+            return name
+
+        return f"{dtype} {name}"
+
+    def set_symbols(self, symbols):
+        """
+        Store the list of all symbols received from kernel_doc.
+
+        Man pages use it to add a SEE ALSO section listing the other
+        symbols from the same file.
+        """
+        self.symbols = symbols
+
+    def out_tail(self, fname, name, args):
+        """Adds a tail for all man pages."""
+
+        # SEE ALSO section
+        self.data += f'.SH "SEE ALSO"' + "\n.PP\n"
+        self.data += (f"Kernel file \\fB{args.fname}\\fR\n")
+        if len(self.symbols) >= 2:
+            cur_name = self.arg_name(args, name)
+
+            related = []
+            for arg in self.symbols:
+                out_name = self.arg_name(arg, arg.name)
+
+                if cur_name == out_name:
+                    continue
+
+                related.append(f"\\fB{out_name}\\fR(9)")
+
+            self.data += ",\n".join(related) + "\n"
+
+        # TODO: does it make sense to add other sections? Maybe
+        # REPORTING ISSUES? LICENSE?
+
+    def msg(self, fname, name, args):
+        """
+        Handles a single entry from kernel-doc parser.
+
+        Runs the parent class ``msg()``, then adds a tail at the end of the
+        man page output via ``out_tail()``. Returns ``self.data``.
+
+        NOTE(review): the value returned by the parent ``msg()`` is
+        discarded, so the tail is appended and ``self.data`` is returned
+        regardless of it -- confirm this is intended.
+        """
+        super().msg(fname, name, args)
+        self.out_tail(fname, name, args)
+
+        return self.data
+
+    def emit_table(self, colspec_row, rows):
+
+        if not rows:
+            return ""
+
+        out = ""
+        colspec = "\t".join(["l"] * len(rows[0]))
+
+        out += "\n.TS\n"
+        out += "box;\n"
+        out += f"{colspec}.\n"
+
+        if colspec_row:
+            out_row = []
+
+            for text in colspec_row:
+                out_row.append(f"\\fB{text}\\fP")
+
+            out += "\t".join(out_row) + "\n_\n"
+
+        for r in rows:
+            out += "\t".join(r) + "\n"
+
+        out += ".TE\n"
+
+        return out
+
+    def grid_table(self, lines, start):
+        """
+        Ancillary function to help handling a grid table inside the text.
+        """
+
+        i = start + 1
+        rows = []
+        colspec_row = None
+
+        while i < len(lines):
+            line = lines[i]
+
+            if KernRe(r"^\s*\|.*\|\s*$").match(line):
+                parts = []
+
+                for p in line.strip('|').split('|'):
+                    parts.append(p.strip())
+
+                rows.append(parts)
+
+            elif KernRe(r'^\+\=[\+\=]+\+\s*$').match(line):
+                if rows and rows[0]:
+                    if not colspec_row:
+                        colspec_row = [""] * len(rows[0])
+
+                    for j in range(0, len(rows[0])):
+                        content = []
+                        for row in rows:
+                            content.append(row[j])
+
+                        colspec_row[j] = " ".join(content)
+
+                    rows = []
+
+            elif KernRe(r"^\s*\+[-+]+\+.*$").match(line):
+                pass
+
+            else:
+                break
+
+            i += 1
+
+        return i, self.emit_table(colspec_row, rows)
+
+    def simple_table(self, lines, start):
+        """
+        Ancillary function to help handling a simple table inside the text.
+        """
+
+        i = start
+        rows = []
+        colspec_row = None
+
+        pos = []
+        for m in KernRe(r'\=+').finditer(lines[i]):
+            pos.append((m.start(), m.end() - 1))
+
+        i += 1
+        while i < len(lines):
+            line = lines[i]
+
+            if KernRe(r"^\s*[\=]+[ \t\=]+$").match(line):
+                i += 1
+                break
+
+            elif KernRe(r'^[\s=]+$').match(line):
+                if rows and rows[0]:
+                    if not colspec_row:
+                        colspec_row = [""] * len(rows[0])
+
+                    for j in range(0, len(rows[0])):
+                        content = []
+                        for row in rows:
+                            content.append(row[j])
+
+                        colspec_row[j] = " ".join(content)
+
+                    rows = []
+
+            else:
+                row = [""] * len(pos)
+
+                for j in range(0, len(pos)):
+                    start, end = pos[j]
+
+                    row[j] = line[start:end].strip()
+
+                rows.append(row)
+
+            i += 1
+
+        return i, self.emit_table(colspec_row, rows)
+
+    def code_block(self, lines, start):
+        """
+        Ensure that code blocks won't be messed up at the output.
+
+        By default, troff joins lines of the same paragraph. Disable that
+        on code blocks by wrapping them in ``.nf`` / ``.fi``.
+
+        ``lines[start]`` is the line that opens the block. Returns a tuple
+        ``(next_index, text)`` with the index where the caller should
+        resume and the troff text generated for the block.
+        """
+
+        line = lines[start]
+
+        if "code-block" in line:
+            # The directive line itself is not printed
+            out = "\n.nf\n"
+        elif line.startswith("..") and line.endswith("::"):
+            #
+            # Handle note, warning, error, ... markups
+            #
+            # Drops the leading ".." and one trailing ":", then uppercases:
+            # ".. note::" becomes "NOTE:".
+            line = line[2:-1].strip().upper()
+            out = f"\n.nf\n\\fB{line}\\fP\n"
+        elif line.endswith("::"):
+            # Keep the introductory text with a single trailing colon
+            out = line[:-1]
+            out += "\n.nf\n"
+        else:
+            # Just in case. Should never happen in practice
+            out = "\n.nf\n"
+
+        i = start + 1
+        ident = None    # Indentation of the first non-blank body line
+
+        while i < len(lines):
+            line = lines[i]
+
+            # NOTE(review): if KernRe.match() anchors at the start of the
+            # line like re.match(), this only matches unindented lines and
+            # pos below is always 0 -- confirm against KernRe.
+            m = KernRe(r"\S").match(line)
+            if not m:
+                # No match: the line is copied into the block unchanged
+                out += line + "\n"
+                i += 1
+                continue
+
+            pos = m.start()
+            if not ident:
+                if pos > 0:
+                    ident = pos
+                else:
+                    # Unindented text before any indentation was recorded:
+                    # the block is over.
+                    out += "\n.fi\n"
+                    if i > start + 1:
+                        # Resume at the line preceding the one that ended
+                        # the block.
+                        return i - 1, out
+                    else:
+                        # Just in case. Should never happen in practice
+                        return i, out
+
+            if pos >= ident:
+                out += line + "\n"
+                i += 1
+                continue
+
+            # Less indented than the block: stop here
+            break
+
+        out += "\n.fi\n"
+        return i, out
+
+    def output_highlight(self, block):
+        """
+        Outputs a C symbol that may require being highlighted with
+        self.highlights variable using troff syntax.
+        """
+
+        contents = self.highlight_block(block)
+
+        if isinstance(contents, list):
+            contents = "\n".join(contents)
+
+        lines = contents.strip("\n").split("\n")
+        i = 0
+
+        while i < len(lines):
+            org_line = lines[i]
+
+            line = KernRe(r"^\s*").sub("", org_line)
+
+            if line:
+                if KernRe(r"^\+\-[-+]+\+.*$").match(line):
+                    i, text = self.grid_table(lines, i)
+                    self.data += text
+                    continue
+
+                if KernRe(r"^\=+[ \t]\=[ \t\=]+$").match(line):
+                    i, text = self.simple_table(lines, i)
+                    self.data += text
+                    continue
+
+                if line.endswith("::") or KernRe(r"\.\.\s+code-block.*::").match(line):
+                    i, text = self.code_block(lines, i)
+                    self.data += text
+                    continue
+
+                if line[0] == ".":
+                    self.data += "\\&" + line + "\n"
+                    i += 1
+                    continue
+
+                #
+                # Handle lists
+                #
+                line = KernRe(r'^[-*]\s+').sub(r'.IP \[bu]\n', line)
+                line = KernRe(r'^(\d+|a-z)[\.\)]\s+').sub(r'.IP \1\n', line)
+            else:
+                line = ".PP\n"
+
+            i += 1
+
+            self.data += line + "\n"
+
+    def out_doc(self, fname, name, args):
+        if not self.check_doc(name, args):
+            return
+
+        out_name = self.arg_name(args, name)
+
+        self.emit_th(out_name, args)
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
+
+    def out_function(self, fname, name, args):
+
+        out_name = self.arg_name(args, name)
+
+        self.emit_th(out_name, args)
+
+        self.data += ".SH NAME\n"
+        self.data += f"{name} \\- {args['purpose']}\n"
+
+        self.data += ".SH SYNOPSIS\n"
+        if args.get('functiontype', ''):
+            self.data += f'.B "{args["functiontype"]}" {name}' + "\n"
+        else:
+            self.data += f'.B "{name}' + "\n"
+
+        count = 0
+        parenth = "("
+        post = ","
+
+        for parameter in args.parameterlist:
+            if count == len(args.parameterlist) - 1:
+                post = ");"
+
+            dtype = args.parametertypes.get(parameter, "")
+            if function_pointer.match(dtype):
+                # Pointer-to-function
+                self.data += f'".BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n"
+            else:
+                dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype)
+
+                self.data += f'.BI "{parenth}{dtype}"  "{post}"' + "\n"
+            count += 1
+            parenth = ""
+
+        if args.parameterlist:
+            self.data += ".SH ARGUMENTS\n"
+
+        for parameter in args.parameterlist:
+            parameter_name = re.sub(r'\[.*', '', parameter)
+
+            self.data += f'.IP "{parameter}" 12' + "\n"
+            self.output_highlight(args.parameterdescs.get(parameter_name, ""))
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section.upper()}"' + "\n"
+            self.output_highlight(text)
+
+    def out_enum(self, fname, name, args):
+        out_name = self.arg_name(args, name)
+
+        self.emit_th(out_name, args)
+
+        self.data += ".SH NAME\n"
+        self.data += f"enum {name} \\- {args['purpose']}\n"
+
+        self.data += ".SH SYNOPSIS\n"
+        self.data += f"enum {name}" + " {\n"
+
+        count = 0
+        for parameter in args.parameterlist:
+            self.data += f'.br\n.BI "    {parameter}"' + "\n"
+            if count == len(args.parameterlist) - 1:
+                self.data += "\n};\n"
+            else:
+                self.data += ", \n.br\n"
+
+            count += 1
+
+        self.data += ".SH Constants\n"
+
+        for parameter in args.parameterlist:
+            parameter_name = KernRe(r'\[.*').sub('', parameter)
+            self.data += f'.IP "{parameter}" 12' + "\n"
+            self.output_highlight(args.parameterdescs.get(parameter_name, ""))
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
+
+    def out_var(self, fname, name, args):
+        out_name = self.arg_name(args, name)
+        full_proto = args.other_stuff["full_proto"]
+
+        self.emit_th(out_name, args)
+
+        self.data += ".SH NAME\n"
+        self.data += f"{name} \\- {args['purpose']}\n"
+
+        self.data += ".SH SYNOPSIS\n"
+        self.data += f"{full_proto}\n"
+
+        if args.other_stuff["default_val"]:
+            self.data += f'.SH "Initialization"' + "\n"
+            self.output_highlight(f'default: {args.other_stuff["default_val"]}')
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
+
+    def out_typedef(self, fname, name, args):
+        module = self.modulename(args)
+        purpose = args.get('purpose')
+        out_name = self.arg_name(args, name)
+
+        self.emit_th(out_name, args)
+
+        self.data += ".SH NAME\n"
+        self.data += f"typedef {name} \\- {purpose}\n"
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
+
+    def out_struct(self, fname, name, args):
+        module = self.modulename(args)
+        purpose = args.get('purpose')
+        definition = args.get('definition')
+        out_name = self.arg_name(args, name)
+
+        self.emit_th(out_name, args)
+
+        self.data += ".SH NAME\n"
+        self.data += f"{args.type} {name} \\- {purpose}\n"
+
+        # Replace tabs with two spaces and handle newlines
+        declaration = definition.replace("\t", "  ")
+        declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration)
+
+        self.data += ".SH SYNOPSIS\n"
+        self.data += f"{args.type} {name} " + "{" + "\n.br\n"
+        self.data += f'.BI "{declaration}\n' + "};\n.br\n\n"
+
+        self.data += ".SH Members\n"
+        for parameter in args.parameterlist:
+            if parameter.startswith("#"):
+                continue
+
+            parameter_name = re.sub(r"\[.*", "", parameter)
+
+            if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+                continue
+
+            self.data += f'.IP "{parameter}" 12' + "\n"
+            self.output_highlight(args.parameterdescs.get(parameter_name))
+
+        for section, text in args.sections.items():
+            self.data += f'.SH "{section}"' + "\n"
+            self.output_highlight(text)
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
new file mode 100644
index 000000000000..c3f966da533e
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -0,0 +1,1679 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
+
+"""
+Classes and functions related to reading a C language source or header FILE
+and extract embedded documentation comments from it.
+"""
+
+import sys
+import re
+from pprint import pformat
+
+from kdoc.c_lex import CTokenizer, tokenizer_set_log
+from kdoc.kdoc_re import KernRe
+from kdoc.kdoc_item import KdocItem
+
+#
+# Regular expressions used by the KernelDoc class to parse kernel-doc markup.
+#
+# Let's declare them in lowercase outside any class to make it easier to
+# convert from the Perl script.
+#
+# As those are evaluated once, at module load, there is no need to cache them
+#
+
+# Allow whitespace at end of comment start.
+doc_start = KernRe(r'^/\*\*\s*$', cache=False)
+
+doc_end = KernRe(r'\*/', cache=False)            # "*/" anywhere in the line
+doc_com = KernRe(r'\s*\*\s*', cache=False)       # optional white space, "*", optional white space
+doc_com_body = KernRe(r'\s*\* ?', cache=False)   # optional white space, "*", at most one space
+doc_decl = doc_com + KernRe(r'(\w+)', cache=False)
+
+# @params and a strictly limited set of supported section names
+# Specifically:
+#   Match @word:
+#         @...:
+#         @{section-name}:
+# while trying to not match literal block starts like "example::"
+#
+known_section_names = 'description|context|returns?|notes?|examples?'
+known_sections = KernRe(known_section_names, flags = re.I)
+doc_sect = doc_com + \
+    KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
+           flags=re.I, cache=False)
+
+doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
+doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
+doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
+doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
+doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@\s*[\w][\w\.]*\s*):\s*(.*)\s*\*/\s*$', cache=False)
+
+export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)  # group 2: symbol name
+export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)  # group 2: symbol name
+
+type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+#
+# Tests for the beginning of a kerneldoc block in its various forms.
+#
+doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
+doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef|var)\b\s*(\w*)", cache = False)
+doc_begin_func = KernRe(str(doc_com) +                  # initial " * "
+                        r"(?:\w+\s*\*\s*)?" +           # type (not captured)
+                        r'(?:define\s+)?' +             # possible "define" (not captured)
+                        r'(\w+)\s*(?:\(\w*\))?\s*' +    # name and optional "(...)"
+                        r'(?:[-:].*)?$',                # description (not captured)
+                        cache = False)
+
+#
+# Ancillary functions
+#
+
+multi_space = KernRe(r'\s\s+')
+
+def trim_whitespace(s):
+    """Strip ``s`` and squeeze every run of two or more whitespace characters into one space."""
+    stripped = s.strip()
+    return multi_space.sub(' ', stripped)
+
+def trim_private_members(text):
+    """
+    Return ``text`` after a round trip through ``CTokenizer`` (build, then ``str()``).
+
+    Intended to remove ``struct``/``enum`` members that have been marked "private".
+    """
+    return str(CTokenizer(text))
+
+class state:
+    """
+    Integer constants naming the states of the parser's state machine.
+    """
+
+    # Parser states
+    NORMAL        = 0        #: Normal code.
+    NAME          = 1        #: Looking for function name.
+    DECLARATION   = 2        #: We have seen a declaration which might not be done.
+    BODY          = 3        #: The body of the comment.
+    SPECIAL_SECTION = 4      #: Doc section ending with a blank line.
+    PROTO         = 5        #: Scanning prototype.
+    DOCBLOCK      = 6        #: Documentation block.
+    INLINE_NAME   = 7        #: Gathering doc outside main block.
+    INLINE_TEXT   = 8        #: Reading the body of inline docs.
+
+    #: Names for each parser state, indexed by the state value.
+    name = [
+        "NORMAL",
+        "NAME",
+        "DECLARATION",
+        "BODY",
+        "SPECIAL_SECTION",
+        "PROTO",
+        "DOCBLOCK",
+        "INLINE_NAME",
+        "INLINE_TEXT",
+    ]
+
+
+SECTION_DEFAULT = "Description"  #: Default section.
+
+class KernelEntry:
+    """
+    Accumulates the pieces of one kernel-doc entry while it is being parsed.
+    """
+
+    def __init__(self, config, fname, ln):
+        self.config = config
+        self.fname = fname
+
+        self._contents = []
+        self.prototype = ""
+
+        self.warnings = []
+
+        self.parameterlist = []
+        self.parameterdescs = {}
+        self.parametertypes = {}
+        self.parameterdesc_start_lines = {}
+
+        self.sections_start_lines = {}
+        self.sections = {}
+
+        self.anon_struct_union = False
+        self.leading_space = None
+
+        # Section being filled in; set here so dump_section() works before begin_section()
+        self.section = SECTION_DEFAULT
+        self.new_start_line = 0
+
+        # State flags
+        self.brcount = 0
+        self.declaration_start_line = ln + 1
+
+    #
+    # Management of section contents
+    #
+    def add_text(self, text):
+        """Append one piece of text to the contents of the current section."""
+        self._contents.append(text)
+
+    def contents(self):
+        """Return all added texts joined by newlines, with a trailing newline."""
+        return '\n'.join(self._contents) + '\n'
+
+    # TODO: rename to emit_message after removal of kernel-doc.pl
+    def emit_msg(self, ln, msg, *, warning=True):
+        """Log msg as info right away, or (the default) queue it in self.warnings."""
+
+        log_msg = f"{self.fname}:{ln} {msg}"
+
+        if not warning:
+            self.config.log.info(log_msg)
+            return
+
+        # Delegate warning output to output logic, as this way it
+        # will report warnings/info only for symbols that are output
+
+        self.warnings.append(log_msg)
+
+    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
+        """
+        Begin a new section called title at line_no; with dump set, the pending section is stored first.
+        """
+        if dump:
+            self.dump_section(start_new = True)
+        self.section = title
+        self.new_start_line = line_no
+
+    def dump_section(self, start_new=True):
+        """
+        Store the accumulated contents as a parameter description ("@name" sections) or as a named section.
+        """
+        #
+        # If we have accumulated no contents in the default ("description")
+        # section, don't bother.
+        #
+        if self.section == SECTION_DEFAULT and not self._contents:
+            return
+        name = self.section
+        contents = self.contents()
+
+        if type_param.match(name):
+            name = type_param.group(1)
+
+            self.parameterdescs[name] = contents
+            self.parameterdesc_start_lines[name] = self.new_start_line
+
+            self.new_start_line = 0
+
+        else:
+            if name in self.sections and self.sections[name] != "":
+                # Only warn on user-specified duplicate section names
+                if name != SECTION_DEFAULT:
+                    self.emit_msg(self.new_start_line,
+                                  f"duplicate section name '{name}'")
+                # Treat as a new paragraph - add a blank line
+                self.sections[name] += '\n' + contents
+            else:
+                self.sections[name] = contents
+                self.sections_start_lines[name] = self.new_start_line
+                self.new_start_line = 0
+
+        # With start_new, go back to an empty default section
+
+        if start_new:
+            self.section = SECTION_DEFAULT
+            self._contents = []
+
+python_warning = False  # Set once the "Python 3.7 or later" warning has been emitted
+
+class KernelDoc:
+    """
+    Read a C language source or header FILE and extract embedded
+    documentation comments.
+    """
+
+    #: Name of context section.
+    section_context = "Context"
+
+    #: Name of return section.
+    section_return = "Return"
+
+    #: Description stored for a parameter or member that has none.
+    undescribed = "-- undescribed --"
+
+    def __init__(self, config, fname, xforms, store_src=False):
+        """
+        Store the arguments, hand the logger and a per-file prefix to
+        tokenizer_set_log() and start with no entry and no results.
+        """
+        global python_warning
+
+        self.fname = fname
+        self.config = config
+        self.xforms = xforms
+        self.store_src = store_src
+
+        tokenizer_set_log(config.log, f"{fname}: CMatch: ")
+
+        # The state machine starts in the NORMAL state
+        self.state = state.NORMAL
+
+        # No entry in progress yet; potential outputs accumulate in entries
+        self.entry = None
+        self.entries = []
+
+        #
+        # We need Python 3.7 for its "dicts remember the insertion
+        # order" guarantee; the warning is emitted only once.
+        #
+        too_old = sys.version_info.major == 3 and sys.version_info.minor < 7
+        if not python_warning and too_old:
+            self.emit_msg(0,
+                          'Python 3.7 or later is required for correct results')
+            python_warning = True
+
+    def emit_msg(self, ln, msg, *, warning=True):
+        """
+        Report ``msg`` for line ``ln``.
+
+        With an entry in progress the message is handed to that entry;
+        otherwise it is logged at once as a warning (default) or as info.
+        """
+        if not self.entry:
+            log = self.config.log.warning if warning else self.config.log.info
+            log(f"{self.fname}:{ln} {msg}")
+            return
+
+        self.entry.emit_msg(ln, msg, warning=warning)
+
+    def dump_section(self, start_new=True):
+        """
+        Forward to the current entry's dump_section(); do nothing when no entry is in progress.
+        """
+        current = self.entry
+        if current:
+            current.dump_section(start_new)
+
+    # TODO: rename it to store_declaration after removal of kernel-doc.pl
+    def output_declaration(self, dtype, name, **args):
+        """
+        Build a KdocItem from the current entry and append it to self.entries.
+
+        The actual output and output filters will be handled elsewhere.
+        """
+        entry = self.entry
+        item = KdocItem(name, self.fname, dtype,
+                        entry.declaration_start_line, **args)
+        item.warnings = entry.warnings
+
+        # Drop empty sections
+        # TODO: improve empty sections logic to emit warnings
+        sections = entry.sections
+        for title in ("Description", "Return"):
+            if title in sections and not sections[title].rstrip():
+                del sections[title]
+
+        item.set_sections(sections, entry.sections_start_lines)
+        item.set_params(entry.parameterlist, entry.parameterdescs,
+                        entry.parametertypes,
+                        entry.parameterdesc_start_lines)
+        self.entries.append(item)
+        self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
+
+    def emit_unused_warnings(self):
+        """
+        Flush the warnings of an entry that is not part of `entries`.
+
+        Warnings collected while parsing are kept under `entry.warnings`,
+        and resetting the state discards the entry, so report them now
+        through `config.warning()` instead of losing them.
+
+        .. note::
+
+              Because `config.warning()` is called here, those warnings
+              are not filtered by the `-W` parameters: they will all be
+              produced even when `-Wreturn`, `-Wshort-desc`, and/or
+              `-Wcontents-before-sections` are used.
+
+              Filtering them would require buffering the messages and
+              deciding during the output step, depending on the selected
+              symbols, which of them to show.
+        """
+        pending = self.entry
+        if not pending or pending in self.entries:
+            return
+        for log_msg in pending.warnings:
+            self.config.warning(log_msg)
+
+    def reset_state(self, ln):
+        """
+        Start a fresh entry for line ``ln`` and put the state machine back
+        into the NORMAL state.
+
+        Warnings still pending on the previous entry are flushed first.
+        """
+        self.emit_unused_warnings()
+
+        # New entry, parser back to normal code
+        self.entry = KernelEntry(self.config, self.fname, ln)
+        self.state = state.NORMAL
+
+    def push_parameter(self, ln, decl_type, param, dtype,
+                       org_arg, declaration_name):
+        """
+        Record one parameter/member at self.entry: its name, its declaration text and, if missing, a placeholder description.
+        """
+
+        if self.entry.anon_struct_union and dtype == "" and param == "}":
+            return  # Ignore the ending }; from anonymous struct/union
+
+        self.entry.anon_struct_union = False
+        # Drop everything from the first "[" or ")" in the name
+        param = KernRe(r'[\[\)].*').sub('', param, count=1)
+
+        #
+        # Look at various "anonymous type" cases.
+        #
+        if dtype == '':
+            if param.endswith("..."):
+                if len(param) > 3: # there is a name provided, use that
+                    param = param[:-3]
+                if not self.entry.parameterdescs.get(param):
+                    self.entry.parameterdescs[param] = "variable arguments"
+
+            elif (not param) or param == "void":
+                param = "void"
+                self.entry.parameterdescs[param] = "no arguments"
+
+            elif param in ["struct", "union"]:
+                # Handle unnamed (anonymous) union or struct
+                dtype = param
+                param = "{unnamed_" + param + "}"
+                self.entry.parameterdescs[param] = "anonymous\n"
+                self.entry.anon_struct_union = True
+
+        # Warn if parameter has no description
+        # (but ignore ones starting with # as these are not parameters
+        # but inline preprocessor statements)
+        if param not in self.entry.parameterdescs and not param.startswith("#"):
+            self.entry.parameterdescs[param] = self.undescribed
+            # Names containing "." get the placeholder but no warning
+            if "." not in param:
+                if decl_type == 'function':
+                    dname = f"{decl_type} parameter"
+                else:
+                    dname = f"{decl_type} member"
+
+                self.emit_msg(ln,
+                              f"{dname} '{param}' not described in '{declaration_name}'")
+
+        # No further white space stripping is done on param here: it is
+        # appended to parameterlist exactly as it stands.  Everything from
+        # the first "[" or ")" was already removed at the top of this
+        # function, so an array member such as "addr[6 + 2]" is listed
+        # simply as "addr".  The declaration text (org_arg) is stored per
+        # parameter in parametertypes, with each run of two or more white
+        # space characters squeezed into a single space.
+
+        self.entry.parameterlist.append(param)
+        org_arg = KernRe(r'\s\s+').sub(' ', org_arg)
+        self.entry.parametertypes[param] = org_arg
+
+
+    def create_parameter_list(self, ln, decl_type, args,
+                              splitter, declaration_name):
+        """
+        Split args at splitter and record every resulting parameter/member at self.entry via push_parameter().
+        """
+
+        # Temporarily turn each comma found after a "(" (with no ")" or "," in between) into "#"
+        arg_expr = KernRe(r'(\([^\),]+),')
+        while arg_expr.search(args):
+            args = arg_expr.sub(r"\1#", args)
+
+        for arg in args.split(splitter):
+            # Ignore argument attributes
+            arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
+
+            # Replace '[at_least ' with '[static '.  This allows sphinx to parse
+            # array parameter declarations like 'char A[at_least 4]', where
+            # 'at_least' is #defined to 'static' by the kernel headers.
+            arg = arg.replace('[at_least ', '[static ')
+
+            # Strip leading/trailing spaces and squeeze the first white space run
+            arg = arg.strip()
+            arg = KernRe(r'\s+').sub(' ', arg, count=1)
+
+            if arg.startswith('#'):
+                # Treat preprocessor directive as a typeless variable just to fill
+                # corresponding data structures "correctly". Catch it later in
+                # output_* subs.
+
+                # Both the type and the original declaration are passed as ""
+                self.push_parameter(ln, decl_type, arg, "",
+                                    "", declaration_name)
+            #
+            # The pointer-to-function case.
+            #
+            elif KernRe(r'\(.+\)\s*\(').search(arg):
+                arg = arg.replace('#', ',')
+                r = KernRe(r'[^\(]+\(\*?\s*'  # Everything up to "(*"
+                           r'([\w\[\].]*)'    # Capture the name and possible [array]
+                           r'\s*\)')          # Make sure the trailing ")" is there
+                if r.match(arg):
+                    param = r.group(1)
+                else:
+                    self.emit_msg(ln, f"Invalid param: {arg}")
+                    param = arg
+                dtype = arg.replace(param, '')
+                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
+            #
+            # The array-of-pointers case.  Dig the parameter name out from the middle
+            # of the declaration.
+            #
+            elif KernRe(r'\(.+\)\s*\[').search(arg):
+                r = KernRe(r'[^\(]+\(\s*\*\s*'          # Up to "(" and the "*"
+                           r'([\w.]*?)'                 # The actual pointer name
+                           r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
+                if r.match(arg):
+                    param = r.group(1)
+                else:
+                    self.emit_msg(ln, f"Invalid param: {arg}")
+                    param = arg
+                dtype = arg.replace(param, '')
+                self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
+            elif arg:
+                #
+                # Clean up extraneous spaces and split the string at commas; the first
+                # element of the resulting list will also include the type information.
+                #
+                arg = KernRe(r'\s*:\s*').sub(":", arg)
+                arg = KernRe(r'\s*\[').sub('[', arg)
+                args = KernRe(r'\s*,\s*').split(arg)  # rebinds 'args'; the outer loop already holds its own list
+                args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
+                #
+                # args[0] has a string of "type a".  If "a" includes an [array]
+                # declaration, we want to not be fooled by any white space inside
+                # the brackets, so detect and handle that case specially.
+                #
+                r = KernRe(r'^([^[\]]*\s+)(.*)$')
+                if r.match(args[0]):
+                    args[0] = r.group(2)
+                    dtype = r.group(1)
+                else:
+                    # No space in args[0]; this seems wrong but preserves previous behavior
+                    dtype = ''
+
+                bitfield_re = KernRe(r'(.*?):(\w+)')
+                for param in args:
+                    #
+                    # For pointers, shift the star(s) from the variable name to the
+                    # type declaration.
+                    #
+                    r = KernRe(r'^(\*+)\s*(.*)')
+                    if r.match(param):
+                        self.push_parameter(ln, decl_type, r.group(2),
+                                            f"{dtype} {r.group(1)}",
+                                            arg, declaration_name)
+                    #
+                    # Perform a similar shift for bitfields.
+                    #
+                    elif bitfield_re.search(param):
+                        if dtype != "":  # Skip unnamed bit-fields
+                            self.push_parameter(ln, decl_type, bitfield_re.group(1),
+                                                f"{dtype}:{bitfield_re.group(2)}",
+                                                arg, declaration_name)
+                    else:
+                        self.push_parameter(ln, decl_type, param, dtype,
+                                            arg, declaration_name)
+
+    def check_sections(self, ln, decl_name, decl_type):
+        """
+        Warn about every section whose name is neither in the parameter
+        list nor matched by ``known_sections``.
+        """
+        kind = "parameter" if decl_type == 'function' else "member"
+        dname = f"{decl_type} {kind}"
+
+        for title in self.entry.sections:
+            if title in self.entry.parameterlist or known_sections.search(title):
+                continue
+
+            self.emit_msg(ln,
+                          f"Excess {dname} '{title}' description in '{decl_name}'")
+
+    def check_return_section(self, ln, declaration_name, return_type):
+        """
+        Warn when a function with a non-void return type has no "Return"
+        section. Does nothing unless ``config.wreturn`` is set.
+        """
+        if not self.config.wreturn:
+            return
+
+        # Ignore an empty return type (It's a macro)
+        if not return_type:
+            return
+        # Ignore functions with a "void" return type (but not "void *")
+        if KernRe(r'void\s*\w*\s*$').search(return_type):
+            return
+
+        if not self.entry.sections.get("Return"):
+            self.emit_msg(ln, f"No description found for return value of '{declaration_name}'")
+
+    def split_struct_proto(self, proto):
+        """
+        Break a struct/union prototype into ``(struct|union, name, members)``.
+
+        The plain ``struct name { ... }`` form is tried first, then the
+        ``typedef struct { ... } name;`` form; ``None`` if neither matches.
+        """
+        kind = r'(struct|union)'
+        trailing_attrs = '|'.join([
+            "__attribute__",
+            "__packed",
+            "__aligned",
+            "____cacheline_aligned_in_smp",
+            "____cacheline_aligned",
+        ])
+        body = r'\{(.*)\}\s*' + "(?:" + trailing_attrs + ")?"
+
+        plain = KernRe(kind + r'\s+(\w+)\s*' + body)
+        if plain.search(proto):
+            return (plain.group(1), plain.group(2), plain.group(3))
+        typedef = KernRe(r'typedef\s+' + kind + r'\s*' + body + r'\s*(\w+)\s*;')
+        if typedef.search(proto):
+            return (typedef.group(1), typedef.group(3), typedef.group(2))
+        return None
+
+    def rewrite_struct_members(self, members):
+        """
+        Process ``struct``/``union`` members from the most deeply nested
+        outward and return the rewritten member text.
+
+        Rewrite the members of a ``struct`` or ``union`` for easier formatting
+        later on. Among other things, this function will turn a member like::
+
+          struct { inner_members; } foo;
+
+        into::
+
+          struct foo; inner_members;
+        """
+
+        #
+        # The trick is in the ``[^\{\}]*`` below - it prevents a match of an outer
+        # ``struct``/``union`` until the inner one has been munged
+        # (removing the ``{`` in the process).
+        #
+        struct_members = KernRe(r'(struct|union)'   # 0: declaration type
+                                r'([^\{\};]+)'      # 1: possible name
+                                r'(\{)'
+                                r'([^\{\}]*)'       # 3: Contents of declaration
+                                r'(\})'
+                                r'([^\{\};]*)(;)')  # 5: Remaining stuff after declaration
+        tuples = struct_members.findall(members)
+        while tuples:
+            for t in tuples:
+                newmember = ""
+                oldmember = "".join(t) # Reconstruct the original formatting
+                dtype, name, lbr, content, rbr, rest, semi = t
+                #
+                # Pass through each field name, normalizing the form and formatting.
+                #
+                for s_id in rest.split(','):
+                    s_id = s_id.strip()
+                    newmember += f"{dtype} {s_id}; "
+                    #
+                    # Remove bitfield/array/pointer info, getting the bare name.
+                    #
+                    s_id = KernRe(r'[:\[].*').sub('', s_id)
+                    s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
+                    #
+                    # Pass through the members of this inner structure/union.
+                    #
+                    for arg in content.split(';'):
+                        arg = arg.strip()
+                        #
+                        # Look for (type)(*name)(args) - pointer to function
+                        #
+                        r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
+                        if r.match(arg):
+                            fn_type, fn_name, fn_tail = r.group(1), r.group(2), r.group(3)
+                            # Own names here: dtype must stay struct/union for the next s_id
+                            if not s_id:
+                                # Anonymous struct/union
+                                newmember += f"{fn_type}{fn_name}{fn_tail}; "
+                            else:
+                                newmember += f"{fn_type}{s_id}.{fn_name}{fn_tail}; "
+                        #
+                        # Otherwise a non-function member.
+                        #
+                        else:
+                            #
+                            # Remove bitmap and array portions and spaces around commas
+                            #
+                            arg = KernRe(r':\s*\d+\s*').sub('', arg)
+                            arg = KernRe(r'\[.*\]').sub('', arg)
+                            arg = KernRe(r'\s*,\s*').sub(',', arg)
+                            #
+                            # Look for a normal decl - "type name[,name...]"
+                            #
+                            r = KernRe(r'(.*)\s+([\S+,]+)')
+                            if r.search(arg):
+                                for member in r.group(2).split(','):
+                                    member = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', member)
+                                    if not s_id:
+                                        # Anonymous struct/union
+                                        newmember += f"{r.group(1)} {member}; "
+                                    else:
+                                        newmember += f"{r.group(1)} {s_id}.{member}; "
+                            else:
+                                newmember += f"{arg}; "
+                #
+                # At the end of the s_id loop, replace the original declaration with
+                # the munged version.
+                #
+                members = members.replace(oldmember, newmember)
+            #
+            # End of the tuple loop - search again and see if there are outer members
+            # that now turn up.
+            #
+            tuples = struct_members.findall(members)
+        return members
+
+    def format_struct_decl(self, declaration):
+        """
+        Lay out a ``struct`` declaration one clause per line, tab-indented
+        by nesting depth, for inclusion in the resulting docs.
+        """
+        #
+        # Break the text after every "{" and ";", and pull "}  ;" together.
+        #
+        text = KernRe(r'([\{;])').sub(r'\1\n', declaration)
+        text = KernRe(r'\}\s+;').sub('};', text)
+        #
+        # Format inline enums with each member on its own line.
+        #
+        enum_comma = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
+        while enum_comma.search(text):
+            text = enum_comma.sub(r'\1,\n\2', text)
+        #
+        # Emit the non-empty clauses; the depth drops on "}" (never below 1)
+        # and rises after a "{" that is not closed on the same line.
+        #
+        first_space_run = KernRe(r'\s+')
+        depth = 1
+        out = []
+        for raw in text.split('\n'):
+            clause = first_space_run.sub(' ', raw.strip(), count=1)
+            if not clause:
+                continue
+            if '}' in clause and depth > 1:
+                depth -= 1
+            # Lines starting with "#" get a single tab, whatever the depth
+            indent = "\t" if clause.startswith('#') else "\t" * (depth + 1)
+            out.append(indent + clause + "\n")
+            if "{" in clause and "}" not in clause:
+                depth += 1
+        return "".join(out)
+
+
+    def dump_struct(self, ln, proto, source):
+        """
+        Parse a ``struct``/``union`` prototype and store the resulting entry.
+
+        A message is emitted, and nothing stored, when the prototype cannot
+        be parsed or its name differs from the documented identifier.
+        """
+        #
+        # Do the basic parse to get the pieces of the declaration.
+        #
+        proto = trim_private_members(proto)
+        parts = self.split_struct_proto(proto)
+        if not parts:
+            self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
+            return
+        decl_type, declaration_name, members = parts
+
+        expected = self.entry.identifier
+        if expected != declaration_name:
+            self.emit_msg(ln, f"expecting prototype for {decl_type} {expected}. "
+                          f"Prototype was for {decl_type} {declaration_name} instead\n")
+            return
+        #
+        # Apply the "struct" transformations; this text is also what gets formatted.
+        #
+        declaration = self.xforms.apply("struct", members)
+        #
+        # Deal with embedded struct and union members, then drop whatever
+        # brace groups are left (enums).
+        #
+        flat = self.rewrite_struct_members(declaration)
+        flat = re.sub(r'(\{[^\{\}]*\})', '', flat)
+        #
+        # Collect the members, check the sections and store the entry.
+        #
+        self.create_parameter_list(ln, decl_type, flat, ';', declaration_name)
+        self.check_sections(ln, declaration_name, decl_type)
+        self.output_declaration(decl_type, declaration_name, source=source,
+                                definition=self.format_struct_decl(declaration),
+                                purpose=self.entry.declaration_purpose)
+
+    def dump_enum(self, ln, proto, source):
+        """
+        Parse an ``enum`` prototype and store it inside self.entries array.
+
+        Warns about enum values lacking a description and about described
+        values that the enum does not contain.
+        """
+        #
+        # Strip preprocessor directives.  Note that this depends on the
+        # trailing semicolon we added in process_proto_type().
+        #
+        proto = trim_private_members(proto)
+        proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
+        #
+        # Parse out the name and members of the enum.  The typedef form is
+        # searched for first; failing that, proto must start with a straight enum.
+        #
+        r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
+        if r.search(proto):
+            declaration_name, members = r.group(2), r.group(1)
+        else:
+            r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
+            if not r.match(proto):
+                self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
+                return
+            declaration_name, members = r.group(1), r.group(2)
+        #
+        # Make sure we found what we were expecting.
+        #
+        expected = self.entry.identifier
+        if expected != declaration_name:
+            if expected == "":
+                msg = f"{proto}: wrong kernel-doc identifier on prototype"
+            else:
+                msg = (f"expecting prototype for enum {expected}. "
+                       f"Prototype was for enum {declaration_name} instead")
+            self.emit_msg(ln, msg)
+            return
+
+        #
+        # An enum without a name is reported as "(anonymous)".
+        #
+        if not declaration_name:
+            declaration_name = "(anonymous)"
+        #
+        # Parse out the name of each enum member, and verify that we
+        # have a description for it.  Parenthesized text is dropped first.
+        #
+        descs = self.entry.parameterdescs
+        member_set = set()
+        members = KernRe(r'\([^;)]*\)').sub('', members)
+        for chunk in members.split(','):
+            arg = KernRe(r'^\s*(\w+).*').sub(r'\1', chunk)
+            if not arg.strip():
+                continue
+
+            self.entry.parameterlist.append(arg)
+            member_set.add(arg)
+            if arg in descs:
+                continue
+            descs[arg] = self.undescribed
+            self.emit_msg(ln,
+                          f"Enum value '{arg}' not described in enum '{declaration_name}'")
+        #
+        # Ensure that every described member actually exists in the enum.
+        #
+        for k in descs:
+            if k in member_set:
+                continue
+            self.emit_msg(ln,
+                          f"Excess enum value '@{k}' description in '{declaration_name}'")
+
+        self.output_declaration('enum', declaration_name,
+                                source=source,
+                                purpose=self.entry.declaration_purpose)
+
+    def dump_var(self, ln, proto, source):
+        """
+        Record a documented variable (part of the kAPI) as a ``var`` entry.
+
+        The name is taken from a ``DEFINE_*(name)`` macro when one is
+        found, otherwise from the tail of the C declaration.  Text that
+        follows an ``=`` sign is kept as the variable's default value.
+        """
+        attribs = ["extern", "const"]
+        attr_prefix = r"^(?:\b(?:" + "|".join(attribs) + r")\b\s*)*"
+        #
+        # Remember the prototype as it was handed to us; that is the
+        # version passed on as full_proto.
+        #
+        original_proto = proto
+        #
+        # A DEFINE_*() macro names the variable directly.
+        #
+        name = None
+        for macro in [KernRe(r"DEFINE_[\w_]+\s*\(([\w_]+)\)")]:
+            hit = macro.search(proto)
+            if hit:
+                name = hit.group(1)
+                break
+        #
+        # No macro: drop comments and macros to have a pure C prototype.
+        #
+        if not name:
+            proto = self.xforms.apply("var", proto)
+        proto = proto.rstrip()
+        #
+        # The variable name is at the end of the declaration, optionally
+        # followed by array brackets and an initializer.  The second
+        # pattern is only tried when the first one fails, and can only
+        # provide the initializer.
+        #
+        initializer = None
+        with_name = KernRe(attr_prefix + r"\s*[\w_\s]*\s+(?:\*+)?([\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+        if with_name.match(proto):
+            if not name:
+                name = with_name.group(1)
+            initializer = with_name.group(2)
+        else:
+            value_only = KernRe(attr_prefix + r"(?:[\w_\s]*)?\s+(?:\*+)?(?:[\w_]+)\s*[\d\]\[]*\s*(=.*)?")
+            if value_only.match(proto):
+                initializer = value_only.group(1)
+
+        if not name:
+            self.emit_msg(ln,f"{proto}: can't parse variable")
+            return
+        #
+        # Keep only the value itself, without the "=" and padding.
+        #
+        if initializer:
+            initializer = initializer.lstrip("=").strip()
+
+        self.output_declaration("var", name,
+                                source=source,
+                                full_proto=original_proto,
+                                default_val=initializer,
+                                purpose=self.entry.declaration_purpose)
+
+    def dump_declaration(self, ln, prototype, source):
+        """
+        Store a data declaration inside self.entries array.
+
+        The work is delegated to the ``dump_*`` method that matches
+        ``self.entry.decl_type`` (enum, typedef, union/struct or var).
+        Any other declaration type is reported through emit_msg().
+        """
+
+        if self.entry.decl_type == "enum":
+            self.dump_enum(ln, prototype, source)
+        elif self.entry.decl_type == "typedef":
+            self.dump_typedef(ln, prototype, source)
+        elif self.entry.decl_type in ["union", "struct"]:
+            self.dump_struct(ln, prototype, source)
+        elif self.entry.decl_type == "var":
+            self.dump_var(ln, prototype, source)
+        else:
+            # This would be a bug.  Report it with the same emit_msg()
+            # helper used by every other diagnostic in this class.
+            self.emit_msg(ln, f'Unknown declaration type: {self.entry.decl_type}')
+
+    def dump_function(self, ln, prototype, source):
+        """
+        Store a function or function macro inside self.entries array.
+
+        The prototype is split into return type, name and argument list.
+        The name is checked against the identifier found in the comment,
+        the sections are validated, and the result is handed over to
+        output_declaration().
+
+        Arguments:
+          ln:        line number used when emitting messages
+          prototype: text of the prototype; may start with "#define"
+          source:    passed unchanged to output_declaration()
+        """
+
+        # NOTE(review): self-assignment, has no effect.
+        source = source
+        found = func_macro = False
+        return_type = ''
+        decl_type = 'function'
+
+        #
+        # If we have a macro, remove the "#define" at the front.
+        #
+        new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
+        if new_proto != prototype:
+            prototype = new_proto
+            #
+            # Dispense with the simple "#define A B" case here; the key
+            # is the space after the name of the symbol being defined.
+            # NOTE that the seemingly misnamed "func_macro" indicates a
+            # macro *without* arguments.
+            #
+            r = KernRe(r'^(\w+)\s+')
+            if r.search(prototype):
+                return_type = ''
+                declaration_name = r.group(1)
+                func_macro = True
+                found = True
+        else:
+            #
+            # Apply the initial transformations.
+            #
+            prototype = self.xforms.apply("func", prototype)
+
+        # Yes, this truly is vile.  We are looking for:
+        # 1. Return type (may be nothing if we're looking at a macro)
+        # 2. Function name
+        # 3. Function parameters.
+        #
+        # All the while we have to watch out for function pointer parameters
+        # (which IIRC is what the two sections are for), C types (these
+        # regexps don't even start to express all the possibilities), and
+        # so on.
+        #
+        # If you mess with these regexps, it's a good idea to check that
+        # the following functions' documentation still comes out right:
+        # - parport_register_device (function pointer parameters)
+        # - atomic_set (macro)
+        # - pci_match_device, __copy_to_user (long return type)
+
+        name = r'\w+'
+        type1 = r'(?:[\w\s]+)?'
+        type2 = r'(?:[\w\s]+\*+)+'
+        #
+        # Attempt to match first on (args) with no internal parentheses; this
+        # lets us easily filter out __acquires() and other post-args stuff.  If
+        # that fails, just grab the rest of the line to the last closing
+        # parenthesis.
+        #
+        proto_args = r'\(([^\(]*|.*)\)'
+        #
+        # (Except for the simple macro case) attempt to split up the prototype
+        # in the various ways we understand.
+        #
+        if not found:
+            #
+            # Tried in order; the first pattern that matches wins:
+            # no return type, plain return type, pointer return type.
+            #
+            patterns = [
+                rf'^()({name})\s*{proto_args}',
+                rf'^({type1})\s+({name})\s*{proto_args}',
+                rf'^({type2})\s*({name})\s*{proto_args}',
+            ]
+
+            for p in patterns:
+                r = KernRe(p)
+                if r.match(prototype):
+                    return_type = r.group(1)
+                    declaration_name = r.group(2)
+                    args = r.group(3)
+                    self.create_parameter_list(ln, decl_type, args, ',',
+                                               declaration_name)
+                    found = True
+                    break
+        #
+        # Parsing done; make sure that things are as we expect.
+        #
+        if not found:
+            self.emit_msg(ln,
+                          f"cannot understand function prototype: '{prototype}'")
+            return
+        #
+        # The comment must document the function that follows it.
+        #
+        if self.entry.identifier != declaration_name:
+            self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
+                          f"Prototype was for {declaration_name}() instead")
+            return
+        self.check_sections(ln, declaration_name, "function")
+        self.check_return_section(ln, declaration_name, return_type)
+        #
+        # Store the result.
+        #
+        self.output_declaration(decl_type, declaration_name,
+                                source=source,
+                                typedef=('typedef' in return_type),
+                                functiontype=return_type,
+                                purpose=self.entry.declaration_purpose,
+                                func_macro=func_macro)
+
+
+    def dump_typedef(self, ln, proto, source):
+        """
+        Store a ``typedef`` inside self.entries array.
+
+        Function typedefs are stored as 'function' entries flagged with
+        typedef=True; anything else that ends in ``name;`` is stored as
+        a plain 'typedef' entry.
+        """
+        def name_mismatch(found):
+            """Warn and return True if @found is not the documented name."""
+            if self.entry.identifier == found:
+                return False
+            self.emit_msg(ln,
+                          f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {found} instead\n")
+            return True
+
+        #
+        # Building blocks for the function typedef patterns.
+        #
+        ret_part = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
+        ident_part = r'\*?\s*(\w\S+)\s*'
+        args_part = r'\s*\((.*)\);'
+        #
+        # The name may or may not be wrapped in parentheses.
+        #
+        function_typedefs = [
+            KernRe(ret_part + r'\(' + ident_part + r'\)' + args_part),
+            KernRe(ret_part + ident_part + args_part),
+        ]
+        #
+        # We start by looking for function typedefs.
+        #
+        for candidate in function_typedefs:
+            if candidate.match(proto):
+                return_type = candidate.group(1).strip()
+                declaration_name = candidate.group(2)
+                args = candidate.group(3)
+
+                if name_mismatch(declaration_name):
+                    return
+
+                self.create_parameter_list(ln, 'function', args, ',',
+                                           declaration_name)
+                self.output_declaration('function', declaration_name,
+                                        source=source,
+                                        typedef=True,
+                                        functiontype=return_type,
+                                        purpose=self.entry.declaration_purpose)
+                return
+        #
+        # Not a function, try to parse a simple typedef.
+        #
+        simple = KernRe(r'typedef.*\s+(\w+)\s*;')
+        if not simple.match(proto):
+            self.emit_msg(ln, "error: Cannot parse typedef!")
+            return
+
+        declaration_name = simple.group(1)
+        if name_mismatch(declaration_name):
+            return
+
+        self.output_declaration('typedef', declaration_name,
+                                source=source,
+                                purpose=self.entry.declaration_purpose)
+
+    @staticmethod
+    def process_export(function_set, line):
+        """
+        Look for an ``EXPORT_SYMBOL*`` tag on @line.
+
+        When one is found, the exported name is added to @function_set
+        and True is returned; otherwise the set is left alone and the
+        result is False.
+
+        No instance data is needed here, hence the staticmethod.
+        """
+        #
+        # Only one EXPORT_SYMBOL* per line is recognized, as having
+        # multiple export lines would violate Kernel coding style.
+        #
+        for matcher in (export_symbol, export_symbol_ns):
+            if matcher.search(line):
+                name = matcher.group(2)
+                break
+        else:
+            return False
+        #
+        # We support documenting some exported symbols with different
+        # names (a horrible hack), so trim the special suffixes.  Slicing
+        # is used to stay compatible with Python < 3.9.
+        #
+        for tail in ['_noprof']:
+            if name.endswith(tail):
+                name = name[:-len(tail)]
+
+        function_set.add(name)
+        return True
+
+    def process_normal(self, ln, line, source):
+        """
+        STATE_NORMAL: wait for the ``/**`` that opens a kernel-doc comment.
+        """
+        if doc_start.match(line):
+            #
+            # Start a new entry; the next line is always the one that
+            # carries the name.
+            #
+            self.reset_state(ln)
+            self.state = state.NAME
+
+    def process_name(self, ln, line, source):
+        """
+        STATE_NAME: Looking for the "name - description" line
+
+        Depending on what is found, the parser moves to DOCBLOCK (for a
+        ``DOC:`` block), to DECLARATION or BODY (for a normal declaration
+        line), or back to NORMAL when the line is not valid kernel-doc.
+        """
+        #
+        # Check for a DOC: block and handle them specially.
+        #
+        if doc_block.search(line):
+
+            # A DOC: block without a title gets the default section name
+            if not doc_block.group(1):
+                self.entry.begin_section(ln, "Introduction")
+            else:
+                self.entry.begin_section(ln, doc_block.group(1))
+
+            self.entry.identifier = self.entry.section
+            self.state = state.DOCBLOCK
+        #
+        # Otherwise we're looking for a normal kerneldoc declaration line.
+        #
+        elif doc_decl.search(line):
+            self.entry.identifier = doc_decl.group(1)
+
+            # Test for data declaration
+            if doc_begin_data.search(line):
+                self.entry.decl_type = doc_begin_data.group(1)
+                self.entry.identifier = doc_begin_data.group(2)
+            #
+            # Look for a function description
+            #
+            elif doc_begin_func.search(line):
+                self.entry.identifier = doc_begin_func.group(1)
+                self.entry.decl_type = "function"
+            #
+            # We struck out.
+            #
+            else:
+                self.emit_msg(ln,
+                              f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
+                self.state = state.NORMAL
+                return
+            #
+            # OK, set up for a new kerneldoc entry.
+            #
+            self.state = state.BODY
+            self.entry.identifier = self.entry.identifier.strip(" ")
+            # if there's no @param blocks need to set up default section here
+            self.entry.begin_section(ln + 1)
+            #
+            # Find the description portion, which *should* be there but
+            # isn't always.
+            # (We should be able to capture this from the previous parsing - someday)
+            #
+            r = KernRe("[-:](.*)")
+            if r.search(line):
+                self.entry.declaration_purpose = trim_whitespace(r.group(1))
+                self.state = state.DECLARATION
+            else:
+                self.entry.declaration_purpose = ""
+
+            # The short-description warning is only emitted on request
+            if not self.entry.declaration_purpose and self.config.wshort_desc:
+                self.emit_msg(ln,
+                              f"missing initial short description on line:\n{line}")
+
+            # An empty identifier is only tolerated for enums
+            if not self.entry.identifier and self.entry.decl_type != "enum":
+                self.emit_msg(ln,
+                              f"wrong kernel-doc identifier on line:\n{line}")
+                self.state = state.NORMAL
+
+            if self.config.verbose:
+                self.emit_msg(ln,
+                              f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
+                                  warning=False)
+        #
+        # Failed to find an identifier. Emit a warning
+        #
+        else:
+            # NOTE(review): the state is left untouched on this path, so
+            # the next line is handled by process_name() again.
+            self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
+
+    def is_new_section(self, ln, line):
+        """
+        Check whether @line opens a new section of the comment.
+
+        If so, the current section is dumped, the new one is started
+        with whatever text follows its name, the parser state is updated
+        and True is returned.  Otherwise nothing changes and the result
+        is False.
+        """
+        if not doc_sect.search(line):
+            return False
+
+        self.state = state.BODY
+        #
+        # Pick out the name of our new section, tweaking it if need be.
+        # Context, Return and @-prefixed sections are "special" ones.
+        #
+        title = doc_sect.group(1)
+        lowered = title.lower()
+        if lowered == 'description':
+            title = 'Description'
+        elif lowered == 'context':
+            title = 'Context'
+            self.state = state.SPECIAL_SECTION
+        elif lowered in ["@return", "@returns", "return", "returns"]:
+            title = "Return"
+            self.state = state.SPECIAL_SECTION
+        elif title[0] == '@':
+            self.state = state.SPECIAL_SECTION
+        #
+        # Initialize the contents, and get the new section going.
+        #
+        first_text = doc_sect.group(2) or ""
+        self.dump_section()
+        self.entry.begin_section(ln, title)
+        self.entry.leading_space = None
+        self.entry.add_text(first_text.lstrip())
+        return True
+
+    def is_comment_end(self, ln, line):
+        """
+        Detect the end of a kerneldoc comment and, when found, act on it.
+
+        On a match the current section is dumped, the prototype buffer
+        is cleared and the parser switches to the PROTO state.  Returns
+        True if @line ended the comment, False otherwise.
+        """
+        if not doc_end.search(line):
+            return False
+
+        self.dump_section()
+        #
+        # Look for doc_com + <text> + doc_end:
+        #
+        if KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/').match(line):
+            self.emit_msg(ln, f"suspicious ending line: {line}")
+
+        self.entry.prototype = ""
+        self.entry.new_start_line = ln + 1
+        self.state = state.PROTO
+        return True
+
+
+    def process_decl(self, ln, line, source):
+        """
+        STATE_DECLARATION: collect the (possibly multi-line) short
+        description that follows the name of the declaration.
+        """
+        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
+            return
+        #
+        # Anything else must have the " * " line beginning.
+        #
+        if not doc_content.search(line):
+            # Unknown line, ignore
+            self.emit_msg(ln, f"bad line: {line}")
+            return
+
+        cont = doc_content.group(1)
+        #
+        # A blank line means that we have moved out of the declaration
+        # part of the comment (without any "special section" parameter
+        # descriptions).
+        #
+        if cont == "":
+            self.state = state.BODY
+            return
+        #
+        # Otherwise we have more of the declaration section to soak up.
+        #
+        extended = self.entry.declaration_purpose + ' ' + cont
+        self.entry.declaration_purpose = trim_whitespace(extended)
+
+
+    def process_special(self, ln, line, source):
+        """
+        STATE_SPECIAL_SECTION: a section ending with a blank line.
+
+        Continuation lines are added to the current section after their
+        common leading white space has been removed.  A blank line, a new
+        section or the end of the comment finishes the section.
+        """
+        #
+        # If we have hit a blank line (only the " * " marker), then this
+        # section is done.
+        #
+        if KernRe(r"\s*\*\s*$").match(line):
+            self.entry.begin_section(ln, dump = True)
+            self.state = state.BODY
+            return
+        #
+        # Not a blank line, look for the other ways to end the section.
+        #
+        if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
+            return
+        #
+        # OK, we should have a continuation of the text for this section.
+        #
+        if doc_content.search(line):
+            cont = doc_content.group(1)
+            #
+            # If the lines of text after the first in a special section have
+            # leading white space, we need to trim it out or Sphinx will get
+            # confused.  For the second line (the None case), see what we
+            # find there and remember it.
+            #
+            if self.entry.leading_space is None:
+                r = KernRe(r'^(\s+)')
+                if r.match(cont):
+                    self.entry.leading_space = len(r.group(1))
+                else:
+                    self.entry.leading_space = 0
+            #
+            # Otherwise, before trimming any leading chars, be *sure*
+            # that they are white space.  We should maybe warn if this
+            # isn't the case.
+            #
+            # A line with less indentation permanently lowers the amount
+            # trimmed from the lines that follow it.
+            # NOTE(review): cont[i] relies on cont holding a non-space
+            # character within the first leading_space columns; lines
+            # made only of white space were presumably all caught by the
+            # blank-line test above - confirm.
+            #
+            for i in range(0, self.entry.leading_space):
+                if cont[i] != " ":
+                    self.entry.leading_space = i
+                    break
+            #
+            # Add the trimmed result to the section and we're done.
+            #
+            self.entry.add_text(cont[self.entry.leading_space:])
+        else:
+            # Unknown line, ignore
+            self.emit_msg(ln, f"bad line: {line}")
+
+    def process_body(self, ln, line, source):
+        """
+        STATE_BODY: the bulk of a kerneldoc comment.
+
+        Text lines are appended to the current section, unless the line
+        starts a new section or closes the comment.
+        """
+        if self.is_new_section(ln, line):
+            return
+        if self.is_comment_end(ln, line):
+            return
+
+        if not doc_content.search(line):
+            # Unknown line, ignore
+            self.emit_msg(ln, f"bad line: {line}")
+            return
+
+        self.entry.add_text(doc_content.group(1))
+
+    def process_inline_name(self, ln, line, source):
+        """
+        STATE_INLINE_NAME: beginning of docbook comments within a prototype.
+
+        The line is expected to either name the inline section or to
+        close the inline comment.
+        """
+
+        #
+        # A section name: start the section and go collect its text.
+        #
+        if doc_inline_sect.search(line):
+            self.entry.begin_section(ln, doc_inline_sect.group(1))
+            self.entry.add_text(doc_inline_sect.group(2).lstrip())
+            self.state = state.INLINE_TEXT
+        #
+        # The inline comment ended: back to reading the prototype.
+        #
+        elif doc_inline_end.search(line):
+            self.dump_section()
+            self.state = state.PROTO
+        #
+        # Comment text without a section name is not valid here.
+        #
+        elif doc_content.search(line):
+            self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
+            self.state = state.PROTO
+
+            #
+            # Don't let it add partial comments at the code, as breaks the
+            # logic meant to remove comments from prototypes.
+            #
+            self.process_proto_type(ln, "/**\n" + line, source)
+        # else ... ??
+
+    def process_inline_text(self, ln, line, source):
+        """STATE_INLINE_TEXT: text of a docbook comment within a prototype."""
+
+        #
+        # The end marker closes the section and resumes prototype parsing.
+        #
+        if doc_inline_end.search(line):
+            self.dump_section()
+            self.state = state.PROTO
+            return
+        #
+        # Comment text is collected; anything else is silently skipped.
+        #
+        if doc_content.search(line):
+            self.entry.add_text(doc_content.group(1))
+
+    def syscall_munge(self, ln, proto):         # pylint: disable=W0613
+        """
+        Rewrite a ``SYSCALL_DEFINEn(name, type, arg, ...)`` declaration
+        as an ordinary ``long sys_name(type arg, ...)`` prototype and
+        return the result.
+        """
+
+        # Strip newlines/CR's
+        proto = re.sub(r'[\r\n]+', ' ', proto)
+
+        # SYSCALL_DEFINE0 is the variant without arguments
+        no_args = 'SYSCALL_DEFINE0' in proto
+
+        # Replace SYSCALL_DEFINE with correct return type & function name
+        proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)
+
+        # The comma after the name opens the argument list; without
+        # arguments the closing parenthesis becomes "(void)" instead.
+        if KernRe(r'long\s+(sys_.*?),').search(proto):
+            proto = KernRe(',').sub('(', proto, count=1)
+        elif no_args:
+            proto = KernRe(r'\)').sub('(void)', proto, count=1)
+
+        # Nothing else to do when there are no arguments
+        if no_args:
+            return proto
+
+        # Now blank out all of the odd-numbered commas in the proto
+        # so that argument types & names don't have a comma between them
+        rebuilt = []
+        commas = 0
+        for char in proto:
+            if char == ',':
+                commas += 1
+                if commas % 2 == 1:
+                    char = ' '
+            rebuilt.append(char)
+
+        return ''.join(rebuilt)
+
+    def tracepoint_munge(self, ln, proto):
+        """
+        Rewrite a tracepoint definition as the prototype of the matching
+        ``trace_<name>()`` function and return it.
+
+        When the definition is not understood, a message is emitted and
+        @proto is returned unchanged.
+        """
+        #
+        # Each macro keeps the tracepoint name in a different argument.
+        # All patterns are tried; the last one that matches wins.
+        #
+        name_sources = [
+            (r'TRACE_EVENT\((.*?),', 1),
+            (r'DEFINE_SINGLE_EVENT\((.*?),', 1),
+            (r'DEFINE_EVENT\((.*?),(.*?),', 2),
+        ]
+
+        tracepointname = None
+        for pattern, group in name_sources:
+            matcher = KernRe(pattern)
+            if matcher.search(proto):
+                tracepointname = matcher.group(group)
+
+        if tracepointname:
+            tracepointname = tracepointname.lstrip()
+        #
+        # The arguments come from TP_PROTO()
+        #
+        tracepointargs = None
+        tp_proto = KernRe(r'TP_PROTO\((.*?)\)')
+        if tp_proto.search(proto):
+            tracepointargs = tp_proto.group(1)
+
+        if tracepointname and tracepointargs:
+            proto = f"static inline void trace_{tracepointname}({tracepointargs})"
+            self.entry.identifier = f"trace_{self.entry.identifier}"
+        else:
+            self.emit_msg(ln,
+                          f"Unrecognized tracepoint format:\n{proto}\n")
+
+        return proto
+
+    def process_proto_function(self, ln, line, source):
+        """
+        Ancillary routine to process a function prototype.
+
+        Prototype text is accumulated in self.entry.prototype one line
+        at a time.  Once a ``{``, a ``;`` or a ``#define`` is seen, the
+        prototype is cleaned up, handed to dump_function() and the parser
+        state is reset.
+        """
+
+        #
+        # Strip C99-style comments to end of line.  The flags have to be
+        # given by keyword: the second positional parameter of KernRe is
+        # "cache", so a positional re.S would never reach re.compile().
+        #
+        line = KernRe(r"//.*$", flags=re.S).sub('', line)
+        #
+        # Soak up the line's worth of prototype text, stopping at { or ; if present.
+        #
+        if KernRe(r'\s*#\s*define').match(line):
+            self.entry.prototype = line
+        elif not line.startswith('#'):   # skip other preprocessor stuff
+            r = KernRe(r'([^\{]*)')
+            if r.match(line):
+                self.entry.prototype += r.group(1) + " "
+        #
+        # If we now have the whole prototype, clean it up and declare victory.
+        #
+        if '{' in line or ';' in line or KernRe(r'\s*#\s*define').match(line):
+            # strip comments and surrounding spaces
+            self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip()
+            #
+            # Handle self.entry.prototypes for function pointers like:
+            #       int (*pcs_config)(struct foo)
+            # by turning it into
+            #       int pcs_config(struct foo)
+            #
+            r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
+            self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
+            #
+            # Handle special declaration syntaxes
+            #
+            if 'SYSCALL_DEFINE' in self.entry.prototype:
+                self.entry.prototype = self.syscall_munge(ln,
+                                                          self.entry.prototype)
+            else:
+                r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
+                if r.search(self.entry.prototype):
+                    self.entry.prototype = self.tracepoint_munge(ln,
+                                                                 self.entry.prototype)
+            #
+            # ... and we're done
+            #
+            self.dump_function(ln, self.entry.prototype, source)
+            self.reset_state(ln)
+
+    def process_proto_type(self, ln, line, source):
+        """
+        Ancillary routine to process a type.
+
+        Declaration text is accumulated in self.entry.prototype while
+        self.entry.brcount tracks the nesting of curly brackets.  The
+        first semicolon seen outside of any brackets completes the
+        declaration: it is passed to dump_declaration() and the parser
+        state is reset.
+        """
+
+        #
+        # Strip C99-style comments and surrounding whitespace.  The flags
+        # have to be given by keyword: the second positional parameter of
+        # KernRe is "cache", so a positional re.S would never reach
+        # re.compile().
+        #
+        line = KernRe(r"//.*$", flags=re.S).sub('', line).strip()
+        if not line:
+            return # nothing to see here
+
+        # To distinguish preprocessor directive from regular declaration later.
+        if line.startswith('#'):
+            line += ";"
+        #
+        # Split the declaration on any of { } or ;, and accumulate pieces
+        # until we hit a semicolon while not inside {brackets}
+        #
+        r = KernRe(r'(.*?)([{};])')
+        for chunk in r.split(line):
+            if chunk:  # Ignore empty matches
+                self.entry.prototype += chunk
+                #
+                # This cries out for a match statement ... someday after we can
+                # drop Python 3.9 ...
+                #
+                if chunk == '{':
+                    self.entry.brcount += 1
+                elif chunk == '}':
+                    self.entry.brcount -= 1
+                elif chunk == ';' and self.entry.brcount <= 0:
+                    self.dump_declaration(ln, self.entry.prototype, source)
+                    self.reset_state(ln)
+                    return
+        #
+        # We hit the end of the line while still in the declaration; put
+        # in a space to represent the newline.
+        #
+        self.entry.prototype += ' '
+
+    def process_proto(self, ln, line, source):
+        """STATE_PROTO: reading a function/whatever prototype."""
+
+        #
+        # A complete inline comment on a single line becomes a section
+        # of its own.
+        #
+        if doc_inline_oneline.search(line):
+            self.entry.begin_section(ln, doc_inline_oneline.group(1))
+            self.entry.add_text(doc_inline_oneline.group(2))
+            self.dump_section()
+            return
+        #
+        # Start of a multi-line inline comment.
+        #
+        if doc_inline_start.search(line):
+            self.state = state.INLINE_NAME
+            return
+        #
+        # Plain prototype text: functions and types are parsed apart.
+        #
+        if self.entry.decl_type == 'function':
+            handler = self.process_proto_function
+        else:
+            handler = self.process_proto_type
+        handler(ln, line, source)
+
+    def process_docblock(self, ln, line, source):
+        """STATE_DOCBLOCK: within a ``DOC:`` block."""
+
+        #
+        # The end of the comment completes the block: store it as a
+        # "doc" entry and start over.
+        #
+        if doc_end.search(line):
+            self.dump_section()
+            self.output_declaration("doc", self.entry.identifier,
+                                    source=source)
+            self.reset_state(ln)
+            return
+        #
+        # Until then, collect the text of the block.
+        #
+        if doc_content.search(line):
+            self.entry.add_text(doc_content.group(1))
+
+    def parse_export(self):
+        """
+        Collect the symbols exported via ``EXPORT_SYMBOL*`` macros by
+        the source file self.fname.
+
+        Returns a set with the symbol names, or None if the file could
+        not be read.
+        """
+        symbols = set()
+
+        try:
+            with open(self.fname, "r", encoding="utf8",
+                      errors="backslashreplace") as src:
+                for text in src:
+                    self.process_export(symbols, text)
+        except OSError:
+            return None
+
+        return symbols
+
+    #: Dispatch table: for each parser state, the method that handles a line.
+    state_actions = {
+        state.NORMAL: process_normal,
+        state.NAME: process_name,
+        state.BODY: process_body,
+        state.DECLARATION: process_decl,
+        state.SPECIAL_SECTION: process_special,
+        state.INLINE_NAME: process_inline_name,
+        state.INLINE_TEXT: process_inline_text,
+        state.PROTO: process_proto,
+        state.DOCBLOCK: process_docblock,
+    }
+
+    def parse_kdoc(self):
+        """
+        Open and process each line of a C source file.
+        The parsing is controlled via a state machine, and the line is passed
+        to a different process function depending on the state. The process
+        function may update the state as needed.
+
+        Besides parsing kernel-doc tags, it also parses export symbols.
+
+        Returns a tuple with the set of exported symbols and self.entries.
+        If the file cannot be opened, an error is logged and whatever was
+        collected so far is returned.
+        """
+
+        # Text and first line number of pending continuation lines
+        prev = ""
+        prev_ln = None
+        export_table = set()
+        self.state = state.NORMAL
+        # Raw text of the comment/prototype currently being parsed
+        source = ""
+
+        try:
+            with open(self.fname, "r", encoding="utf8",
+                      errors="backslashreplace") as fp:
+                # NOTE(review): enumerate() starts counting at 0, so ln is
+                # zero-based here - confirm this is what emit_msg() expects.
+                for ln, line in enumerate(fp):
+
+                    line = line.expandtabs().strip("\n")
+
+                    # Group continuation lines on prototypes
+                    if self.state == state.PROTO:
+                        if line.endswith("\\"):
+                            prev += line.rstrip("\\")
+                            if not prev_ln:
+                                prev_ln = ln
+                            continue
+
+                        # Last line of a continuation: process the joined
+                        # text as if it started on its first line.
+                        if prev:
+                            ln = prev_ln
+                            line = prev + line
+                            prev = ""
+                            prev_ln = None
+
+                    self.config.log.debug("%d %s: %s",
+                                          ln, state.name[self.state],
+                                          line)
+
+                    # When sources are stored, drop the text of the previous
+                    # entry once back in NORMAL state, and collect every
+                    # line seen in any other state.
+                    if self.store_src:
+                        if source and self.state == state.NORMAL:
+                            source = ""
+                        elif self.state != state.NORMAL:
+                            source += line + "\n"
+
+                    # This is an optimization over the original script.
+                    # There, when export_file was used for the same file,
+                    # it was read twice. Here, we use the already-existing
+                    # loop to parse exported symbols as well.
+                    #
+                    # Exports are only looked for in NORMAL state; a line
+                    # holding one is not passed to the state handler.
+                    #
+                    if (self.state != state.NORMAL) or \
+                       not self.process_export(export_table, line):
+                        prev_state = self.state
+                        # Hand this line to the appropriate state handler
+                        self.state_actions[self.state](self, ln, line, source)
+                        # The line that leaves NORMAL state opens a new entry
+                        if prev_state == state.NORMAL and self.state != state.NORMAL:
+                            source += line + "\n"
+
+            self.emit_unused_warnings()
+
+        except OSError:
+            self.config.log.error(f"Error: Cannot open file {self.fname}")
+
+        return export_table, self.entries
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
new file mode 100644
index 000000000000..28292efe25a2
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Regular expression ancillary classes.
+
+Those help caching regular expressions and do matching for kernel-doc.
+"""
+
+import re
+
+# Local cache for regular expressions
+re_cache = {}
+
+
+class KernRe:
+    """
+    Helper class to simplify regex declaration and usage.
+
+    It calls re.compile for a given pattern. It also allows adding
+    regular expressions and define sub at class init time.
+
+    Regular expressions can be cached via an argument, helping to speedup
+    searches.
+    """
+
+    def _add_regex(self, string, flags):
+        """
+        Adds a new regex or reuses it from the cache. Entries are keyed by
+        (pattern, flags), so equal patterns with different flags don't clash.
+        """
+        key = (string, flags)
+        self.regex = re_cache.get(key) or re.compile(string, flags=flags)
+        if self.cache:
+            re_cache[key] = self.regex
+
+    def __init__(self, string, cache=True, flags=0):
+        """
+        Compile a regular expression and initialize internal vars.
+        """
+
+        self.cache = cache
+        self.last_match = None
+
+        self._add_regex(string, flags)
+
+    def __str__(self):
+        """
+        Return the regular expression pattern.
+        """
+        return self.regex.pattern
+
+    def __repr__(self):
+        """
+        Returns a displayable version of the class init.
+
+        Long patterns are split into adjacent string literals of at most
+        60 characters each; set flags are shown by their ``re.X`` names.
+        """
+
+        flag_map = {
+            re.IGNORECASE: "re.I",
+            re.MULTILINE: "re.M",
+            re.DOTALL: "re.S",
+            re.VERBOSE: "re.X",
+        }
+
+        flags_name = " | ".join(name for flag, name in flag_map.items()
+                                if self.regex.flags & flag)
+
+        max_len = 60
+        regex = self.regex.pattern
+        # Joining avoids the stray blank the chunks used to leave before ")"
+        pattern = " ".join('"' + regex[pos:pos + max_len] + '"'
+                           for pos in range(0, len(regex), max_len))
+
+        if flags_name:
+            return f'KernRe({pattern}, {flags_name})'
+
+        return f'KernRe({pattern})'
+
+    def __add__(self, other):
+        """
+        Allows adding two regular expressions into one. The result ORs the
+        flags of both operands and is cached if either of them is.
+        """
+        flags = self.regex.flags | other.regex.flags
+        return KernRe(str(self) + str(other), self.cache or other.cache, flags)
+
+    def match(self, string):
+        """
+        Handles a re.match storing its results.
+        """
+
+        self.last_match = self.regex.match(string)
+        return self.last_match
+
+    def search(self, string):
+        """
+        Handles a re.search storing its results.
+        """
+
+        self.last_match = self.regex.search(string)
+        return self.last_match
+
+    def finditer(self,  string):
+        """
+        Alias to re.finditer.
+        """
+
+        return self.regex.finditer(string)
+
+    def findall(self, string):
+        """
+        Alias to re.findall.
+        """
+
+        return self.regex.findall(string)
+
+    def split(self, string):
+        """
+        Alias to re.split.
+        """
+
+        return self.regex.split(string)
+
+    def sub(self, sub, string, count=0):
+        """
+        Alias to re.sub.
+        """
+
+        return self.regex.sub(sub, string, count=count)
+
+    def group(self, num):
+        """
+        Returns the group results of the last match.
+        """
+
+        return self.last_match.group(num)
+
+    def groups(self):
+        """
+        Returns the group results of the last match
+        """
+
+        return self.last_match.groups()
diff --git a/tools/lib/python/kdoc/kdoc_yaml_file.py b/tools/lib/python/kdoc/kdoc_yaml_file.py
new file mode 100644
index 000000000000..0be020d50df0
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_yaml_file.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+import os
+
+from kdoc.kdoc_output import ManFormat, RestFormat
+
+
+class KDocTestFile():
+    """
+    Handles the logic needed to store kernel-doc output inside a YAML file.
+    Useful for unit tests and regression tests.
+    """
+
+    def __init__(self, config, yaml_file, yaml_content):
+        """
+        Validate the environment and create one output formatter per
+        requested format ("man" and/or "rst" in yaml_content).
+        """
+        # "sys" is not imported at module level: get it here, as both
+        # early-exit paths below need sys.exit()
+        import sys
+
+        # Bail out early if yaml is not available
+        try:
+            import yaml
+        except ImportError:
+            sys.exit("Warning: yaml package not available. Aborting it.")
+
+        self.config = config
+        self.test_file = os.path.expanduser(yaml_file)
+        self.yaml_content = yaml_content
+        self.test_names = set()
+
+        self.tests = []
+
+        out_dir = os.path.dirname(self.test_file)
+        if out_dir and not os.path.isdir(out_dir):
+            sys.exit(f"Directory {out_dir} doesn't exist.")
+
+        # Keep the original order: man first, then rst
+        self.out_style = []
+        for key, fmt_class in (("man", ManFormat), ("rst", RestFormat)):
+            if key in self.yaml_content:
+                out_style = fmt_class()
+                out_style.set_config(self.config)
+                self.out_style.append(out_style)
+
+    def set_filter(self, export, internal, symbol, nosymbol,
+                   function_table, enable_lineno, no_doc_sections):
+        """
+        Set filters at the output classes.
+        """
+        for out_style in self.out_style:
+            out_style.set_filter(export, internal, symbol,
+                                 nosymbol, function_table,
+                                 enable_lineno, no_doc_sections)
+
+    @staticmethod
+    def get_kdoc_item(arg, start_line=1):
+        """
+        Return vars(arg) with its line numbers rebased to start_line.
+
+        The declaration line becomes start_line, and every entry of the
+        parameter and section line tables is shifted by the same delta.
+        Items with no (or zero) declaration_start_line are returned as-is.
+
+        NOTE(review): vars() returns the object's own __dict__, so the
+        rebasing seems to also change ``arg`` itself - confirm it is wanted.
+        """
+        d = vars(arg)
+
+        declaration_start_line = d.get("declaration_start_line")
+        if not declaration_start_line:
+            return d
+
+        d["declaration_start_line"] = start_line
+        delta = start_line - declaration_start_line
+
+        for table in ("parameterdesc_start_lines", "sections_start_lines"):
+            start_lines = d.get(table)
+            if start_lines:
+                for key in start_lines:
+                    start_lines[key] += delta
+
+        return d
+
+    def output_symbols(self, fname, symbols):
+        """
+        Store source, symbols and output strings at self.tests.
+
+        One test entry is appended per symbol. Its name derives from the
+        symbol name (or from fname when the symbol has no name) and is
+        made unique with a numeric suffix.
+        """
+
+        #
+        # Source code didn't produce any symbol
+        #
+        if not symbols:
+            return
+
+        for arg in symbols:
+            # Fetched first: the KdocItem handling below deletes "source"
+            # from other_stuff
+            source = arg.get("source", "")
+
+            # Expected results are collected per symbol
+            expected_dict = {}
+
+            #
+            # KdocItem needs to be converted into dicts
+            #
+            if arg and "KdocItem" in self.yaml_content:
+                msg = self.get_kdoc_item(arg)
+
+                other_stuff = msg.get("other_stuff", {})
+                if "source" in other_stuff:
+                    del other_stuff["source"]
+
+                expected_dict["kdoc_item"] = msg
+
+            base_name = arg.name
+            if not base_name:
+                base_name = fname
+            base_name = base_name.lower().replace(".", "_").replace("/", "_")
+
+            # Don't add duplicated names
+            i = 0
+            name = base_name
+            while name in self.test_names:
+                i += 1
+                name = f"{base_name}_{i:03d}"
+
+            self.test_names.add(name)
+
+            for out_style in self.out_style:
+                if isinstance(out_style, ManFormat):
+                    key = "man"
+                else:
+                    key = "rst"
+
+                expected_dict[key] = out_style.output_symbols(fname, [arg]).strip()
+
+            test = {
+                "name": name,
+                "description": f"{fname} line {arg.declaration_start_line}",
+                "fname": fname,
+                "source": source,
+                "expected": [expected_dict]
+            }
+
+            self.tests.append(test)
+
+    def write(self):
+        """
+        Output the content of self.tests to self.test_file.
+        """
+        import yaml
+
+        # Helper function to better handle multilines
+        def str_presenter(dumper, data):
+            if "\n" in data:
+                return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+
+            return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+        # Register the representer
+        yaml.add_representer(str, str_presenter)
+
+        data = {"tests": self.tests}
+
+        with open(self.test_file, "w", encoding="utf-8") as fp:
+            yaml.dump(data, fp,
+                      sort_keys=False, width=120, indent=2,
+                      default_flow_style=False, allow_unicode=True,
+                      explicit_start=False, explicit_end=False)
diff --git a/tools/lib/python/kdoc/latex_fonts.py b/tools/lib/python/kdoc/latex_fonts.py
new file mode 100755
index 000000000000..1d04cbda169f
--- /dev/null
+++ b/tools/lib/python/kdoc/latex_fonts.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) Akira Yokosawa, 2024
+#
+# Ported to Python by (c) Mauro Carvalho Chehab, 2025
+
+"""
+Detect problematic Noto CJK variable fonts
+==========================================
+
+For ``make pdfdocs``, reports of build errors of translations.pdf started
+arriving early 2024 [1]_ [2]_.  It turned out that Fedora and openSUSE
+tumbleweed have started deploying variable-font [3]_ format of "Noto CJK"
+fonts [4]_ [5]_.  For PDF, a LaTeX package named xeCJK is used for CJK
+(Chinese, Japanese, Korean) pages.  xeCJK requires XeLaTeX/XeTeX, which
+does not (and likely never will) understand variable fonts for historical
+reasons.
+
+The build error happens even when both of variable- and non-variable-format
+fonts are found on the build system.  To make matters worse, Fedora enlists
+variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN,
+-zh_TW, etc.  Hence developers who have interest in CJK pages are more
+likely to encounter the build errors.
+
+This script is invoked from the error path of "make pdfdocs" and emits
+suggestions if variable-font files of "Noto CJK" fonts are in the list of
+fonts accessible from XeTeX.
+
+.. [1] https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/
+.. [2] https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/
+.. [3] https://en.wikipedia.org/wiki/Variable_font
+.. [4] https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts
+.. [5] https://build.opensuse.org/request/show/1157217
+
+Workarounds for building translations.pdf
+-----------------------------------------
+
+* Denylist "variable font" Noto CJK fonts.
+
+  - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with
+    tweaks if necessary.
+
+  - Path of fontconfig/fonts.conf can be overridden by setting an env
+    variable FONTS_CONF_DENY_VF.
+
+    * Template::
+
+        <?xml version="1.0"?>
+        <!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd">
+        <fontconfig>
+        <!--
+        Ignore variable-font glob (not to break xetex)
+        -->
+            <selectfont>
+                <rejectfont>
+                    <!--
+                        for Fedora
+                    -->
+                    <glob>/usr/share/fonts/google-noto-*-cjk-vf-fonts</glob>
+                    <!--
+                        for openSUSE tumbleweed
+                    -->
+                    <glob>/usr/share/fonts/truetype/Noto*CJK*-VF.otf</glob>
+                </rejectfont>
+            </selectfont>
+        </fontconfig>
+
+    The denylisting is activated for "make pdfdocs".
+
+* For skipping CJK pages in PDF
+
+  - Uninstall texlive-xecjk.
+    Denylisting is not needed in this case.
+
+* For printing CJK pages in PDF
+
+  - Need non-variable "Noto CJK" fonts.
+
+    * Fedora
+
+      - google-noto-sans-cjk-fonts
+      - google-noto-serif-cjk-fonts
+
+    * openSUSE tumbleweed
+
+      - Non-variable "Noto CJK" fonts are not available as distro packages
+        as of April, 2024.  Fetch a set of font files from upstream Noto
+        CJK Font released at:
+
+          https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc
+
+        and at:
+
+          https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc
+
+        then uncompress and deploy them.
+      - Remember to update fontconfig cache by running fc-cache.
+
+.. caution::
+    Uninstalling "variable font" packages can be dangerous.
+    They might be depended upon by other packages important for your work.
+    Denylisting should be less invasive, as it is effective only while
+    XeLaTeX runs in "make pdfdocs".
+"""
+
+import os
+import re
+import subprocess
+import textwrap
+import sys
+
+class LatexFontChecker:
+    """
+    Detect problems with CJK variable fonts that affect PDF builds for
+    translations.
+    """
+
+    def __init__(self, deny_vf=None):
+        if not deny_vf:
+            deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf")
+
+        self.environ = os.environ.copy()
+        self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf)
+
+        self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK")
+
+    def description(self):
+        """
+        Returns module description.
+        """
+        return __doc__
+
+    def get_noto_cjk_vf_fonts(self):
+        """
+        Return the sorted entries that ``fc-list`` reports as variable Noto
+        CJK fonts. Exits with a message if fc-list can't be run or fails.
+        """
+        cjk_fonts = set()
+        cmd = ["fc-list", ":", "file", "family", "variable"]
+        try:
+            result = subprocess.run(cmd, stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE,
+                                    universal_newlines=True,
+                                    env=self.environ,
+                                    check=True)
+        # OSError covers a missing or non-executable fc-list binary
+        except (OSError, subprocess.CalledProcessError) as exc:
+            sys.exit(f"Error running fc-list: {repr(exc)}")
+
+        for line in result.stdout.splitlines():
+            if 'variable=True' not in line:
+                continue
+
+            match = self.re_cjk.search(line)
+            if match:
+                cjk_fonts.add(match.group(1))
+
+        return sorted(cjk_fonts)
+
+    def check(self):
+        """
+        Check for problems with CJK fonts.
+
+        Returns None when no variable Noto CJK font is found, or else a
+        message listing the fonts and pointing to the workarounds.
+        """
+        fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), "    ")
+        if not fonts:
+            return None
+
+        msg = "=" * 77 + "\n"
+        msg += 'XeTeX is confused by "variable font" files listed below:\n'
+        msg += fonts + "\n"
+        msg += textwrap.dedent("""
+                For CJK pages in PDF, they need to be hidden from XeTeX by denylisting.
+                Or, CJK pages can be skipped by uninstalling texlive-xecjk.
+
+                For more info on denylisting, other options, and variable font, run:
+
+                    tools/docs/check-variable-fonts.py -h
+            """)
+        msg += "=" * 77
+
+        return msg
diff --git a/tools/lib/python/kdoc/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py
new file mode 100755
index 000000000000..9941cd19032e
--- /dev/null
+++ b/tools/lib/python/kdoc/parse_data_structs.py
@@ -0,0 +1,498 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=R0912,R0915
+
+"""
+Parse a source file or header, creating ReStructured Text cross references.
+
+It accepts an optional file to change the default symbol reference or to
+suppress symbols from the output.
+
+It is capable of identifying ``define``, function, ``struct``, ``typedef``,
+``enum`` and enum value symbols and create cross-references for all of them.
+It is also capable of distinguishing a #define used for specifying a Linux
+ioctl.
+
+The optional rules file contains a set of rules like::
+
+    ignore ioctl VIDIOC_ENUM_FMT
+    replace ioctl VIDIOC_DQBUF vidioc_qbuf
+    replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+"""
+
+import os
+import re
+import sys
+
+
+class ParseDataStructs:
+    """
+    Creates an enriched version of a Kernel header file with cross-links
+    to each C data structure type.
+
+    It is meant to allow having a more comprehensive documentation, where
+    uAPI headers will create cross-reference links to the code.
+
+    It is capable of identifying ``define``, function, ``struct``, ``typedef``,
+    ``enum`` and enum value symbols and create cross-references for all of them.
+    It is also capable of distinguishing a #define used for specifying a Linux
+    ioctl.
+
+    By default, it creates rules for all symbols and defines, but it also
+    allows parsing an exception file. Such file contains a set of rules
+    using the syntax below:
+
+    1. Ignore rules::
+
+        ignore <type> <symbol>
+
+    Removes the symbol from reference generation.
+
+    2. Replace rules::
+
+        replace <type> <old_symbol> <new_reference>
+
+       Replaces the reference of old_symbol with a new one. The new_reference can be:
+
+        - A simple symbol name;
+        - A full Sphinx reference.
+
+    3. Namespace rules::
+
+        namespace <namespace>
+
+       Sets C namespace to be used during cross-reference generation. Can
+       be overridden by replace rules.
+
+    On ignore and replace rules, ``<type>`` can be:
+        - ``ioctl``: for defines that end with ``_IO*``, e.g. ioctl definitions
+        - ``define``: for other defines
+        - ``symbol``: for symbols defined within enums;
+        - ``typedef``: for typedefs;
+        - ``enum``: for the name of a non-anonymous enum;
+        - ``struct``: for structs.
+
+    Examples::
+
+        ignore define __LINUX_MEDIA_H
+        ignore ioctl VIDIOC_ENUM_FMT
+        replace ioctl VIDIOC_DQBUF vidioc_qbuf
+        replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+
+        namespace MC
+    """
+
+    #: Parser regex with multiple ways to capture enums.
+    RE_ENUMS = [
+        re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
+        re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
+        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
+        re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
+    ]
+
+    #: Parser regex with multiple ways to capture structs.
+    RE_STRUCTS = [
+        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+        re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
+        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+        re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
+    ]
+
+    # NOTE: the original code was written long before the Sphinx C domain
+    # supported multiple namespaces. To avoid too much churn on the
+    # existing hyperlinks, the code kept using "c:type" instead of the
+    # right types. To change that, we need to change the types not only
+    # here, but also at the uAPI media documentation.
+
+    #: Dictionary containing C type identifiers to be transformed.
+    DEF_SYMBOL_TYPES = {
+        "ioctl": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":ref",
+            "description": "IOCTL Commands",
+        },
+        "define": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":ref",
+            "description": "Macros and Definitions",
+        },
+        # We're calling each definition inside an enum as "symbol"
+        "symbol": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":ref",
+            "description": "Enumeration values",
+        },
+        "typedef": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":c:type",
+            "description": "Type Definitions",
+        },
+        # This is the description of the enum itself
+        "enum": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":c:type",
+            "description": "Enumerations",
+        },
+        "struct": {
+            "prefix": "\\ ",
+            "suffix": "\\ ",
+            "ref_type": ":c:type",
+            "description": "Structures",
+        },
+    }
+
+    def __init__(self, debug: bool = False):
+        """Initialize internal vars"""
+        self.debug = debug
+        self.data = ""
+
+        self.symbols = {}
+
+        self.namespace = None
+        self.ignore = []
+        self.replace = []
+
+        for symbol_type in self.DEF_SYMBOL_TYPES:
+            self.symbols[symbol_type] = {}
+
+    def read_exceptions(self, fname: str):
+        """
+        Read an optional exceptions file, used to override defaults.
+        """
+        if not fname:
+            return
+
+        # Kept on self: apply_exceptions() reports rule problems against it
+        name = os.path.basename(fname)
+        self.exceptions_name = name
+
+        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
+            for ln, line in enumerate(f):
+                ln += 1
+                line = line.strip()
+                if not line or line.startswith("#"):
+                    continue
+
+                # ignore rules
+                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
+                if match:
+                    self.ignore.append((ln, match.group(1), match.group(2)))
+                    continue
+
+                # replace rules
+                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
+                if match:
+                    self.replace.append((ln, match.group(1), match.group(2),
+                                         match.group(3)))
+                    continue
+
+                match = re.match(r"^namespace\s+(\S+)", line)
+                if match:
+                    self.namespace = match.group(1)
+                    continue
+
+                sys.exit(f"{name}:{ln}: invalid line: {line}")
+
+    def apply_exceptions(self):
+        """
+        Process exceptions file with rules to ignore or replace references.
+        """
+        # File name set by read_exceptions(), used when reporting rule
+        # problems. Falls back to a generic name if no file was read.
+        name = getattr(self, "exceptions_name", "exceptions")
+
+        # Handle ignore rules
+        for ln, c_type, symbol in self.ignore:
+            if c_type not in self.DEF_SYMBOL_TYPES:
+                sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+            self.symbols[c_type].pop(symbol, None)
+
+        # Handle replace rules
+        for ln, c_type, old, new in self.replace:
+            if c_type not in self.DEF_SYMBOL_TYPES:
+                sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+            reftype = None
+
+            # Parse reference type when the type is specified
+            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
+            if match:
+                reftype = f":c:{match.group(1)}"
+                new = match.group(2)
+            else:
+                match = re.search(r"(\:ref)\:\`(.+)\`", new)
+                if match:
+                    reftype = match.group(1)
+                    new = match.group(2)
+
+            # If the replacement rule doesn't have a type, get default
+            if not reftype:
+                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
+                if not reftype:
+                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")
+
+            new_ref = f"{reftype}:`{old} <{new}>`"
+
+            # Change self.symbols to use the replacement rule
+            if old in self.symbols[c_type]:
+                (_, sym_ln) = self.symbols[c_type][old]
+                self.symbols[c_type][old] = (new_ref, sym_ln)
+            else:
+                print(f"{name}:{ln}: Warning: can't find {old} {c_type}")
+
+    def store_type(self, ln, symbol_type: str, symbol: str,
+                   ref_name: str = None, replace_underscores: bool = True):
+        """
+        Store a new symbol at self.symbols under symbol_type.
+
+        By default, underscores are replaced by ``-``.
+        """
+        defs = self.DEF_SYMBOL_TYPES[symbol_type]
+
+        prefix = defs.get("prefix", "")
+        suffix = defs.get("suffix", "")
+        ref_type = defs.get("ref_type")
+
+        # Determine ref_link based on symbol type
+        if ref_type or self.namespace:
+            if not ref_name:
+                ref_name = symbol.lower()
+
+            # c-type references don't support hash
+            if ref_type == ":ref" and replace_underscores:
+                ref_name = ref_name.replace("_", "-")
+
+            # C domain references may have namespaces. ref_type can be None
+            # here when only a namespace is set, so check it before use.
+            if ref_type and ref_type.startswith(":c:") and self.namespace:
+                ref_name = f"{self.namespace}.{ref_name}"
+
+            if ref_type:
+                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
+            else:
+                ref_link = f"`{symbol} <{ref_name}>`"
+        else:
+            ref_link = symbol
+
+        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)
+
+    def store_line(self, line):
+        """
+        Store a line at self.data, properly indented.
+        """
+        line = "    " + line.expandtabs()
+        self.data += line.rstrip(" ")
+
+    def parse_file(self, file_in: str, exceptions: str = None):
+        """
+        Read a C source file and get identifiers.
+        """
+        self.data = ""
+        is_enum = False
+        is_comment = False
+        multiline = ""
+        first_no = None
+
+        self.read_exceptions(exceptions)
+
+        with open(file_in, "r",
+                  encoding="utf-8", errors="backslashreplace") as f:
+            for line_no, line in enumerate(f):
+                self.store_line(line)
+                line = line.strip("\n")
+
+                # Handle continuation lines: join them without the trailing
+                # backslash, remembering the line where the statement started
+                if line.endswith("\\"):
+                    if first_no is None:
+                        first_no = line_no
+                    multiline += line[:-1]
+                    continue
+
+                if multiline:
+                    line = multiline + line
+                    multiline = ""
+                if first_no is not None:
+                    line_no = first_no
+                    first_no = None
+
+                # Handle comments. They can be multilined
+                if not is_comment:
+                    if re.search(r"/\*.*", line):
+                        is_comment = True
+                    else:
+                        # Strip C99-style comments
+                        line = re.sub(r"(//.*)", "", line)
+
+                if is_comment:
+                    if re.search(r".*\*/", line):
+                        is_comment = False
+                    else:
+                        multiline = line
+                        continue
+
+                # At this point, line may be a multilined statement, so whole
+                # comments can be removed with no need for re.DOTALL below
+                line = re.sub(r"(/\*.*\*/)", "", line)
+                if not line.strip():
+                    continue
+
+                # Debug aid: show lines after comment strip and line grouping
+                if self.debug > 1:
+                    print(f"line {line_no + 1}: {line}")
+
+                # Now the fun begins: parse each type and store it.
+                # We opted for a two parsing logic here due to:
+                # 1. it makes easier to debug issues not-parsed symbols;
+                # 2. we want symbol replacement at the entire content, not
+                #    just when the symbol is detected.
+                if is_enum:
+                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
+                    if match:
+                        self.store_type(line_no, "symbol", match.group(1))
+                    if "}" in line:
+                        is_enum = False
+                    continue
+
+                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
+                if match:
+                    self.store_type(line_no, "ioctl", match.group(1),
+                                    replace_underscores=False)
+                    continue
+
+                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
+                if match:
+                    self.store_type(line_no, "define", match.group(1))
+                    continue
+
+                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
+                                 line)
+                if match:
+                    name = match.group(2).strip()
+                    symbol = match.group(3)
+                    self.store_type(line_no, "typedef", symbol, ref_name=name)
+                    continue
+
+                for re_enum in self.RE_ENUMS:
+                    match = re_enum.match(line)
+                    if match:
+                        self.store_type(line_no, "enum", match.group(1))
+                        is_enum = True
+                        break
+
+                for re_struct in self.RE_STRUCTS:
+                    match = re_struct.match(line)
+                    if match:
+                        self.store_type(line_no, "struct", match.group(1))
+                        break
+
+        self.apply_exceptions()
+
+    def debug_print(self):
+        """
+        Print debug information containing the replacement rules per symbol.
+        To make easier to check, group them per type.
+        """
+        if not self.debug:
+            return
+
+        for c_type, refs in self.symbols.items():
+            if not refs:  # Skip empty dictionaries
+                continue
+
+            print(f"{c_type}:")
+
+            for symbol, (ref, ln) in sorted(refs.items()):
+                print(f"  #{ln:<5d} {symbol} -> {ref}")
+
+            print()
+
+    def gen_output(self):
+        """Return self.data escaped for Sphinx, with symbols cross-referenced."""
+
+        # Avoid extra blank lines
+        text = re.sub(r"\s+$", "", self.data) + "\n"
+        text = re.sub(r"\n\s+\n", "\n\n", text)
+
+        # Escape Sphinx special characters
+        text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)
+
+        # Source uAPI files may have special notes. Use bold font for them
+        text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)
+
+        # Delimiters to catch the entire symbol after escaped
+        start_delim = r"([ \n\t\(=\*\@])"
+        end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"
+
+        # Process all reference types
+        for ref_dict in self.symbols.values():
+            for symbol, (replacement, _) in ref_dict.items():
+                symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
+                text = re.sub(fr'{start_delim}{symbol}{end_delim}',
+                              fr'\1{replacement}\2', text)
+
+        # Remove "\ " where not needed: before spaces and at the end of lines
+        text = re.sub(r"\\ ([\n ])", r"\1", text)
+        text = re.sub(r" \\ ", " ", text)
+
+        return text
+
+    def gen_toc(self):
+        """
+        Create a list of symbols to be part of a TOC contents table.
+        """
+        text = []
+
+        # Sort symbol types per description
+        symbol_descriptions = []
+        for k, v in self.DEF_SYMBOL_TYPES.items():
+            symbol_descriptions.append((v['description'], k))
+
+        symbol_descriptions.sort()
+
+        # Process each category
+        for description, c_type in symbol_descriptions:
+
+            refs = self.symbols[c_type]
+            if not refs:  # Skip empty categories
+                continue
+
+            text.append(f"{description}")
+            text.append("-" * len(description))
+            text.append("")
+
+            # Sort symbols alphabetically
+            for symbol, (ref, ln) in sorted(refs.items()):
+                text.append(f"- LINENO_{ln}: {ref}")
+
+            text.append("")  # Add empty line between categories
+
+        return "\n".join(text)
+
+    def write_output(self, file_in: str, file_out: str, toc: bool):
+        """
+        Write a ReST output file.
+        """
+
+        title = os.path.basename(file_in)
+
+        if toc:
+            text = self.gen_toc()
+        else:
+            text = self.gen_output()
+
+        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
+            f.write(".. -*- coding: utf-8; mode: rst -*-\n\n")
+            f.write(f"{title}\n")
+            f.write("=" * len(title) + "\n\n")
+
+            if not toc:
+                f.write(".. parsed-literal::\n\n")
+
+            f.write(text)
diff --git a/tools/lib/python/kdoc/python_version.py b/tools/lib/python/kdoc/python_version.py
new file mode 100644
index 000000000000..4ddb7ead5f56
--- /dev/null
+++ b/tools/lib/python/kdoc/python_version.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+
+"""
+Handle Python version check logic.
+
+Not all Python versions are supported by scripts. Yet, in some cases,
+like during documentation build, a newer version of Python could be
+available.
+
+This class allows checking if the minimal requirements are met.
+
+Better than that, PythonVersion.check_python() not only checks the minimal
+requirements, but it also automatically switches to the newest available
+Python version if present.
+
+"""
+
+import os
+import re
+import subprocess
+import shlex
+import sys
+
+from glob import glob
+from textwrap import indent
+
+class PythonVersion:
+    """
+    Ancillary methods to parse and compare Python versions, find newer
+    Python binaries in PATH and re-run the script with one of them.
+    """
+
+    def __init__(self, version):
+        """
+        Initialize self.version tuple from a version string.
+        """
+        self.version = self.parse_version(version)
+
+    @staticmethod
+    def parse_version(version):
+        """
+        Turn a dot-separated version string into a tuple of integers.
+        """
+        return tuple(map(int, version.split(".")))
+
+    @staticmethod
+    def ver_str(version):
+        """
+        Format a version tuple as a dot-separated string.
+        """
+        return ".".join(map(str, version))
+
+    @staticmethod
+    def cmd_print(cmd, max_len=80):
+        """
+        Format a command line as a string, respecting a maximum width.
+
+        Arguments are shell-quoted. When the current line plus the next
+        argument would reach ``max_len``, the line is ended with a backslash
+        and continued, indented by two spaces. A false ``max_len`` disables it.
+        """
+        lines = []
+
+        for word in map(shlex.quote, cmd):
+            if not lines:
+                lines.append(word)
+            elif not max_len or len(lines[-1]) + len(word) < max_len:
+                lines[-1] += " " + word
+            else:
+                # Close the current line and start a continuation
+                lines[-1] += " \\"
+                lines.append(word)
+
+        return "\n  ".join(lines)
+
+    def __str__(self):
+        """
+        Return self.version formatted as major.minor.patch.
+        """
+        return self.ver_str(self.version)
+
+    @staticmethod
+    def get_python_version(cmd):
+        """
+        Get the version tuple of the Python binary ``cmd``, or (0, 0, 0) if
+        it can't be executed or parsed. As we need to detect whether newer
+        Python binaries are available, sys.version_info can't be used here.
+        """
+        # subprocess.run() only accepts text= on Python 3.7 and newer
+        text_arg = 'universal_newlines' if sys.version_info < (3, 7) else 'text'
+
+        try:
+            result = subprocess.run([cmd, "--version"], check=False,
+                                    stdout=subprocess.PIPE,
+                                    stderr=subprocess.PIPE, **{text_arg: True})
+        except OSError as e:
+            # E.g. a binary built for another architecture
+            print(f"Can't run {cmd}: {e}")
+            return (0, 0, 0)
+
+        version = result.stdout.strip()
+        match = re.search(r"(\d+\.\d+\.\d+)", version)
+        if match:
+            return PythonVersion.parse_version(match.group(1))
+
+        print(f"Can't parse version {version}")
+        return (0, 0, 0)
+
+    @staticmethod
+    def find_python(min_version):
+        """
+        Look for python3.x binaries in PATH with version >= ``min_version``.
+        Returns a list of ``(version, command)`` tuples, newest first.
+
+        Note: this routine is limited to up to 2 digits for the python3
+        minor version. We may need to update it one day, hopefully in a
+        distant future.
+        """
+        patterns = ("python3.[0-9][0-9]", "python3.[0-9]")
+
+        # Candidate binaries from every PATH directory, in PATH order
+        candidates = (cmd
+                      for path in os.getenv("PATH", "").split(":")
+                      for pattern in patterns
+                      for cmd in glob(os.path.join(path, pattern)))
+
+        found = []
+        for cmd in candidates:
+            if os.path.isfile(cmd) and os.access(cmd, os.X_OK):
+                version = PythonVersion.get_python_version(cmd)
+                if version >= min_version:
+                    found.append((version, cmd))
+
+        return sorted(found, reverse=True)
+
+    @staticmethod
+    def check_python(min_version, show_alternatives=False, bail_out=False,
+                     success_on_error=False):
+        """
+        Check if the current python binary satisfies our minimal requirement
+        for Sphinx build. If not, re-run with a newer version if found.
+
+        With ``show_alternatives``, print a command line for each suitable
+        binary found. With ``bail_out``, exit instead of re-running; the
+        exit status is then 0 if ``success_on_error`` is set.
+        """
+        cur_ver = sys.version_info[:3]
+        if cur_ver >= min_version:
+            return
+
+        python_ver = PythonVersion.ver_str(cur_ver)
+
+        available_versions = PythonVersion.find_python(min_version)
+        if not available_versions:
+            print(f"ERROR: Python version {python_ver} is not supported anymore\n")
+            print("       Can't find a new version. This script may fail")
+            return
+
+        script_path = os.path.abspath(sys.argv[0])
+
+        # find_python() sorts newest first and the list isn't empty here
+        new_python_cmd = available_versions[0][1]
+
+        if show_alternatives:
+            print("You could run, instead:")
+            for _, cmd in available_versions:
+                args = [cmd, script_path] + sys.argv[1:]
+
+                cmd_str = indent(PythonVersion.cmd_print(args), "  ")
+                print(f"{cmd_str}\n")
+
+        if bail_out:
+            msg = f"Python {python_ver} not supported. Bailing out"
+            if success_on_error:
+                print(msg, file=sys.stderr)
+                sys.exit(0)
+            else:
+                sys.exit(msg)
+
+        print(f"Python {python_ver} not supported. Changing to {new_python_cmd}")
+
+        # Restart script using the newer version
+        args = [new_python_cmd, script_path] + sys.argv[1:]
+
+        try:
+            os.execv(new_python_cmd, args)
+        except OSError as e:
+            sys.exit(f"Failed to restart with {new_python_cmd}: {e}")
diff --git a/tools/lib/python/kdoc/xforms_lists.py b/tools/lib/python/kdoc/xforms_lists.py
new file mode 100644
index 000000000000..f6ea9efb11ae
--- /dev/null
+++ b/tools/lib/python/kdoc/xforms_lists.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+import re
+
+from kdoc.kdoc_re import KernRe
+from kdoc.c_lex import CMatch, CTokenizer
+
+struct_args_pattern = r"([^,)]+)"
+
+
+class CTransforms:
+    """
+    Data class containing a long set of transformations to turn
+    structure member prefixes, and macro invocations and variables
+    into something we can parse and generate kdoc for.
+    """
+
+    #
+    # NOTE:
+    #      Due to performance reasons, place CMatch rules before KernRe,
+    #      as this avoids running the C parser every time.
+    #
+
+    #: Transforms for structs and unions.
+    struct_xforms = [
+        (CMatch("__attribute__"), ""),
+        (CMatch("__aligned"), ""),
+        (CMatch("__counted_by"), ""),
+        (CMatch("__counted_by_(le|be)"), ""),
+        (CMatch("__guarded_by"), ""),
+        (CMatch("__pt_guarded_by"), ""),
+        (CMatch("__packed"), ""),
+        (CMatch("CRYPTO_MINALIGN_ATTR"), ""),
+        (CMatch("__private"), ""),
+        (CMatch("__rcu"), ""),
+        (CMatch("____cacheline_aligned_in_smp"), ""),
+        (CMatch("____cacheline_aligned"), ""),
+        (CMatch("__cacheline_group_(?:begin|end)"), ""),
+        (CMatch("__ETHTOOL_DECLARE_LINK_MODE_MASK"), r"DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)"),
+        (CMatch("DECLARE_PHY_INTERFACE_MASK",),r"DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)"),
+        (CMatch("DECLARE_BITMAP"), r"unsigned long \1[BITS_TO_LONGS(\2)]"),
+        (CMatch("DECLARE_HASHTABLE"), r"unsigned long \1[1 << ((\2) - 1)]"),
+        (CMatch("DECLARE_KFIFO"), r"\2 *\1"),
+        (CMatch("DECLARE_KFIFO_PTR"), r"\2 *\1"),
+        (CMatch("(?:__)?DECLARE_FLEX_ARRAY"), r"\1 \2[]"),
+        (CMatch("DEFINE_DMA_UNMAP_ADDR"), r"dma_addr_t \1"),
+        (CMatch("DEFINE_DMA_UNMAP_LEN"), r"__u32 \1"),
+        (CMatch("VIRTIO_DECLARE_FEATURES"), r"union { u64 \1; u64 \1_array[VIRTIO_FEATURES_U64S]; }"),
+        (CMatch("__cond_acquires"), ""),
+        (CMatch("__cond_releases"), ""),
+        (CMatch("__acquires"), ""),
+        (CMatch("__releases"), ""),
+        (CMatch("__must_hold"), ""),
+        (CMatch("__must_not_hold"), ""),
+        (CMatch("__must_hold_shared"), ""),
+        (CMatch("__cond_acquires_shared"), ""),
+        (CMatch("__acquires_shared"), ""),
+        (CMatch("__releases_shared"), ""),
+        (CMatch("__attribute__"), ""),
+
+        #
+        # Macro __struct_group() creates a union with an anonymous
+        # and a non-anonymous struct, depending on the parameters. We only
+        # need one of those in kernel-doc, as we won't be documenting the same
+        # members twice.
+        #
+        (CMatch("struct_group"), r"struct { \2+ };"),
+        (CMatch("struct_group_attr"), r"struct { \3+ };"),
+        (CMatch("struct_group_tagged"), r"struct { \3+ };"),
+        (CMatch("__struct_group"), r"struct { \4+ };"),
+    ]
+
+    #: Transforms for function prototypes.
+    function_xforms = [
+        (CMatch("static"), ""),
+        (CMatch("extern"), ""),
+        (CMatch("asmlinkage"), ""),
+        (CMatch("inline"), ""),
+        (CMatch("__inline__"), ""),
+        (CMatch("__inline"), ""),
+        (CMatch("__always_inline"), ""),
+        (CMatch("noinline"), ""),
+        (CMatch("__FORTIFY_INLINE"), ""),
+        (CMatch("__init"), ""),
+        (CMatch("__init_or_module"), ""),
+        (CMatch("__exit"), ""),
+        (CMatch("__deprecated"), ""),
+        (CMatch("__flatten"), ""),
+        (CMatch("__meminit"), ""),
+        (CMatch("__must_check"), ""),
+        (CMatch("__weak"), ""),
+        (CMatch("__sched"), ""),
+        (CMatch("__always_unused"), ""),
+        (CMatch("__printf"), ""),
+        (CMatch("__(?:re)?alloc_size"), ""),
+        (CMatch("__diagnose_as"), ""),
+        (CMatch("DECL_BUCKET_PARAMS"), r"\1, \2"),
+        (CMatch("__no_context_analysis"), ""),
+        (CMatch("__attribute_const__"), ""),
+        (CMatch("__attribute__"), ""),
+
+        #
+        # HACK: this is similar to the process_export() hack. It is meant to
+        # drop _noprof from the function name. See for instance the
+        # ahash_request_alloc kernel-doc declaration at include/crypto/hash.h.
+        #
+        (KernRe("_noprof"), ""),
+    ]
+
+    #: Transforms for variable prototypes.
+    var_xforms = [
+        (CMatch("__read_mostly"), ""),
+        (CMatch("__ro_after_init"), ""),
+        (CMatch("__guarded_by"), ""),
+        (CMatch("__pt_guarded_by"), ""),
+        (CMatch("LIST_HEAD"), r"struct list_head \1"),
+
+        (KernRe(r"(?://.*)$"), ""),
+        (KernRe(r"(?:/\*.*\*/)"), ""),
+        (KernRe(r";$"), ""),
+    ]
+
+    #: Transforms main dictionary used at apply_transforms().
+    xforms = {
+        "struct": struct_xforms,
+        "func": function_xforms,
+        "var": var_xforms,
+    }
+
+    def apply(self, xforms_type, source):
+        """
+        Run every transform registered for ``xforms_type`` over ``source``.
+
+        An unknown ``xforms_type`` returns ``source`` unchanged. As the
+        tokenizer is used here, this function also removes comments at
+        the end.
+        """
+        if xforms_type not in self.xforms:
+            return source
+
+        # Plain strings are tokenized once, up front
+        result = CTokenizer(source) if isinstance(source, str) else source
+
+        for matcher, replacement in self.xforms[xforms_type]:
+            # KernRe only accepts strings, so flatten the tokens first
+            if isinstance(matcher, KernRe):
+                result = str(result)
+
+            result = matcher.sub(replacement, result)
+
+        return str(result)
diff --git a/tools/lib/python/unittest_helper.py b/tools/lib/python/unittest_helper.py
new file mode 100755
index 000000000000..f3cba5120401
--- /dev/null
+++ b/tools/lib/python/unittest_helper.py
@@ -0,0 +1,363 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025-2026: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0103,R0912,R0914,E1101
+
+"""
+Provides helper functions and classes to execute python unit tests.
+
+Those helper functions provide a nice colored output summary of each
+executed test and, when a test fails, it shows the difference in diff
+format when running in verbose mode, like::
+
+    $ tools/unittests/nested_match.py -v
+    ...
+    Traceback (most recent call last):
+    File "/new_devel/docs/tools/unittests/nested_match.py", line 69, in test_count_limit
+        self.assertEqual(replaced, "bar(a); bar(b); foo(c)")
+        ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    AssertionError: 'bar(a) foo(b); foo(c)' != 'bar(a); bar(b); foo(c)'
+    - bar(a) foo(b); foo(c)
+    ?       ^^^^
+    + bar(a); bar(b); foo(c)
+    ?       ^^^^^
+    ...
+
+It also allows filtering what tests will be executed via ``-k`` parameter.
+
+Typical usage is to do::
+
+    from unittest_helper import run_unittest
+    ...
+
+    if __name__ == "__main__":
+        run_unittest(__file__)
+
+If passing arguments is needed, on a more complex scenario, it can be
+used like on this example::
+
+    from unittest_helper import TestUnits, run_unittest
+    ...
+    env = {'sudo': ""}
+    ...
+    if __name__ == "__main__":
+        runner = TestUnits()
+        base_parser = runner.parse_args()
+        base_parser.add_argument('--sudo', action='store_true',
+                                help='Enable tests requiring sudo privileges')
+
+        args = base_parser.parse_args()
+
+        # Update module-level flag
+        if args.sudo:
+            env['sudo'] = "1"
+
+        # Run tests with customized arguments
+        runner.run(__file__, parser=base_parser, args=args, env=env)
+"""
+
+import argparse
+import atexit
+import os
+import re
+import unittest
+import sys
+
+from unittest.mock import patch
+
+
+class Summary(unittest.TestResult):
+    """
+    Overrides ``unittest.TestResult`` class to provide a nice colored
+    summary. When in verbose mode, displays actual/expected difference in
+    unified diff format.
+    """
+    def __init__(self, *args, **kwargs):
+        """Set up the base ``TestResult`` plus the summary bookkeeping."""
+        super().__init__(*args, **kwargs)
+
+        #: Results as ``{module: {class: [(method, status), ...]}}``.
+        self.test_results = {}
+        #: Length of the longest test name seen, including its colon.
+        self.max_name_length = 0
+
+    def startTest(self, test):
+        """
+        Register the module and class of ``test`` in ``self.test_results``
+        and keep track of the longest test name seen so far.
+
+        The outcome itself is appended later, when it gets reported.
+        """
+        super().startTest(test)
+
+        # The test id is split on dots; its last three components are
+        # taken as module, class and method names. Missing ones become "".
+        parts = test.id().split(".")
+        method_name = parts[-1]
+        module_name = ""
+        class_name = ""
+        if len(parts) >= 2:
+            class_name = parts[-2]
+        if len(parts) >= 3:
+            module_name = parts[-3]
+
+        # Create the module -> class -> results hierarchy on first use
+        classes = self.test_results.setdefault(module_name, {})
+        classes.setdefault(class_name, [])
+
+        # The name is displayed followed by a colon, so account for it
+        name_length = len(f"{method_name}:")
+        if name_length > self.max_name_length:
+            self.max_name_length = name_length
+
+    def _record_test(self, test, status):
+        """Store ``(method, status)`` under the module and class of ``test``."""
+        parts = test.id().split(".")
+        module_name = parts[-3] if len(parts) >= 3 else ""
+        class_name = parts[-2] if len(parts) >= 2 else ""
+        method_name = parts[-1]
+
+        # unittest reports class and module fixture failures (setUpClass(),
+        # setUpModule()) through addError() or addSkip() without calling
+        # startTest() first, so the nested dictionaries may not exist yet:
+        # create them on demand.
+        classes = self.test_results.setdefault(module_name, {})
+        classes.setdefault(class_name, []).append((method_name, status))
+
+    def addSuccess(self, test):
+        super().addSuccess(test)
+        self._record_test(test, "OK")
+
+    def addFailure(self, test, err):
+        super().addFailure(test, err)
+        self._record_test(test, "FAIL")
+
+    def addError(self, test, err):
+        super().addError(test, err)
+        self._record_test(test, "ERROR")
+
+    def addSkip(self, test, reason):
+        super().addSkip(test, reason)
+        self._record_test(test, f"SKIP ({reason})")
+
+    def printResults(self, verbose):
+        """
+        Print results using colors if tty.
+
+        Passed and expected-failure entries are only listed when
+        ``verbose`` is true; the totals are always printed.
+        """
+        # Check for ANSI color support
+        use_color = sys.stdout.isatty()
+        COLORS = {
+            "OK":            "\033[32m",   # Green
+            "FAIL":          "\033[31m",   # Red
+            "ERROR":         "\033[31m",   # Red
+            "SKIP":          "\033[1;33m", # Yellow
+            "PARTIAL":       "\033[33m",   # Orange
+            "EXPECTED_FAIL": "\033[36m",   # Cyan
+            "reset":         "\033[0m",    # Reset to default terminal color
+        }
+        if not use_color:
+            for c in COLORS:
+                COLORS[c] = ""
+
+        if not self.test_results:
+            return
+
+        # Column width: longest test name, its colon and some padding.
+        # No entry may have been recorded (e.g. only expected failures).
+        max_length = 2 + max((len(test_name) + 1
+                              for module in self.test_results.values()
+                              for tests in module.values()
+                              for test_name, _ in tests), default=0)
+
+        # Print results
+        for module_name, classes in self.test_results.items():
+            if verbose:
+                print(f"{module_name}:")
+            for class_name, tests in classes.items():
+                if verbose:
+                    print(f"    {class_name}:")
+                for test_name, status in tests:
+                    if not verbose and status in [ "OK", "EXPECTED_FAIL" ]:
+                        continue
+
+                    # Get base status without reason for SKIP
+                    if status.startswith("SKIP"):
+                        status_code = status.split()[0]
+                    else:
+                        status_code = status
+                    color = COLORS.get(status_code, "")
+                    print(
+                        f"        {test_name + ':':<{max_length}}{color}{status}{COLORS['reset']}"
+                    )
+            if verbose:
+                print()
+
+        # Print summary
+        print(f"\nRan {self.testsRun} tests", end="")
+        if hasattr(self, "timeTaken"):
+            print(f" in {self.timeTaken:.3f}s", end="")
+        print()
+
+        if not self.wasSuccessful():
+            # wasSuccessful() is also false on unexpected successes
+            counts = []
+            for label, attr in (("failures", "failures"), ("errors", "errors"),
+                                ("unexpected successes", "unexpectedSuccesses")):
+                items = getattr(self, attr, [])
+                if items:
+                    counts.append(f"{label}={len(items)}")
+
+            print(f"\n{COLORS['FAIL']}FAILED ({', '.join(counts)}){COLORS['reset']}")
+
+
+def flatten_suite(suite):
+    """Return the individual tests of a (possibly nested) test suite."""
+    flat = []
+    for entry in suite:
+        if not isinstance(entry, unittest.TestSuite):
+            flat.append(entry)
+            continue
+        flat += flatten_suite(entry)
+    return flat
+
+
+class TestUnits:
+    """
+    Helper class to set verbosity level.
+
+    This class discover test files, import its unittest classes and
+    executes the test on it.
+    """
+    def parse_args(self):
+        """Returns an argparse parser with the test runner's common options."""
+        parser = argparse.ArgumentParser(description="Test runner with regex filtering")
+        parser.add_argument("-v", "--verbose", action="count", default=1)
+        parser.add_argument("-q", "--quiet", action="store_true")
+        parser.add_argument("-f", "--failfast", action="store_true")
+        parser.add_argument("-k", "--keyword",
+                            help="Regex pattern to filter test methods")
+        return parser
+
+    def run(self, caller_file=None, pattern=None,
+            suite=None, parser=None, args=None, env=None):
+        """
+        Execute all tests from the unit test file.
+
+        It contains several optional parameters:
+
+        ``caller_file``:
+            -  name of the file that contains test.
+
+               typical usage is to place __file__ at the caller test, e.g.::
+
+                    if __name__ == "__main__":
+                        TestUnits().run(__file__)
+
+        ``pattern``:
+            - optional pattern to match multiple file names. Defaults
+              to basename of ``caller_file``.
+
+        ``suite``:
+            - an unittest suite initialized by the caller using
+              ``unittest.TestLoader().discover()``.
+
+        ``parser``:
+            - an argparse parser. If not defined, this helper will create
+              one.
+
+        ``args``:
+            - an ``argparse.Namespace`` data filled by the caller.
+
+        ``env``:
+            - environment variables that will be passed to the test suite.
+              ``VERBOSE`` is always set to the verbosity level.
+
+        At least ``caller_file`` or ``suite`` must be used, otherwise a
+        ``TypeError`` will be raised.
+
+        Always ends with ``sys.exit()``; the status is 0 only on success.
+        """
+        if not args:
+            if not parser:
+                parser = self.parse_args()
+            args = parser.parse_args()
+
+        if not caller_file and not suite:
+            raise TypeError("Either caller_file or suite is needed at TestUnits")
+
+        if args.quiet:
+            verbose = 0
+        else:
+            verbose = args.verbose
+
+        if not env:
+            env = os.environ.copy()
+
+        env["VERBOSE"] = f"{verbose}"
+
+        patcher = patch.dict(os.environ, env)
+        patcher.start()
+        # ensure it gets stopped after
+        atexit.register(patcher.stop)
+
+        # Load ONLY tests from the calling file, unless the caller gave a
+        # file name pattern
+        if not suite:
+            if not pattern:
+                pattern = os.path.basename(caller_file)
+
+            loader = unittest.TestLoader()
+            suite = loader.discover(start_dir=os.path.dirname(caller_file),
+                                    pattern=pattern)
+
+        # Flatten the suite for environment injection
+        tests_to_inject = flatten_suite(suite)
+
+        # Filter tests by method name if -k specified
+        if args.keyword:
+            try:
+                keyword_re = re.compile(args.keyword)
+                filtered_suite = unittest.TestSuite()
+                for test in tests_to_inject:  # Use the pre-flattened list
+                    method_name = test.id().split(".")[-1]
+                    if keyword_re.search(method_name):
+                        filtered_suite.addTest(test)
+                suite = filtered_suite
+            except re.error as e:
+                sys.stderr.write(f"Invalid regex pattern: {e}\n")
+                sys.exit(1)
+        else:
+            # No keyword filtering: run every test from the flattened list
+            suite = unittest.TestSuite(tests_to_inject)
+
+        if verbose >= 2:
+            resultclass = None
+        else:
+            resultclass = Summary
+
+        runner = unittest.TextTestRunner(verbosity=args.verbose,
+                                            resultclass=resultclass,
+                                            failfast=args.failfast)
+        result = runner.run(suite)
+        if resultclass:
+            result.printResults(verbose)
+
+        sys.exit(not result.wasSuccessful())
+
+
+def run_unittest(fname):
+    """
+    Basic usage of TestUnits class.
+
+    Use it when there's no need to pass any extra argument to the
+    tests. The recommended way is to place this at the end of each
+    unittest module::
+
+        if __name__ == "__main__":
+            run_unittest(__file__)
+    """
+    TestUnits().run(fname)
diff --git a/tools/lib/slab.c b/tools/lib/slab.c
index 959997fb0652..981a21404f32 100644
--- a/tools/lib/slab.c
+++ b/tools/lib/slab.c
@@ -36,3 +36,19 @@ void kfree(void *p)
 		printf("Freeing %p to malloc\n", p);
 	free(p);
 }
+
+void *kmalloc_array(size_t n, size_t size, gfp_t gfp)
+{
+	void *ret;
+
+	if (!(gfp & __GFP_DIRECT_RECLAIM))
+		return NULL;
+	/* calloc() rejects n * size overflow and returns zeroed memory */
+	ret = calloc(n, size);
+	if (!ret)
+		return NULL;
+	uatomic_inc(&kmalloc_nr_allocated);
+	if (kmalloc_verbose)
+		printf("Allocating %p from calloc\n", ret);
+	return ret;
+}
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 8561b0f01a24..db94aa685b73 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -9,6 +9,7 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include <dirent.h>
+#include <assert.h>
 #include "subcmd-util.h"
 #include "help.h"
 #include "exec-cmd.h"
@@ -74,6 +75,9 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
 	size_t ci, cj, ei;
 	int cmp;
 
+	if (!excludes->cnt)
+		return;
+
 	ci = cj = ei = 0;
 	while (ci < cmds->cnt && ei < excludes->cnt) {
 		cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
@@ -82,24 +86,27 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
 				ci++;
 				cj++;
 			} else {
-				zfree(&cmds->names[cj]);
-				cmds->names[cj++] = cmds->names[ci++];
+				cmds->names[cj++] = cmds->names[ci];
+				cmds->names[ci++] = NULL;
 			}
 		} else if (cmp == 0) {
+			zfree(&cmds->names[ci]);
 			ci++;
 			ei++;
 		} else if (cmp > 0) {
 			ei++;
 		}
 	}
-	if (ci != cj) {
-		while (ci < cmds->cnt) {
-			zfree(&cmds->names[cj]);
-			cmds->names[cj++] = cmds->names[ci++];
+	while (ci < cmds->cnt) {
+		if (ci != cj) {
+			cmds->names[cj] = cmds->names[ci];
+			cmds->names[ci] = NULL;
 		}
+		ci++;
+		cj++;
 	}
 	for (ci = cj; ci < cmds->cnt; ci++)
-		zfree(&cmds->names[ci]);
+		assert(cmds->names[ci] == NULL);
 	cmds->cnt = cj;
 }
 
diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c
index 0a764c25c384..b7510f83209a 100644
--- a/tools/lib/subcmd/run-command.c
+++ b/tools/lib/subcmd/run-command.c
@@ -5,6 +5,7 @@
 #include <ctype.h>
 #include <fcntl.h>
 #include <string.h>
+#include <linux/compiler.h>
 #include <linux/string.h>
 #include <errno.h>
 #include <sys/wait.h>
@@ -216,10 +217,20 @@ static int wait_or_whine(struct child_process *cmd, bool block)
 	return result;
 }
 
+/*
+ * Conservative estimate of the number of characters needed to hold a decimal
+ * integer: assume every 3 bits need one character (rounding up), plus a
+ * possible sign character.
+ */
+#ifndef is_signed_type
+#define is_signed_type(type) (((type)(-1)) < (type)1)
+#endif
+#define MAX_STRLEN_TYPE(type) ((sizeof(type) * 8 + 2) / 3 + (is_signed_type(type) ? 1 : 0))
+
 int check_if_command_finished(struct child_process *cmd)
 {
 #ifdef __linux__
-	char filename[FILENAME_MAX + 12];
+	char filename[6 + MAX_STRLEN_TYPE(typeof(cmd->pid)) + 7 + 1];
 	char status_line[256];
 	FILE *status_file;
 
@@ -227,7 +238,7 @@ int check_if_command_finished(struct child_process *cmd)
 	 * Check by reading /proc/<pid>/status as calling waitpid causes
 	 * stdout/stderr to be closed and data lost.
 	 */
-	sprintf(filename, "/proc/%d/status", cmd->pid);
+	sprintf(filename, "/proc/%u/status", cmd->pid);
 	status_file = fopen(filename, "r");
 	if (status_file == NULL) {
 		/* Open failed assume finish_command was called. */
diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile
index a1f5e388644d..41aa7a324ff4 100644
--- a/tools/lib/thermal/Makefile
+++ b/tools/lib/thermal/Makefile
@@ -46,8 +46,12 @@ else
   CFLAGS := -g -Wall
 endif
 
+NL3_CFLAGS = $(shell pkg-config --cflags libnl-3.0 2>/dev/null)
+ifeq ($(NL3_CFLAGS),)
+NL3_CFLAGS = -I/usr/include/libnl3
+endif
+
 INCLUDES = \
--I/usr/include/libnl3 \
 -I$(srctree)/tools/lib/thermal/include \
 -I$(srctree)/tools/lib/ \
 -I$(srctree)/tools/include \
@@ -59,6 +63,7 @@ INCLUDES = \
 override CFLAGS += $(EXTRA_WARNINGS)
 override CFLAGS += -Werror -Wall
 override CFLAGS += -fPIC
+override CFLAGS += $(NL3_CFLAGS)
 override CFLAGS += $(INCLUDES)
 override CFLAGS += -fvisibility=hidden
 override CFGLAS += -Wl,-L.
@@ -134,7 +139,7 @@ endef
 install_lib: libs
 	$(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \
 		$(call do_install_mkdir,$(libdir_SQ)); \
-		cp -fpR $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ)
+		cp -fR --preserve=mode,timestamp $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ)
 
 install_headers:
 	$(call QUIET_INSTALL, headers) \
diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map
index d657176aa47f..1d3d0c04e4b6 100644
--- a/tools/lib/thermal/libthermal.map
+++ b/tools/lib/thermal/libthermal.map
@@ -1,6 +1,5 @@
 LIBTHERMAL_0.0.1 {
 	global:
-		thermal_init;
 		for_each_thermal_zone;
 		for_each_thermal_trip;
 		for_each_thermal_cdev;
@@ -9,9 +8,12 @@ LIBTHERMAL_0.0.1 {
 		thermal_zone_find_by_id;
 		thermal_zone_discover;
 		thermal_init;
+		thermal_exit;
+		thermal_events_exit;
 		thermal_events_init;
 		thermal_events_handle;
 		thermal_events_fd;
+		thermal_cmd_exit;
 		thermal_cmd_init;
 		thermal_cmd_get_tz;
 		thermal_cmd_get_cdev;
@@ -22,6 +24,7 @@ LIBTHERMAL_0.0.1 {
 		thermal_cmd_threshold_add;
 		thermal_cmd_threshold_delete;
 		thermal_cmd_threshold_flush;
+		thermal_sampling_exit;
 		thermal_sampling_init;
 		thermal_sampling_handle;
 		thermal_sampling_fd;
diff --git a/tools/lib/thermal/libthermal.pc.template b/tools/lib/thermal/libthermal.pc.template
index ac24d0ab17f5..3b8a24d0a8b8 100644
--- a/tools/lib/thermal/libthermal.pc.template
+++ b/tools/lib/thermal/libthermal.pc.template
@@ -8,5 +8,5 @@ Name: libthermal
 Description: thermal library
 Requires: libnl-3.0 libnl-genl-3.0
 Version: @VERSION@
-Libs: -L${libdir} -lnl-genl-3 -lnl-3
-Cflags: -I${includedir} -I${include}/libnl3
+Libs: -L${libdir} -lnl-genl-3 -lnl-3 -lthermal
+Cflags: -I${includedir} -I${includedir}/libnl3