1143 files changed, 38659 insertions, 11044 deletions
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 1334214546d7..3feac0482fe9 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -192,60 +192,77 @@ static int get_family_id(int sd)
 }
 
 #define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
+#define delay_ms(t) (t / 1000000ULL)
 
 static void print_delayacct(struct taskstats *t)
 {
-	printf("\n\nCPU   %15s%15s%15s%15s%15s\n"
-	       "      %15llu%15llu%15llu%15llu%15.3fms\n"
-	       "IO    %15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "SWAP  %15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "RECLAIM  %12s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "THRASHING%12s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "COMPACT  %12s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "WPCOPY   %12s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n"
-	       "IRQ   %15s%15s%15s\n"
-	       "      %15llu%15llu%15.3fms\n",
+	printf("\n\nCPU   %15s%15s%15s%15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+	       "IO    %15s%15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+	       "SWAP  %15s%15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+	       "RECLAIM  %12s%15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+	       "THRASHING%12s%15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+	       "COMPACT  %12s%15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+	       "WPCOPY   %12s%15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+	       "IRQ   %15s%15s%15s%15s%15s\n"
+	       "      %15llu%15llu%15.3fms%13.6fms%13.6fms\n",
 	       "count", "real total", "virtual total",
-	       "delay total", "delay average",
+	       "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->cpu_count,
 	       (unsigned long long)t->cpu_run_real_total,
 	       (unsigned long long)t->cpu_run_virtual_total,
 	       (unsigned long long)t->cpu_delay_total,
 	       average_ms((double)t->cpu_delay_total, t->cpu_count),
-	       "count", "delay total", "delay average",
+	       delay_ms((double)t->cpu_delay_max),
+	       delay_ms((double)t->cpu_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->blkio_count,
 	       (unsigned long long)t->blkio_delay_total,
 	       average_ms((double)t->blkio_delay_total, t->blkio_count),
-	       "count", "delay total", "delay average",
+	       delay_ms((double)t->blkio_delay_max),
+	       delay_ms((double)t->blkio_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->swapin_count,
 	       (unsigned long long)t->swapin_delay_total,
 	       average_ms((double)t->swapin_delay_total, t->swapin_count),
-	       "count", "delay total", "delay average",
+	       delay_ms((double)t->swapin_delay_max),
+	       delay_ms((double)t->swapin_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->freepages_count,
 	       (unsigned long long)t->freepages_delay_total,
 	       average_ms((double)t->freepages_delay_total, t->freepages_count),
-	       "count", "delay total", "delay average",
+	       delay_ms((double)t->freepages_delay_max),
+	       delay_ms((double)t->freepages_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->thrashing_count,
 	       (unsigned long long)t->thrashing_delay_total,
 	       average_ms((double)t->thrashing_delay_total, t->thrashing_count),
-	       "count", "delay total", "delay average",
+	       delay_ms((double)t->thrashing_delay_max),
+	       delay_ms((double)t->thrashing_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->compact_count,
 	       (unsigned long long)t->compact_delay_total,
 	       average_ms((double)t->compact_delay_total, t->compact_count),
-	       "count", "delay total", "delay average",
+	       delay_ms((double)t->compact_delay_max),
+	       delay_ms((double)t->compact_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->wpcopy_count,
 	       (unsigned long long)t->wpcopy_delay_total,
 	       average_ms((double)t->wpcopy_delay_total, t->wpcopy_count),
-	       "count", "delay total", "delay average",
+	       delay_ms((double)t->wpcopy_delay_max),
+	       delay_ms((double)t->wpcopy_delay_min),
+	       "count", "delay total", "delay average", "delay max", "delay min",
 	       (unsigned long long)t->irq_count,
 	       (unsigned long long)t->irq_delay_total,
-	       average_ms((double)t->irq_delay_total, t->irq_count));
+	       average_ms((double)t->irq_delay_total, t->irq_count),
+	       delay_ms((double)t->irq_delay_max),
+	       delay_ms((double)t->irq_delay_min));
 }
 
 static void task_context_switch_counts(struct taskstats *t)
diff --git a/tools/accounting/procacct.c b/tools/accounting/procacct.c
index 90c4a37f53d9..e8dee05a6264 100644
--- a/tools/accounting/procacct.c
+++ b/tools/accounting/procacct.c
@@ -274,12 +274,11 @@ int main(int argc, char *argv[])
 	int maskset = 0;
 	char *logfile = NULL;
 	int cfd = 0;
-	int forking = 0;
 
 	struct msgtemplate msg;
 
-	while (!forking) {
-		c = getopt(argc, argv, "m:vr:");
+	while (1) {
+		c = getopt(argc, argv, "m:vr:w:");
 		if (c < 0)
 			break;
 
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 03cd7c19a683..d5dd96902817 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -246,6 +246,7 @@ struct kvm_vcpu_events {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO  7
 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS	8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ	9
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h
index cd8420e8c3ad..b6c5ece4fdee 100644
--- a/tools/arch/arm64/include/asm/sysreg.h
+++ b/tools/arch/arm64/include/asm/sysreg.h
@@ -11,6 +11,7 @@
 
 #include <linux/bits.h>
 #include <linux/stringify.h>
+#include <linux/kasan-tags.h>
 
 #include <asm/gpr-num.h>
 
@@ -108,6 +109,9 @@
 #define set_pstate_ssbs(x)		asm volatile(SET_PSTATE_SSBS(x))
 #define set_pstate_dit(x)		asm volatile(SET_PSTATE_DIT(x))
 
+/* Register-based PAN access, for save/restore purposes */
+#define SYS_PSTATE_PAN			sys_reg(3, 0, 4, 2, 3)
+
 #define __SYS_BARRIER_INSN(CRm, op2, Rt) \
 	__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
 
@@ -123,6 +127,37 @@
 #define SYS_DC_CIGSW			sys_insn(1, 0, 7, 14, 4)
 #define SYS_DC_CIGDSW			sys_insn(1, 0, 7, 14, 6)
 
+#define SYS_IC_IALLUIS			sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU			sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU			sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC			sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC			sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC			sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC			sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC			sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC			sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU			sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP			sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP			sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP			sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP			sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP			sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP			sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC			sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC			sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC			sys_insn(1, 3, 7, 14, 5)
+
+/* Data cache zero operations */
+#define SYS_DC_ZVA			sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA			sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA			sys_insn(1, 3, 7, 4, 4)
+
 /*
  * Automatically generated definitions for system registers, the
  * manual encodings below are in the process of being converted to
@@ -162,6 +197,84 @@
 #define SYS_DBGDTRTX_EL0		sys_reg(2, 3, 0, 5, 0)
 #define SYS_DBGVCR32_EL2		sys_reg(2, 4, 0, 7, 0)
 
+#define SYS_BRBINF_EL1(n)		sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBINFINJ_EL1		sys_reg(2, 1, 9, 1, 0)
+#define SYS_BRBSRC_EL1(n)		sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBSRCINJ_EL1		sys_reg(2, 1, 9, 1, 1)
+#define SYS_BRBTGT_EL1(n)		sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+#define SYS_BRBTGTINJ_EL1		sys_reg(2, 1, 9, 1, 2)
+#define SYS_BRBTS_EL1			sys_reg(2, 1, 9, 0, 2)
+
+#define SYS_BRBCR_EL1			sys_reg(2, 1, 9, 0, 0)
+#define SYS_BRBFCR_EL1			sys_reg(2, 1, 9, 0, 1)
+#define SYS_BRBIDR0_EL1			sys_reg(2, 1, 9, 2, 0)
+
+#define SYS_TRCITECR_EL1		sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m)			sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m)			sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS		sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR			sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR			sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR			sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0		sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1		sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m)		sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR			sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET			sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m)		sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m)		sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m)			sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR			sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH			sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID			sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R		sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R		sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m)		sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0			sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10			sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11			sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12			sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13			sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1			sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2			sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3			sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4			sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5			sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6			sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7			sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8			sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9			sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m)		sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR			sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR			sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR			sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR			sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m)		sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR			sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m)		sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR		sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR			sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m)			sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m)			sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m)		sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR		sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR			sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR			sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR			sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR			sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR			sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR			sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR		sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR			sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0		sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1		sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m)		sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR			sys_reg(2, 1, 1, 0, 4)
+
+#define SYS_BRBCR_EL2			sys_reg(2, 4, 9, 0, 0)
+
 #define SYS_MIDR_EL1			sys_reg(3, 0, 0, 0, 0)
 #define SYS_MPIDR_EL1			sys_reg(3, 0, 0, 0, 5)
 #define SYS_REVIDR_EL1			sys_reg(3, 0, 0, 0, 6)
@@ -170,8 +283,6 @@
 #define SYS_RGSR_EL1			sys_reg(3, 0, 1, 0, 5)
 #define SYS_GCR_EL1			sys_reg(3, 0, 1, 0, 6)
 
-#define SYS_TRFCR_EL1			sys_reg(3, 0, 1, 2, 1)
-
 #define SYS_TCR_EL1			sys_reg(3, 0, 2, 0, 2)
 
 #define SYS_APIAKEYLO_EL1		sys_reg(3, 0, 2, 1, 0)
@@ -202,15 +313,38 @@
 #define SYS_ERXCTLR_EL1			sys_reg(3, 0, 5, 4, 1)
 #define SYS_ERXSTATUS_EL1		sys_reg(3, 0, 5, 4, 2)
 #define SYS_ERXADDR_EL1			sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1			sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1		sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1		sys_reg(3, 0, 5, 4, 6)
 #define SYS_ERXMISC0_EL1		sys_reg(3, 0, 5, 5, 0)
 #define SYS_ERXMISC1_EL1		sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1		sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1		sys_reg(3, 0, 5, 5, 3)
 #define SYS_TFSR_EL1			sys_reg(3, 0, 5, 6, 0)
 #define SYS_TFSRE0_EL1			sys_reg(3, 0, 5, 6, 1)
 
 #define SYS_PAR_EL1			sys_reg(3, 0, 7, 4, 0)
 
 #define SYS_PAR_EL1_F			BIT(0)
+/* When PAR_EL1.F == 1 */
 #define SYS_PAR_EL1_FST			GENMASK(6, 1)
+#define SYS_PAR_EL1_PTW			BIT(8)
+#define SYS_PAR_EL1_S			BIT(9)
+#define SYS_PAR_EL1_AssuredOnly		BIT(12)
+#define SYS_PAR_EL1_TopLevel		BIT(13)
+#define SYS_PAR_EL1_Overlay		BIT(14)
+#define SYS_PAR_EL1_DirtyBit		BIT(15)
+#define SYS_PAR_EL1_F1_IMPDEF		GENMASK_ULL(63, 48)
+#define SYS_PAR_EL1_F1_RES0		(BIT(7) | BIT(10) | GENMASK_ULL(47, 16))
+#define SYS_PAR_EL1_RES1		BIT(11)
+/* When PAR_EL1.F == 0 */
+#define SYS_PAR_EL1_SH			GENMASK_ULL(8, 7)
+#define SYS_PAR_EL1_NS			BIT(9)
+#define SYS_PAR_EL1_F0_IMPDEF		BIT(10)
+#define SYS_PAR_EL1_NSE			BIT(11)
+#define SYS_PAR_EL1_PA			GENMASK_ULL(51, 12)
+#define SYS_PAR_EL1_ATTR		GENMASK_ULL(63, 56)
+#define SYS_PAR_EL1_F0_RES0		(GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52))
 
 /*** Statistical Profiling Extension ***/
 #define PMSEVFR_EL1_RES0_IMP	\
@@ -274,6 +408,8 @@
 #define SYS_ICC_IGRPEN0_EL1		sys_reg(3, 0, 12, 12, 6)
 #define SYS_ICC_IGRPEN1_EL1		sys_reg(3, 0, 12, 12, 7)
 
+#define SYS_ACCDATA_EL1			sys_reg(3, 0, 13, 0, 5)
+
 #define SYS_CNTKCTL_EL1			sys_reg(3, 0, 14, 1, 0)
 
 #define SYS_AIDR_EL1			sys_reg(3, 1, 0, 0, 7)
@@ -286,7 +422,6 @@
 #define SYS_PMCNTENCLR_EL0		sys_reg(3, 3, 9, 12, 2)
 #define SYS_PMOVSCLR_EL0		sys_reg(3, 3, 9, 12, 3)
 #define SYS_PMSWINC_EL0			sys_reg(3, 3, 9, 12, 4)
-#define SYS_PMSELR_EL0			sys_reg(3, 3, 9, 12, 5)
 #define SYS_PMCEID0_EL0			sys_reg(3, 3, 9, 12, 6)
 #define SYS_PMCEID1_EL0			sys_reg(3, 3, 9, 12, 7)
 #define SYS_PMCCNTR_EL0			sys_reg(3, 3, 9, 13, 0)
@@ -369,6 +504,7 @@
 
 #define SYS_SCTLR_EL2			sys_reg(3, 4, 1, 0, 0)
 #define SYS_ACTLR_EL2			sys_reg(3, 4, 1, 0, 1)
+#define SYS_SCTLR2_EL2			sys_reg(3, 4, 1, 0, 3)
 #define SYS_HCR_EL2			sys_reg(3, 4, 1, 1, 0)
 #define SYS_MDCR_EL2			sys_reg(3, 4, 1, 1, 1)
 #define SYS_CPTR_EL2			sys_reg(3, 4, 1, 1, 2)
@@ -381,13 +517,15 @@
 #define SYS_VTTBR_EL2			sys_reg(3, 4, 2, 1, 0)
 #define SYS_VTCR_EL2			sys_reg(3, 4, 2, 1, 2)
 
-#define SYS_TRFCR_EL2			sys_reg(3, 4, 1, 2, 1)
-#define SYS_HDFGRTR_EL2			sys_reg(3, 4, 3, 1, 4)
-#define SYS_HDFGWTR_EL2			sys_reg(3, 4, 3, 1, 5)
+#define SYS_VNCR_EL2			sys_reg(3, 4, 2, 2, 0)
 #define SYS_HAFGRTR_EL2			sys_reg(3, 4, 3, 1, 6)
 #define SYS_SPSR_EL2			sys_reg(3, 4, 4, 0, 0)
 #define SYS_ELR_EL2			sys_reg(3, 4, 4, 0, 1)
 #define SYS_SP_EL1			sys_reg(3, 4, 4, 1, 0)
+#define SYS_SPSR_irq			sys_reg(3, 4, 4, 3, 0)
+#define SYS_SPSR_abt			sys_reg(3, 4, 4, 3, 1)
+#define SYS_SPSR_und			sys_reg(3, 4, 4, 3, 2)
+#define SYS_SPSR_fiq			sys_reg(3, 4, 4, 3, 3)
 #define SYS_IFSR32_EL2			sys_reg(3, 4, 5, 0, 1)
 #define SYS_AFSR0_EL2			sys_reg(3, 4, 5, 1, 0)
 #define SYS_AFSR1_EL2			sys_reg(3, 4, 5, 1, 1)
@@ -420,9 +558,6 @@
 
 #define SYS_ICH_VSEIR_EL2		sys_reg(3, 4, 12, 9, 4)
 #define SYS_ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
-#define SYS_ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
-#define SYS_ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
-#define SYS_ICH_MISR_EL2		sys_reg(3, 4, 12, 11, 2)
 #define SYS_ICH_EISR_EL2		sys_reg(3, 4, 12, 11, 3)
 #define SYS_ICH_ELRSR_EL2		sys_reg(3, 4, 12, 11, 5)
 #define SYS_ICH_VMCR_EL2		sys_reg(3, 4, 12, 11, 7)
@@ -449,24 +584,49 @@
 
 #define SYS_CONTEXTIDR_EL2		sys_reg(3, 4, 13, 0, 1)
 #define SYS_TPIDR_EL2			sys_reg(3, 4, 13, 0, 2)
+#define SYS_SCXTNUM_EL2			sys_reg(3, 4, 13, 0, 7)
+
+#define __AMEV_op2(m)			(m & 0x7)
+#define __AMEV_CRm(n, m)		(n | ((m & 0x8) >> 3))
+#define __SYS__AMEVCNTVOFF0n_EL2(m)	sys_reg(3, 4, 13, __AMEV_CRm(0x8, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF0n_EL2(m)	__SYS__AMEVCNTVOFF0n_EL2(m)
+#define __SYS__AMEVCNTVOFF1n_EL2(m)	sys_reg(3, 4, 13, __AMEV_CRm(0xA, m), __AMEV_op2(m))
+#define SYS_AMEVCNTVOFF1n_EL2(m)	__SYS__AMEVCNTVOFF1n_EL2(m)
 
 #define SYS_CNTVOFF_EL2			sys_reg(3, 4, 14, 0, 3)
 #define SYS_CNTHCTL_EL2			sys_reg(3, 4, 14, 1, 0)
+#define SYS_CNTHP_TVAL_EL2		sys_reg(3, 4, 14, 2, 0)
+#define SYS_CNTHP_CTL_EL2		sys_reg(3, 4, 14, 2, 1)
+#define SYS_CNTHP_CVAL_EL2		sys_reg(3, 4, 14, 2, 2)
+#define SYS_CNTHV_TVAL_EL2		sys_reg(3, 4, 14, 3, 0)
+#define SYS_CNTHV_CTL_EL2		sys_reg(3, 4, 14, 3, 1)
+#define SYS_CNTHV_CVAL_EL2		sys_reg(3, 4, 14, 3, 2)
 
 /* VHE encodings for architectural EL0/1 system registers */
+#define SYS_BRBCR_EL12			sys_reg(2, 5, 9, 0, 0)
 #define SYS_SCTLR_EL12			sys_reg(3, 5, 1, 0, 0)
+#define SYS_CPACR_EL12			sys_reg(3, 5, 1, 0, 2)
+#define SYS_SCTLR2_EL12			sys_reg(3, 5, 1, 0, 3)
+#define SYS_ZCR_EL12			sys_reg(3, 5, 1, 2, 0)
+#define SYS_TRFCR_EL12			sys_reg(3, 5, 1, 2, 1)
+#define SYS_SMCR_EL12			sys_reg(3, 5, 1, 2, 6)
 #define SYS_TTBR0_EL12			sys_reg(3, 5, 2, 0, 0)
 #define SYS_TTBR1_EL12			sys_reg(3, 5, 2, 0, 1)
 #define SYS_TCR_EL12			sys_reg(3, 5, 2, 0, 2)
+#define SYS_TCR2_EL12			sys_reg(3, 5, 2, 0, 3)
 #define SYS_SPSR_EL12			sys_reg(3, 5, 4, 0, 0)
 #define SYS_ELR_EL12			sys_reg(3, 5, 4, 0, 1)
 #define SYS_AFSR0_EL12			sys_reg(3, 5, 5, 1, 0)
 #define SYS_AFSR1_EL12			sys_reg(3, 5, 5, 1, 1)
 #define SYS_ESR_EL12			sys_reg(3, 5, 5, 2, 0)
 #define SYS_TFSR_EL12			sys_reg(3, 5, 5, 6, 0)
+#define SYS_FAR_EL12			sys_reg(3, 5, 6, 0, 0)
+#define SYS_PMSCR_EL12			sys_reg(3, 5, 9, 9, 0)
 #define SYS_MAIR_EL12			sys_reg(3, 5, 10, 2, 0)
 #define SYS_AMAIR_EL12			sys_reg(3, 5, 10, 3, 0)
 #define SYS_VBAR_EL12			sys_reg(3, 5, 12, 0, 0)
+#define SYS_CONTEXTIDR_EL12		sys_reg(3, 5, 13, 0, 1)
+#define SYS_SCXTNUM_EL12		sys_reg(3, 5, 13, 0, 7)
 #define SYS_CNTKCTL_EL12		sys_reg(3, 5, 14, 1, 0)
 #define SYS_CNTP_TVAL_EL02		sys_reg(3, 5, 14, 2, 0)
 #define SYS_CNTP_CTL_EL02		sys_reg(3, 5, 14, 2, 1)
@@ -477,6 +637,183 @@
 
 #define SYS_SP_EL2			sys_reg(3, 6,  4, 1, 0)
 
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R	sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W	sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R	sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W	sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP	sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP	sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E1A	sys_insn(AT_Op0, 0, AT_CRn, 9, 2)
+#define OP_AT_S1E2R	sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W	sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R	sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W	sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R	sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W	sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+#define OP_AT_S1E2A	sys_insn(AT_Op0, 4, AT_CRn, 9, 2)
+
+/* TLBI instructions */
+#define TLBI_Op0	1
+
+#define TLBI_Op1_EL1	0	/* Accessible from EL1 or higher */
+#define TLBI_Op1_EL2	4	/* Accessible from EL2 or higher */
+
+#define TLBI_CRn_XS	8	/* Extra Slow (the common one) */
+#define TLBI_CRn_nXS	9	/* not Extra Slow (which nobody uses)*/
+
+#define TLBI_CRm_IPAIS	0	/* S2 Inner-Shareable */
+#define TLBI_CRm_nROS	1	/* non-Range, Outer-Sharable */
+#define TLBI_CRm_RIS	2	/* Range, Inner-Sharable */
+#define TLBI_CRm_nRIS	3	/* non-Range, Inner-Sharable */
+#define TLBI_CRm_IPAONS	4	/* S2 Outer and Non-Shareable */
+#define TLBI_CRm_ROS	5	/* Range, Outer-Sharable */
+#define TLBI_CRm_RNS	6	/* Range, Non-Sharable */
+#define TLBI_CRm_nRNS	7	/* non-Range, Non-Sharable */
+
+#define OP_TLBI_VMALLE1OS		sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS			sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS		sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS			sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS			sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS		sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS			sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS		sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS		sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS		sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS		sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS			sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS		sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS			sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS			sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS		sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS			sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS		sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS		sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS		sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1			sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1			sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1			sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1			sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1			sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1			sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1			sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1			sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1			sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1			sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS		sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS		sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS		sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS		sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS		sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS		sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS		sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS		sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS		sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS		sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS		sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS		sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS		sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS		sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS		sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS		sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS		sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS		sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS		sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS		sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS		sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS		sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS		sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS		sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS		sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS			sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS		sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS		sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS		sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS		sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS		sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS		sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS		sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS		sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS			sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS			sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS			sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS			sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS		sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS			sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS		sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS			sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS			sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS			sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS			sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS		sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS		sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1			sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1		sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS		sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS		sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1		sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1		sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS		sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS			sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS		sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2			sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2			sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2			sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2			sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1			sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2			sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1		sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS		sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS		sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS		sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS		sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS		sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS		sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS		sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS		sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS		sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS		sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS		sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS		sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS		sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS		sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS		sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS		sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS		sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS		sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS		sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS		sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS		sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS		sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS		sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS		sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS		sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS		sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS		sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS		sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS		sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS			sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS		sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS		sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS		sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_GCSPUSHX			sys_insn(1, 0, 7, 7, 4)
+#define OP_GCSPOPCX			sys_insn(1, 0, 7, 7, 5)
+#define OP_GCSPOPX			sys_insn(1, 0, 7, 7, 6)
+#define OP_GCSPUSHM			sys_insn(1, 3, 7, 7, 0)
+
+#define OP_BRB_IALL			sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ			sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX			sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX			sys_insn(1, 3, 7, 3, 5)
+#define OP_COSP_RCTX			sys_insn(1, 3, 7, 3, 6)
+#define OP_CPP_RCTX			sys_insn(1, 3, 7, 3, 7)
+
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_ENTP2	(BIT(60))
 #define SCTLR_ELx_DSSBS	(BIT(44))
@@ -555,16 +892,14 @@
 /* Position the attr at the correct index */
 #define MAIR_ATTRIDX(attr, idx)		((attr) << ((idx) * 8))
 
-/* id_aa64pfr0 */
-#define ID_AA64PFR0_EL1_ELx_64BIT_ONLY		0x1
-#define ID_AA64PFR0_EL1_ELx_32BIT_64BIT		0x2
-
 /* id_aa64mmfr0 */
 #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN	0x0
+#define ID_AA64MMFR0_EL1_TGRAN4_LPA2		ID_AA64MMFR0_EL1_TGRAN4_52_BIT
 #define ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX	0x7
 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MIN	0x0
 #define ID_AA64MMFR0_EL1_TGRAN64_SUPPORTED_MAX	0x7
 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN	0x1
+#define ID_AA64MMFR0_EL1_TGRAN16_LPA2		ID_AA64MMFR0_EL1_TGRAN16_52_BIT
 #define ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX	0xf
 
 #define ARM64_MIN_PARANGE_BITS		32
@@ -572,6 +907,7 @@
 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT	0x0
 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE		0x1
 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN		0x2
+#define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2		0x3
 #define ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX		0x7
 
 #ifdef CONFIG_ARM64_PA_BITS_52
@@ -582,11 +918,13 @@
 
 #if defined(CONFIG_ARM64_4K_PAGES)
 #define ID_AA64MMFR0_EL1_TGRAN_SHIFT		ID_AA64MMFR0_EL1_TGRAN4_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2		ID_AA64MMFR0_EL1_TGRAN4_52_BIT
 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MIN
 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_EL1_TGRAN4_SUPPORTED_MAX
 #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT		ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT
 #elif defined(CONFIG_ARM64_16K_PAGES)
 #define ID_AA64MMFR0_EL1_TGRAN_SHIFT		ID_AA64MMFR0_EL1_TGRAN16_SHIFT
+#define ID_AA64MMFR0_EL1_TGRAN_LPA2		ID_AA64MMFR0_EL1_TGRAN16_52_BIT
 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN	ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MIN
 #define ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX	ID_AA64MMFR0_EL1_TGRAN16_SUPPORTED_MAX
 #define ID_AA64MMFR0_EL1_TGRAN_2_SHIFT		ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT
@@ -610,6 +948,19 @@
 #define SYS_GCR_EL1_RRND	(BIT(16))
 #define SYS_GCR_EL1_EXCL_MASK	0xffffUL
 
+#ifdef CONFIG_KASAN_HW_TAGS
+/*
+ * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it
+ * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF.
+ */
+#define __MTE_TAG_MIN		(KASAN_TAG_MIN & 0xf)
+#define __MTE_TAG_MAX		(KASAN_TAG_MAX & 0xf)
+#define __MTE_TAG_INCL		GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN)
+#define KERNEL_GCR_EL1_EXCL	(SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL)
+#else
+#define KERNEL_GCR_EL1_EXCL	SYS_GCR_EL1_EXCL_MASK
+#endif
+
 #define KERNEL_GCR_EL1		(SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL)
 
 /* RGSR_EL1 Definitions */
@@ -626,20 +977,7 @@
 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
 #define SYS_MPIDR_SAFE_VAL	(BIT(31))
 
-#define TRFCR_ELx_TS_SHIFT		5
-#define TRFCR_ELx_TS_MASK		((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_VIRTUAL		((0x1UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_GUEST_PHYSICAL	((0x2UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_ELx_TS_PHYSICAL		((0x3UL) << TRFCR_ELx_TS_SHIFT)
-#define TRFCR_EL2_CX			BIT(3)
-#define TRFCR_ELx_ExTRE			BIT(1)
-#define TRFCR_ELx_E0TRE			BIT(0)
-
 /* GIC Hypervisor interface registers */
-/* ICH_MISR_EL2 bit definitions */
-#define ICH_MISR_EOI		(1 << 0)
-#define ICH_MISR_U		(1 << 1)
-
 /* ICH_LR*_EL2 bit definitions */
 #define ICH_LR_VIRTUAL_ID_MASK	((1ULL << 32) - 1)
 
@@ -654,17 +992,6 @@
 #define ICH_LR_PRIORITY_SHIFT	48
 #define ICH_LR_PRIORITY_MASK	(0xffULL << ICH_LR_PRIORITY_SHIFT)
 
-/* ICH_HCR_EL2 bit definitions */
-#define ICH_HCR_EN		(1 << 0)
-#define ICH_HCR_UIE		(1 << 1)
-#define ICH_HCR_NPIE		(1 << 3)
-#define ICH_HCR_TC		(1 << 10)
-#define ICH_HCR_TALL0		(1 << 11)
-#define ICH_HCR_TALL1		(1 << 12)
-#define ICH_HCR_TDIR		(1 << 14)
-#define ICH_HCR_EOIcount_SHIFT	27
-#define ICH_HCR_EOIcount_MASK	(0x1f << ICH_HCR_EOIcount_SHIFT)
-
 /* ICH_VMCR_EL2 bit definitions */
 #define ICH_VMCR_ACK_CTL_SHIFT	2
 #define ICH_VMCR_ACK_CTL_MASK	(1 << ICH_VMCR_ACK_CTL_SHIFT)
@@ -685,18 +1012,6 @@
 #define ICH_VMCR_ENG1_SHIFT	1
 #define ICH_VMCR_ENG1_MASK	(1 << ICH_VMCR_ENG1_SHIFT)
 
-/* ICH_VTR_EL2 bit definitions */
-#define ICH_VTR_PRI_BITS_SHIFT	29
-#define ICH_VTR_PRI_BITS_MASK	(7 << ICH_VTR_PRI_BITS_SHIFT)
-#define ICH_VTR_ID_BITS_SHIFT	23
-#define ICH_VTR_ID_BITS_MASK	(7 << ICH_VTR_ID_BITS_SHIFT)
-#define ICH_VTR_SEIS_SHIFT	22
-#define ICH_VTR_SEIS_MASK	(1 << ICH_VTR_SEIS_SHIFT)
-#define ICH_VTR_A3V_SHIFT	21
-#define ICH_VTR_A3V_MASK	(1 << ICH_VTR_A3V_SHIFT)
-#define ICH_VTR_TDS_SHIFT	19
-#define ICH_VTR_TDS_MASK	(1 << ICH_VTR_TDS_SHIFT)
-
 /*
  * Permission Indirection Extension (PIE) permission encodings.
  * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension).
@@ -716,6 +1031,22 @@
 
 #define PIRx_ELx_PERM(idx, perm)	((perm) << ((idx) * 4))
 
+/*
+ * Permission Overlay Extension (POE) permission encodings.
+ */
+#define POE_NONE	UL(0x0)
+#define POE_R		UL(0x1)
+#define POE_X		UL(0x2)
+#define POE_RX		UL(0x3)
+#define POE_W		UL(0x4)
+#define POE_RW		UL(0x5)
+#define POE_XW		UL(0x6)
+#define POE_RXW		UL(0x7)
+#define POE_MASK	UL(0xf)
+
+/* Initial value for Permission Overlay Extension for EL0 */
+#define POR_EL0_INIT	POE_RXW
+
 #define ARM64_FEATURE_FIELD_BITS	4
 
 /* Defined for compatibility only, do not add new users. */
@@ -789,15 +1120,21 @@
 /*
  * For registers without architectural names, or simply unsupported by
  * GAS.
+ *
+ * __check_r forces warnings to be generated by the compiler when
+ * evaluating r which wouldn't normally happen due to being passed to
+ * the assembler via __stringify(r).
  */
 #define read_sysreg_s(r) ({						\
 	u64 __val;							\
+	u32 __maybe_unused __check_r = (u32)(r);			\
 	asm volatile(__mrs_s("%0", r) : "=r" (__val));			\
 	__val;								\
 })
 
 #define write_sysreg_s(v, r) do {					\
 	u64 __val = (u64)(v);						\
+	u32 __maybe_unused __check_r = (u32)(r);			\
 	asm volatile(__msr_s(r, "%x0") : : "rZ" (__val));		\
 } while (0)
 
@@ -827,6 +1164,8 @@
 	par;								\
 })
 
+#define SYS_FIELD_VALUE(reg, field, val)	reg##_##field##_##val
+
 #define SYS_FIELD_GET(reg, field, val)		\
 		 FIELD_GET(reg##_##field##_MASK, val)
 
@@ -834,7 +1173,8 @@
 		 FIELD_PREP(reg##_##field##_MASK, val)
 
 #define SYS_FIELD_PREP_ENUM(reg, field, val)		\
-		 FIELD_PREP(reg##_##field##_MASK, reg##_##field##_##val)
+		 FIELD_PREP(reg##_##field##_MASK,	\
+			    SYS_FIELD_VALUE(reg, field, val))
 
 #endif
 
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 66736ff04011..6d44f8c8a18f 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -374,6 +374,7 @@ enum {
 #endif
 };
 
+/* Vendor hyper call function numbers 0-63 */
 #define KVM_REG_ARM_VENDOR_HYP_BMAP		KVM_REG_ARM_FW_FEAT_BMAP_REG(2)
 
 enum {
@@ -384,6 +385,17 @@ enum {
 #endif
 };
 
+/* Vendor hyper call function numbers 64-127 */
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2		KVM_REG_ARM_FW_FEAT_BMAP_REG(3)
+
+enum {
+	KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_VER	= 0,
+	KVM_REG_ARM_VENDOR_HYP_BIT_DISCOVER_IMPL_CPUS	= 1,
+#ifdef __KERNEL__
+	KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_COUNT,
+#endif
+};
+
 /* Device Control API on vm fd */
 #define KVM_ARM_VM_SMCCC_CTRL		0
 #define   KVM_ARM_VM_SMCCC_FILTER	0
diff --git a/tools/arch/arm64/tools/Makefile b/tools/arch/arm64/tools/Makefile
index 7b42feedf647..de4f1b66ef01 100644
--- a/tools/arch/arm64/tools/Makefile
+++ b/tools/arch/arm64/tools/Makefile
@@ -13,12 +13,6 @@ AWK	?= awk
 MKDIR	?= mkdir
 RM	?= rm
 
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
 arm64_tools_dir = $(top_srcdir)/arch/arm64/tools
 arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg
 arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h
index 93807b437e4d..cb1740bc3da2 100644
--- a/tools/arch/x86/include/asm/amd-ibs.h
+++ b/tools/arch/x86/include/asm/amd-ibs.h
@@ -64,7 +64,8 @@ union ibs_op_ctl {
 			opmaxcnt_ext:7,	/* 20-26: upper 7 bits of periodic op maximum count */
 			reserved0:5,	/* 27-31: reserved */
 			opcurcnt:27,	/* 32-58: periodic op counter current count */
-			reserved1:5;	/* 59-63: reserved */
+			ldlat_thrsh:4,	/* 59-62: Load Latency threshold */
+			ldlat_en:1;	/* 63: Load Latency enabled */
 	};
 };
 
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
index 3ad3da9a7d97..dbe39b44256b 100644
--- a/tools/arch/x86/include/asm/asm.h
+++ b/tools/arch/x86/include/asm/asm.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_X86_ASM_H
 #define _ASM_X86_ASM_H
 
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 # define __ASM_FORM(x, ...)		x,## __VA_ARGS__
 # define __ASM_FORM_RAW(x, ...)		x,## __VA_ARGS__
 # define __ASM_FORM_COMMA(x, ...)	x,## __VA_ARGS__,
@@ -123,7 +123,7 @@
 #ifdef __KERNEL__
 
 /* Exception table entry */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
 # define _ASM_EXTABLE_HANDLE(from, to, handler)			\
 	.pushsection "__ex_table","a" ;				\
 	.balign 4 ;						\
@@ -154,7 +154,7 @@
 #  define _ASM_NOKPROBE(entry)
 # endif
 
-#else /* ! __ASSEMBLY__ */
+#else /* ! __ASSEMBLER__ */
 # define _EXPAND_EXTABLE_HANDLE(x) #x
 # define _ASM_EXTABLE_HANDLE(from, to, handler)			\
 	" .pushsection \"__ex_table\",\"a\"\n"			\
@@ -186,7 +186,7 @@
  */
 register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* __KERNEL__ */
 
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 17b6590748c0..9e3fa7942e7d 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -2,14 +2,6 @@
 #ifndef _ASM_X86_CPUFEATURES_H
 #define _ASM_X86_CPUFEATURES_H
 
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
 /*
  * Defines x86 CPU feature bits
  */
@@ -210,7 +202,6 @@
 #define X86_FEATURE_MBA			( 7*32+18) /* "mba" Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* Fill RSB on context switches */
 #define X86_FEATURE_PERFMON_V2		( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */
-#define X86_FEATURE_USE_IBPB		( 7*32+21) /* Indirect Branch Prediction Barrier enabled */
 #define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* Use IBRS during runtime firmware calls */
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* Disable Speculative Store Bypass. */
 #define X86_FEATURE_LS_CFG_SSBD		( 7*32+24)  /* AMD SSBD implementation via LS_CFG MSR */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
deleted file mode 100644
index c492bdc97b05..000000000000
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#define _ASM_X86_DISABLED_FEATURES_H
-
-/* These features, although they might be available in a CPU
- * will not be used because the compile options to support
- * them are not present.
- *
- * This code allows them to be checked and disabled at
- * compile time without an explicit #ifdef.  Use
- * cpu_feature_enabled().
- */
-
-#ifdef CONFIG_X86_UMIP
-# define DISABLE_UMIP	0
-#else
-# define DISABLE_UMIP	(1<<(X86_FEATURE_UMIP & 31))
-#endif
-
-#ifdef CONFIG_X86_64
-# define DISABLE_VME		(1<<(X86_FEATURE_VME & 31))
-# define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
-# define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))
-# define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))
-# define DISABLE_PCID		0
-#else
-# define DISABLE_VME		0
-# define DISABLE_K6_MTRR	0
-# define DISABLE_CYRIX_ARR	0
-# define DISABLE_CENTAUR_MCR	0
-# define DISABLE_PCID		(1<<(X86_FEATURE_PCID & 31))
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-# define DISABLE_PKU		0
-# define DISABLE_OSPKE		0
-#else
-# define DISABLE_PKU		(1<<(X86_FEATURE_PKU & 31))
-# define DISABLE_OSPKE		(1<<(X86_FEATURE_OSPKE & 31))
-#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
-
-#ifdef CONFIG_X86_5LEVEL
-# define DISABLE_LA57	0
-#else
-# define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
-# define DISABLE_PTI		0
-#else
-# define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETPOLINE
-# define DISABLE_RETPOLINE	0
-#else
-# define DISABLE_RETPOLINE	((1 << (X86_FEATURE_RETPOLINE & 31)) | \
-				 (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETHUNK
-# define DISABLE_RETHUNK	0
-#else
-# define DISABLE_RETHUNK	(1 << (X86_FEATURE_RETHUNK & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_UNRET_ENTRY
-# define DISABLE_UNRET		0
-#else
-# define DISABLE_UNRET		(1 << (X86_FEATURE_UNRET & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
-# define DISABLE_CALL_DEPTH_TRACKING	0
-#else
-# define DISABLE_CALL_DEPTH_TRACKING	(1 << (X86_FEATURE_CALL_DEPTH & 31))
-#endif
-
-#ifdef CONFIG_ADDRESS_MASKING
-# define DISABLE_LAM		0
-#else
-# define DISABLE_LAM		(1 << (X86_FEATURE_LAM & 31))
-#endif
-
-#ifdef CONFIG_INTEL_IOMMU_SVM
-# define DISABLE_ENQCMD		0
-#else
-# define DISABLE_ENQCMD		(1 << (X86_FEATURE_ENQCMD & 31))
-#endif
-
-#ifdef CONFIG_X86_SGX
-# define DISABLE_SGX	0
-#else
-# define DISABLE_SGX	(1 << (X86_FEATURE_SGX & 31))
-#endif
-
-#ifdef CONFIG_XEN_PV
-# define DISABLE_XENPV		0
-#else
-# define DISABLE_XENPV		(1 << (X86_FEATURE_XENPV & 31))
-#endif
-
-#ifdef CONFIG_INTEL_TDX_GUEST
-# define DISABLE_TDX_GUEST	0
-#else
-# define DISABLE_TDX_GUEST	(1 << (X86_FEATURE_TDX_GUEST & 31))
-#endif
-
-#ifdef CONFIG_X86_USER_SHADOW_STACK
-#define DISABLE_USER_SHSTK	0
-#else
-#define DISABLE_USER_SHSTK	(1 << (X86_FEATURE_USER_SHSTK & 31))
-#endif
-
-#ifdef CONFIG_X86_KERNEL_IBT
-#define DISABLE_IBT	0
-#else
-#define DISABLE_IBT	(1 << (X86_FEATURE_IBT & 31))
-#endif
-
-#ifdef CONFIG_X86_FRED
-# define DISABLE_FRED	0
-#else
-# define DISABLE_FRED	(1 << (X86_FEATURE_FRED & 31))
-#endif
-
-#ifdef CONFIG_KVM_AMD_SEV
-#define DISABLE_SEV_SNP		0
-#else
-#define DISABLE_SEV_SNP		(1 << (X86_FEATURE_SEV_SNP & 31))
-#endif
-
-/*
- * Make sure to add features to the correct mask
- */
-#define DISABLED_MASK0	(DISABLE_VME)
-#define DISABLED_MASK1	0
-#define DISABLED_MASK2	0
-#define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4	(DISABLE_PCID)
-#define DISABLED_MASK5	0
-#define DISABLED_MASK6	0
-#define DISABLED_MASK7	(DISABLE_PTI)
-#define DISABLED_MASK8	(DISABLE_XENPV|DISABLE_TDX_GUEST)
-#define DISABLED_MASK9	(DISABLE_SGX)
-#define DISABLED_MASK10	0
-#define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
-			 DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
-#define DISABLED_MASK12	(DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13	0
-#define DISABLED_MASK14	0
-#define DISABLED_MASK15	0
-#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
-			 DISABLE_ENQCMD)
-#define DISABLED_MASK17	0
-#define DISABLED_MASK18	(DISABLE_IBT)
-#define DISABLED_MASK19	(DISABLE_SEV_SNP)
-#define DISABLED_MASK20	0
-#define DISABLED_MASK21	0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 3ae84c3b8e6d..dc1c1057f26e 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -25,6 +25,7 @@
 #define _EFER_SVME		12 /* Enable virtualization */
 #define _EFER_LMSLE		13 /* Long Mode Segment Limit Enable */
 #define _EFER_FFXSR		14 /* Enable Fast FXSAVE/FXRSTOR */
+#define _EFER_TCE		15 /* Enable Translation Cache Extensions */
 #define _EFER_AUTOIBRS		21 /* Enable Automatic IBRS */
 
 #define EFER_SCE		(1<<_EFER_SCE)
@@ -34,6 +35,7 @@
 #define EFER_SVME		(1<<_EFER_SVME)
 #define EFER_LMSLE		(1<<_EFER_LMSLE)
 #define EFER_FFXSR		(1<<_EFER_FFXSR)
+#define EFER_TCE		(1<<_EFER_TCE)
 #define EFER_AUTOIBRS		(1<<_EFER_AUTOIBRS)
 
 /*
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
index 1c1b7550fa55..cd94221d8335 100644
--- a/tools/arch/x86/include/asm/nops.h
+++ b/tools/arch/x86/include/asm/nops.h
@@ -82,7 +82,7 @@
 #define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
 #define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 extern const unsigned char * const x86_nops[];
 #endif
 
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index 46d7e06763c9..e0125afa53fb 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -45,7 +45,7 @@
 #define ORC_TYPE_REGS			3
 #define ORC_TYPE_REGS_PARTIAL		4
 
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 #include <asm/byteorder.h>
 
 /*
@@ -73,6 +73,6 @@ struct orc_entry {
 #endif
 } __packed;
 
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 
 #endif /* _ORC_TYPES_H */
diff --git a/tools/arch/x86/include/asm/pvclock-abi.h b/tools/arch/x86/include/asm/pvclock-abi.h
index 1436226efe3e..b9fece5fc96d 100644
--- a/tools/arch/x86/include/asm/pvclock-abi.h
+++ b/tools/arch/x86/include/asm/pvclock-abi.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _ASM_X86_PVCLOCK_ABI_H
 #define _ASM_X86_PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
 
 /*
  * These structs MUST NOT be changed.
@@ -44,5 +44,5 @@ struct pvclock_wall_clock {
 #define PVCLOCK_GUEST_STOPPED	(1 << 1)
 /* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
 #define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
 #endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
deleted file mode 100644
index e9187ddd3d1f..000000000000
--- a/tools/arch/x86/include/asm/required-features.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#define _ASM_X86_REQUIRED_FEATURES_H
-
-/* Define minimum CPUID feature set for kernel These bits are checked
-   really early to actually display a visible error message before the
-   kernel dies.  Make sure to assign features to the proper mask!
-
-   Some requirements that are not in CPUID yet are also in the
-   CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
-
-   The real information is in arch/x86/Kconfig.cpu, this just converts
-   the CONFIGs into a bitmask */
-
-#ifndef CONFIG_MATH_EMULATION
-# define NEED_FPU	(1<<(X86_FEATURE_FPU & 31))
-#else
-# define NEED_FPU	0
-#endif
-
-#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-# define NEED_PAE	(1<<(X86_FEATURE_PAE & 31))
-#else
-# define NEED_PAE	0
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-# define NEED_CX8	(1<<(X86_FEATURE_CX8 & 31))
-#else
-# define NEED_CX8	0
-#endif
-
-#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
-# define NEED_CMOV	(1<<(X86_FEATURE_CMOV & 31))
-#else
-# define NEED_CMOV	0
-#endif
-
-# define NEED_3DNOW	0
-
-#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
-# define NEED_NOPL	(1<<(X86_FEATURE_NOPL & 31))
-#else
-# define NEED_NOPL	0
-#endif
-
-#ifdef CONFIG_MATOM
-# define NEED_MOVBE	(1<<(X86_FEATURE_MOVBE & 31))
-#else
-# define NEED_MOVBE	0
-#endif
-
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT_XXL
-/* Paravirtualized systems may not have PSE or PGE available */
-#define NEED_PSE	0
-#define NEED_PGE	0
-#else
-#define NEED_PSE	(1<<(X86_FEATURE_PSE) & 31)
-#define NEED_PGE	(1<<(X86_FEATURE_PGE) & 31)
-#endif
-#define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
-#define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
-#define NEED_XMM	(1<<(X86_FEATURE_XMM & 31))
-#define NEED_XMM2	(1<<(X86_FEATURE_XMM2 & 31))
-#define NEED_LM		(1<<(X86_FEATURE_LM & 31))
-#else
-#define NEED_PSE	0
-#define NEED_MSR	0
-#define NEED_PGE	0
-#define NEED_FXSR	0
-#define NEED_XMM	0
-#define NEED_XMM2	0
-#define NEED_LM		0
-#endif
-
-#define REQUIRED_MASK0	(NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
-			 NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
-			 NEED_XMM|NEED_XMM2)
-#define SSE_MASK	(NEED_XMM|NEED_XMM2)
-
-#define REQUIRED_MASK1	(NEED_LM|NEED_3DNOW)
-
-#define REQUIRED_MASK2	0
-#define REQUIRED_MASK3	(NEED_NOPL)
-#define REQUIRED_MASK4	(NEED_MOVBE)
-#define REQUIRED_MASK5	0
-#define REQUIRED_MASK6	0
-#define REQUIRED_MASK7	0
-#define REQUIRED_MASK8	0
-#define REQUIRED_MASK9	0
-#define REQUIRED_MASK10	0
-#define REQUIRED_MASK11	0
-#define REQUIRED_MASK12	0
-#define REQUIRED_MASK13	0
-#define REQUIRED_MASK14	0
-#define REQUIRED_MASK15	0
-#define REQUIRED_MASK16	0
-#define REQUIRED_MASK17	0
-#define REQUIRED_MASK18	0
-#define REQUIRED_MASK19	0
-#define REQUIRED_MASK20	0
-#define REQUIRED_MASK21	0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 156b62a163c5..8a48cc2536f5 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -226,7 +226,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
 	/* Wrong Checksum */
 	rcsum = xbc_calc_checksum(*buf, size);
 	if (csum != rcsum) {
-		pr_err("checksum error: %d != %d\n", csum, rcsum);
+		pr_err("checksum error: %u != %u\n", csum, rcsum);
 		return -EINVAL;
 	}
 
@@ -395,7 +395,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	xbc_get_info(&ret, NULL);
 	printf("\tNumber of nodes: %d\n", ret);
 	printf("\tSize: %u bytes\n", (unsigned int)size);
-	printf("\tChecksum: %d\n", (unsigned int)csum);
+	printf("\tChecksum: %u\n", (unsigned int)csum);
 
 	/* TODO: Check the options by schema */
 	xbc_exit();
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 243b79f2b451..062bbd6cd048 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -27,12 +27,6 @@ srctree := $(patsubst %/,%,$(dir $(CURDIR)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 endif
 
-ifeq ($(V),1)
-  Q =
-else
-  Q = @
-endif
-
 FEATURE_USER = .bpf
 FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled
 FEATURE_DISPLAY = libbfd
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index 4315652678b9..bf843f328812 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -5,12 +5,6 @@ INSTALL ?= install
 RM ?= rm -f
 RMDIR ?= rmdir --ignore-fail-on-non-empty
 
-ifeq ($(V),1)
-  Q =
-else
-  Q = @
-endif
-
 prefix ?= /usr/local
 mandir ?= $(prefix)/man
 man8dir = $(mandir)/man8
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 3f6bca03ad2e..d47dddc2b4ee 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -24,7 +24,7 @@ BTF COMMANDS
 =============
 
 | **bpftool** **btf** { **show** | **list** } [**id** *BTF_ID*]
-| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*]
+| **bpftool** **btf dump** *BTF_SRC* [**format** *FORMAT*] [**root_id** *ROOT_ID*]
 | **bpftool** **btf help**
 |
 | *BTF_SRC* := { **id** *BTF_ID* | **prog** *PROG* | **map** *MAP* [{**key** | **value** | **kv** | **all**}] | **file** *FILE* }
@@ -43,7 +43,7 @@ bpftool btf { show | list } [id *BTF_ID*]
     that hold open file descriptors (FDs) against BTF objects. On such kernels
     bpftool will automatically emit this information as well.
 
-bpftool btf dump *BTF_SRC*
+bpftool btf dump *BTF_SRC* [format *FORMAT*] [root_id *ROOT_ID*]
     Dump BTF entries from a given *BTF_SRC*.
 
     When **id** is specified, BTF object with that ID will be loaded and all
@@ -67,6 +67,11 @@ bpftool btf dump *BTF_SRC*
     formatting, the output is sorted by default. Use the **unsorted** option
     to avoid sorting the output.
 
+    **root_id** option can be used to filter a dump to a single type and all
+    its dependent types. It cannot be used with any other types of filtering
+    (such as the "key", "value", or "kv" arguments when dumping BTF for a map).
+    It can be passed multiple times to dump multiple types.
+
 bpftool btf help
     Print short help message.
 
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index a4263dfb5e03..6ea4823b770c 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -7,12 +7,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 endif
 
-ifeq ($(V),1)
-  Q =
-else
-  Q = @
-endif
-
 BPF_DIR = $(srctree)/tools/lib/bpf
 
 ifneq ($(OUTPUT),)
@@ -106,6 +100,7 @@ FEATURE_TESTS += libbfd-liberty
 FEATURE_TESTS += libbfd-liberty-z
 FEATURE_TESTS += disassembler-four-args
 FEATURE_TESTS += disassembler-init-styled
+FEATURE_TESTS += libelf-zstd
 
 FEATURE_DISPLAY := clang-bpf-co-re
 FEATURE_DISPLAY += llvm
@@ -132,6 +127,12 @@ endif
 
 LIBS = $(LIBBPF) -lelf -lz
 LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
+
+ifeq ($(feature-libelf-zstd),1)
+LIBS += -lzstd
+LIBS_BOOTSTRAP += -lzstd
+endif
+
 ifeq ($(feature-libcap), 1)
 CFLAGS += -DUSE_LIBCAP
 LIBS += -lcap
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 0c541498c301..1ce409a6cbd9 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -930,19 +930,24 @@ _bpftool()
                         format)
                             COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) )
                             ;;
+                        root_id)
+                            return 0;
+                            ;;
                         c)
-                            COMPREPLY=( $( compgen -W "unsorted" -- "$cur" ) )
+                            COMPREPLY=( $( compgen -W "unsorted root_id" -- "$cur" ) )
                             ;;
                         *)
                             # emit extra options
                             case ${words[3]} in
                                 id|file)
+                                    COMPREPLY=( $( compgen -W "root_id" -- "$cur" ) )
                                     _bpftool_once_attr 'format'
                                     ;;
                                 map|prog)
                                     if [[ ${words[3]} == "map" ]] && [[ $cword == 6 ]]; then
                                         COMPREPLY+=( $( compgen -W "key value kv all" -- "$cur" ) )
                                     fi
+                                    COMPREPLY=( $( compgen -W "root_id" -- "$cur" ) )
                                     _bpftool_once_attr 'format'
                                     ;;
                                 *)
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index d005e4fd6128..2636655ac180 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -27,6 +27,8 @@
 #define KFUNC_DECL_TAG		"bpf_kfunc"
 #define FASTCALL_DECL_TAG	"bpf_fastcall"
 
+#define MAX_ROOT_IDS		16
+
 static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_UNKN]		= "UNKNOWN",
 	[BTF_KIND_INT]		= "INT",
@@ -880,12 +882,14 @@ static int do_dump(int argc, char **argv)
 {
 	bool dump_c = false, sort_dump_c = true;
 	struct btf *btf = NULL, *base = NULL;
-	__u32 root_type_ids[2];
+	__u32 root_type_ids[MAX_ROOT_IDS];
+	bool have_id_filtering;
 	int root_type_cnt = 0;
 	__u32 btf_id = -1;
 	const char *src;
 	int fd = -1;
 	int err = 0;
+	int i;
 
 	if (!REQ_ARGS(2)) {
 		usage();
@@ -973,6 +977,8 @@ static int do_dump(int argc, char **argv)
 		goto done;
 	}
 
+	have_id_filtering = !!root_type_cnt;
+
 	while (argc) {
 		if (is_prefix(*argv, "format")) {
 			NEXT_ARG();
@@ -992,6 +998,36 @@ static int do_dump(int argc, char **argv)
 				goto done;
 			}
 			NEXT_ARG();
+		} else if (is_prefix(*argv, "root_id")) {
+			__u32 root_id;
+			char *end;
+
+			if (have_id_filtering) {
+				p_err("cannot use root_id with other type filtering");
+				err = -EINVAL;
+				goto done;
+			} else if (root_type_cnt == MAX_ROOT_IDS) {
+				p_err("only %d root_id are supported", MAX_ROOT_IDS);
+				err = -E2BIG;
+				goto done;
+			}
+
+			NEXT_ARG();
+			root_id = strtoul(*argv, &end, 0);
+			if (*end) {
+				err = -1;
+				p_err("can't parse %s as root ID", *argv);
+				goto done;
+			}
+			for (i = 0; i < root_type_cnt; i++) {
+				if (root_type_ids[i] == root_id) {
+					err = -EINVAL;
+					p_err("duplicate root_id %d supplied", root_id);
+					goto done;
+				}
+			}
+			root_type_ids[root_type_cnt++] = root_id;
+			NEXT_ARG();
 		} else if (is_prefix(*argv, "unsorted")) {
 			sort_dump_c = false;
 			NEXT_ARG();
@@ -1017,6 +1053,17 @@ static int do_dump(int argc, char **argv)
 		}
 	}
 
+	/* Invalid root IDs causes half emitted boilerplate and then unclean
+	 * exit. It's an ugly user experience, so handle common error here.
+	 */
+	for (i = 0; i < root_type_cnt; i++) {
+		if (root_type_ids[i] >= btf__type_cnt(btf)) {
+			err = -EINVAL;
+			p_err("invalid root ID: %u", root_type_ids[i]);
+			goto done;
+		}
+	}
+
 	if (dump_c) {
 		if (json_output) {
 			p_err("JSON output for C-syntax dump is not supported");
@@ -1391,7 +1438,7 @@ static int do_help(int argc, char **argv)
 
 	fprintf(stderr,
 		"Usage: %1$s %2$s { show | list } [id BTF_ID]\n"
-		"       %1$s %2$s dump BTF_SRC [format FORMAT]\n"
+		"       %1$s %2$s dump BTF_SRC [format FORMAT] [root_id ROOT_ID]\n"
 		"       %1$s %2$s help\n"
 		"\n"
 		"       BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
index eec437cca2ea..e3785f9a697d 100644
--- a/tools/bpf/bpftool/cfg.c
+++ b/tools/bpf/bpftool/cfg.c
@@ -302,6 +302,7 @@ static bool func_add_bb_edges(struct func_node *func)
 
 		insn = bb->tail;
 		if (!is_jmp_insn(insn->code) ||
+		    BPF_OP(insn->code) == BPF_CALL ||
 		    BPF_OP(insn->code) == BPF_EXIT) {
 			e->dst = bb_next(bb);
 			e->flags |= EDGE_FLAG_FALLTHROUGH;
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 4dbc4fcdf473..24fecdf8e430 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -885,6 +885,28 @@ probe_v3_isa_extension(const char *define_prefix, __u32 ifindex)
 			   "V3_ISA_EXTENSION");
 }
 
+/*
+ * Probe for the v4 instruction set extension introduced in commit 1f9a1ea821ff
+ * ("bpf: Support new sign-extension load insns").
+ */
+static void
+probe_v4_isa_extension(const char *define_prefix, __u32 ifindex)
+{
+	struct bpf_insn insns[5] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 1, 1),
+		BPF_JMP32_A(1),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN()
+	};
+
+	probe_misc_feature(insns, ARRAY_SIZE(insns),
+			   define_prefix, ifindex,
+			   "have_v4_isa_extension",
+			   "ISA extension v4",
+			   "V4_ISA_EXTENSION");
+}
+
 static void
 section_system_config(enum probe_component target, const char *define_prefix)
 {
@@ -1029,6 +1051,7 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
 	probe_bounded_loops(define_prefix, ifindex);
 	probe_v2_isa_extension(define_prefix, ifindex);
 	probe_v3_isa_extension(define_prefix, ifindex);
+	probe_v4_isa_extension(define_prefix, ifindex);
 	print_end_section();
 }
 
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 4b8079f294f6..afbddea3a39c 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -5,10 +5,8 @@ include ../../scripts/Makefile.arch
 srctree := $(abspath $(CURDIR)/../../../)
 
 ifeq ($(V),1)
-  Q =
   msg =
 else
-  Q = @
   ifeq ($(silent),1)
     msg =
   else
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index bd9f960bce3d..d47191c6e55e 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -141,6 +141,7 @@ struct object {
 };
 
 static int verbose;
+static int warnings;
 
 static int eprintf(int level, int var, const char *fmt, ...)
 {
@@ -604,6 +605,7 @@ static int symbols_resolve(struct object *obj)
 			if (id->id) {
 				pr_info("WARN: multiple IDs found for '%s': %d, %d - using %d\n",
 					str, id->id, type_id, id->id);
+				warnings++;
 			} else {
 				id->id = type_id;
 				(*nr)--;
@@ -625,8 +627,10 @@ static int id_patch(struct object *obj, struct btf_id *id)
 	int i;
 
 	/* For set, set8, id->id may be 0 */
-	if (!id->id && !id->is_set && !id->is_set8)
+	if (!id->id && !id->is_set && !id->is_set8) {
 		pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
+		warnings++;
+	}
 
 	for (i = 0; i < id->addr_cnt; i++) {
 		unsigned long addr = id->addr[i];
@@ -782,6 +786,7 @@ int main(int argc, const char **argv)
 		.funcs    = RB_ROOT,
 		.sets     = RB_ROOT,
 	};
+	bool fatal_warnings = false;
 	struct option btfid_options[] = {
 		OPT_INCR('v', "verbose", &verbose,
 			 "be more verbose (show errors, etc)"),
@@ -789,6 +794,8 @@ int main(int argc, const char **argv)
 			   "BTF data"),
 		OPT_STRING('b', "btf_base", &obj.base_btf_path, "file",
 			   "path of file providing base BTF"),
+		OPT_BOOLEAN(0, "fatal_warnings", &fatal_warnings,
+			    "turn warnings into errors"),
 		OPT_END()
 	};
 	int err = -1;
@@ -823,7 +830,8 @@ int main(int argc, const char **argv)
 	if (symbols_patch(&obj))
 		goto out;
 
-	err = 0;
+	if (!(fatal_warnings && warnings))
+		err = 0;
 out:
 	if (obj.efile.elf) {
 		elf_end(obj.efile.elf);
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index c4f1f1735af6..e49203ebd48c 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -26,10 +26,7 @@ VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux)		\
 VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword			       \
 					  $(wildcard $(VMLINUX_BTF_PATHS))))
 
-ifeq ($(V),1)
-Q =
-else
-Q = @
+ifneq ($(V),1)
 MAKEFLAGS += --no-print-directory
 submake_extras := feature_display=0
 endif
diff --git a/tools/build/Build.include b/tools/build/Build.include
index c2a95ab47379..e45b2eb0d24a 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -13,6 +13,8 @@
 comma   := ,
 squote  := '
 pound   := \#
+empty   :=
+space   := $(empty) $(empty)
 
 ###
 # Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 18ad131f6ea7..63ef21878761 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -17,13 +17,7 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
 
 export HOSTCC HOSTLD HOSTAR
 
-ifeq ($(V),1)
-  Q =
-else
-  Q = @
-endif
-
-export Q srctree CC LD
+export srctree CC LD
 
 MAKEFLAGS := --no-print-directory
 build     := -f $(srctree)/tools/build/Makefile.build dir=. obj
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 5fb3fb3d97e0..e710ed67a1b4 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -12,26 +12,6 @@
 PHONY := __build
 __build:
 
-ifeq ($(V),1)
-  quiet =
-  Q =
-else
-  quiet=quiet_
-  Q=@
-endif
-
-# If the user is running make -s (silent mode), suppress echoing of commands
-# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS.
-ifeq ($(filter 3.%,$(MAKE_VERSION)),)
-short-opts := $(firstword -$(MAKEFLAGS))
-else
-short-opts := $(filter-out --%,$(MAKEFLAGS))
-endif
-
-ifneq ($(findstring s,$(short-opts)),)
-  quiet=silent_
-endif
-
 build-dir := $(srctree)/tools/build
 
 # Define $(fixdep) for dep-cmd function
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index bca47d136f05..1931b6321314 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -28,6 +28,41 @@ endef
 #   the rule that uses them - an example for that is the 'bionic'
 #   feature check. ]
 #
+# These + the ones in FEATURE_TESTS_EXTRA are included in
+# tools/build/feature/test-all.c and we try to build it all together
+# then setting all those features to '1' meaning they are all enabled.
+#
+# There are things like fortify-source that will be set to 1 because test-all
+# is built with the flags needed to test if its enabled, resulting in
+#
+#   $ rm -rf /tmp/b ; mkdir /tmp/b ; make -C tools/perf O=/tmp/b feature-dump
+#   $ grep fortify-source /tmp/b/FEATURE-DUMP
+#   feature-fortify-source=1
+#   $
+#
+#   All the others should have lines in tools/build/feature/test-all.c like:
+#
+#    #define main main_test_disassembler_init_styled
+#    # include "test-disassembler-init-styled.c"
+#    #undef main
+#
+#    #define main main_test_libzstd
+#    # include "test-libzstd.c"
+#    #undef main
+#
+#    int main(int argc, char *argv[])
+#    {
+#      main_test_disassembler_four_args();
+#      main_test_libzstd();
+#      return 0;
+#    }
+#
+#    If the sample above works, then we end up with these lines in the FEATURE-DUMP
+#    file:
+#
+#    feature-disassembler-four-args=1
+#    feature-libzstd=1
+#
 FEATURE_TESTS_BASIC :=                  \
         backtrace                       \
         libdw                           \
@@ -38,17 +73,16 @@ FEATURE_TESTS_BASIC :=                  \
         glibc                           \
         libbfd                          \
         libbfd-buildid			\
-        libcap                          \
         libelf                          \
         libelf-getphdrnum               \
         libelf-gelf_getnote             \
         libelf-getshdrstrndx            \
+        libelf-zstd                     \
         libnuma                         \
         numa_num_possible_cpus          \
         libperl                         \
         libpython                       \
         libslang                        \
-        libslang-include-subdir         \
         libtraceevent                   \
         libtracefs                      \
         libcpupower                     \
@@ -89,13 +123,6 @@ FEATURE_TESTS_EXTRA :=                  \
          libbfd-liberty                 \
          libbfd-liberty-z               \
          libopencsd                     \
-         libunwind-x86                  \
-         libunwind-x86_64               \
-         libunwind-arm                  \
-         libunwind-aarch64              \
-         libunwind-debug-frame          \
-         libunwind-debug-frame-arm      \
-         libunwind-debug-frame-aarch64  \
          cxx                            \
          llvm                           \
          clang                          \
@@ -122,7 +149,6 @@ FEATURE_DISPLAY ?=              \
          glibc                  \
          libbfd                 \
          libbfd-buildid		\
-         libcap                 \
          libelf                 \
          libnuma                \
          numa_num_possible_cpus \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 043dfd00fce7..cb1e3e2feedf 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -13,7 +13,6 @@ FILES=                                          \
          test-gtk2.bin                          \
          test-gtk2-infobar.bin                  \
          test-hello.bin                         \
-         test-libaudit.bin                      \
          test-libbfd.bin                        \
          test-libbfd-buildid.bin		\
          test-disassembler-four-args.bin        \
@@ -28,6 +27,7 @@ FILES=                                          \
          test-libelf-getphdrnum.bin             \
          test-libelf-gelf_getnote.bin           \
          test-libelf-getshdrstrndx.bin          \
+         test-libelf-zstd.bin                   \
          test-libdebuginfod.bin                 \
          test-libnuma.bin                       \
          test-numa_num_possible_cpus.bin        \
@@ -110,7 +110,7 @@ all: $(FILES)
 __BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
   BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
   BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
-  BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap
+  BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd
 
 __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
   BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
@@ -196,6 +196,9 @@ $(OUTPUT)test-libelf-gelf_getnote.bin:
 $(OUTPUT)test-libelf-getshdrstrndx.bin:
 	$(BUILD) -lelf
 
+$(OUTPUT)test-libelf-zstd.bin:
+	$(BUILD) -lelf -lz -lzstd
+
 $(OUTPUT)test-libdebuginfod.bin:
 	$(BUILD) -ldebuginfod
 
@@ -228,9 +231,6 @@ $(OUTPUT)test-libunwind-debug-frame-arm.bin:
 $(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
 	$(BUILD) -lelf -llzma -lunwind-aarch64
 
-$(OUTPUT)test-libaudit.bin:
-	$(BUILD) -laudit
-
 $(OUTPUT)test-libslang.bin:
 	$(BUILD) -lslang
 
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 59ef3d7fe6a4..03ddaac6f4c4 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -58,8 +58,8 @@
 # include "test-libelf-getshdrstrndx.c"
 #undef main
 
-#define main main_test_libunwind
-# include "test-libunwind.c"
+#define main main_test_libelf_zstd
+# include "test-libelf-zstd.c"
 #undef main
 
 #define main main_test_libslang
@@ -170,6 +170,14 @@
 # include "test-libzstd.c"
 #undef main
 
+#define main main_test_libtraceevent
+# include "test-libtraceevent.c"
+#undef main
+
+#define main main_test_libtracefs
+# include "test-libtracefs.c"
+#undef main
+
 int main(int argc, char *argv[])
 {
 	main_test_libpython();
@@ -184,7 +192,6 @@ int main(int argc, char *argv[])
 	main_test_libelf_getphdrnum();
 	main_test_libelf_gelf_getnote();
 	main_test_libelf_getshdrstrndx();
-	main_test_libunwind();
 	main_test_libslang();
 	main_test_libbfd();
 	main_test_libbfd_buildid();
@@ -208,6 +215,8 @@ int main(int argc, char *argv[])
 	main_test_reallocarray();
 	main_test_disassembler_four_args();
 	main_test_libzstd();
+	main_test_libtraceevent();
+	main_test_libtracefs();
 
 	return 0;
 }
diff --git a/tools/build/feature/test-libaudit.c b/tools/build/feature/test-libaudit.c
deleted file mode 100644
index f5b0863fa1ec..000000000000
--- a/tools/build/feature/test-libaudit.c
+++ /dev/null
@@ -1,11 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <libaudit.h>
-
-extern int printf(const char *format, ...);
-
-int main(void)
-{
-	printf("error message: %s\n", audit_errno_to_name(0));
-
-	return audit_open();
-}
diff --git a/tools/build/feature/test-libelf-zstd.c b/tools/build/feature/test-libelf-zstd.c
new file mode 100644
index 000000000000..a1324a1db3bb
--- /dev/null
+++ b/tools/build/feature/test-libelf-zstd.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <libelf.h>
+
+int main(void)
+{
+	elf_compress(NULL, ELFCOMPRESS_ZSTD, 0);
+	return 0;
+}
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 60044b608817..8de2914e6510 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -41,7 +41,7 @@
  * Missing asm support
  *
  * __GENMASK_U128() depends on _BIT128() which would not work
- * in the asm code, as it shifts an 'unsigned __init128' data
+ * in the asm code, as it shifts an 'unsigned __int128' data
  * type instead of direct representation of 128 bit constants
  * such as long and unsigned long. The fundamental problem is
  * that a 128 bit constant will get silently truncated by the
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index 65aa8ce142e5..bcc6df79301a 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -273,6 +273,16 @@
 		.off   = OFF,					\
 		.imm   = 0 })
 
+/* Unconditional jumps, gotol pc + imm32 */
+
+#define BPF_JMP32_A(IMM)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_JA,			\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
 /* Function call */
 
 #define BPF_EMIT_CALL(FUNC)					\
diff --git a/tools/include/linux/kasan-tags.h b/tools/include/linux/kasan-tags.h
new file mode 100644
index 000000000000..4f85f562512c
--- /dev/null
+++ b/tools/include/linux/kasan-tags.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KASAN_TAGS_H
+#define _LINUX_KASAN_TAGS_H
+
+#define KASAN_TAG_KERNEL	0xFF /* native kernel pointers tag */
+#define KASAN_TAG_INVALID	0xFE /* inaccessible memory tag */
+#define KASAN_TAG_MAX		0xFD /* maximum value for random tags */
+
+#ifdef CONFIG_KASAN_HW_TAGS
+#define KASAN_TAG_MIN		0xF0 /* minimum value for random tags */
+#else
+#define KASAN_TAG_MIN		0x00 /* minimum value for random tags */
+#endif
+
+#endif /* LINUX_KASAN_TAGS_H */
diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h
index 453a4f4ef39d..df5d9fa84dba 100644
--- a/tools/include/linux/objtool_types.h
+++ b/tools/include/linux/objtool_types.h
@@ -54,4 +54,16 @@ struct unwind_hint {
 #define UNWIND_HINT_TYPE_SAVE		6
 #define UNWIND_HINT_TYPE_RESTORE	7
 
+/*
+ * Annotate types
+ */
+#define ANNOTYPE_NOENDBR		1
+#define ANNOTYPE_RETPOLINE_SAFE		2
+#define ANNOTYPE_INSTR_BEGIN		3
+#define ANNOTYPE_INSTR_END		4
+#define ANNOTYPE_UNRET_BEGIN		5
+#define ANNOTYPE_IGNORE_ALTS		6
+#define ANNOTYPE_INTRA_FUNCTION_CALL	7
+#define ANNOTYPE_REACHABLE		8
+
 #endif /* _LINUX_OBJTOOL_TYPES_H */
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index a1f55fb24bb3..f9702877ac21 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -29,7 +29,9 @@ all_files := \
 		compiler.h \
 		crt.h \
 		ctype.h \
+		dirent.h \
 		errno.h \
+		limits.h \
 		nolibc.h \
 		signal.h \
 		stackprotector.h \
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 1791a8ce58da..753a8ed2cf69 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -179,6 +179,7 @@
 })
 
 /* startup code, note that it's called __start on MIPS */
+void __start(void);
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void)
 {
 	__asm__ volatile (
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index f9ab83a219b8..df4c3cc713ac 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -5,8 +5,8 @@
 
 #ifndef _NOLIBC_ARCH_S390_H
 #define _NOLIBC_ARCH_S390_H
-#include <asm/signal.h>
-#include <asm/unistd.h>
+#include <linux/signal.h>
+#include <linux/unistd.h>
 
 #include "compiler.h"
 #include "crt.h"
@@ -143,8 +143,13 @@
 void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
 {
 	__asm__ volatile (
+#ifdef __s390x__
 		"lgr	%r2, %r15\n"          /* save stack pointer to %r2, as arg1 of _start_c */
 		"aghi	%r15, -160\n"         /* allocate new stackframe                        */
+#else
+		"lr	%r2, %r15\n"
+		"ahi	%r15, -96\n"
+#endif
 		"xc	0(8,%r15), 0(%r15)\n" /* clear backchain                                */
 		"brasl	%r14, _start_c\n"     /* transfer to c runtime                          */
 	);
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index c8f4e5d3add9..8a2c143c0fba 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -29,7 +29,7 @@
 #include "arch-powerpc.h"
 #elif defined(__riscv)
 #include "arch-riscv.h"
-#elif defined(__s390x__)
+#elif defined(__s390x__) || defined(__s390__)
 #include "arch-s390.h"
 #elif defined(__loongarch__)
 #include "arch-loongarch.h"
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index bbcd5fd09806..c4b10103bbec 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -10,6 +10,7 @@
 char **environ __attribute__((weak));
 const unsigned long *_auxv __attribute__((weak));
 
+void _start(void);
 static void __stack_chk_init(void);
 static void exit(int);
 
@@ -22,6 +23,7 @@ extern void (*const __init_array_end[])(int, char **, char**) __attribute__((wea
 extern void (*const __fini_array_start[])(void) __attribute__((weak));
 extern void (*const __fini_array_end[])(void) __attribute__((weak));
 
+void _start_c(long *sp);
 __attribute__((weak,used))
 void _start_c(long *sp)
 {
diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h
new file mode 100644
index 000000000000..c5c30d0dd680
--- /dev/null
+++ b/tools/include/nolibc/dirent.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Directory access for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_DIRENT_H
+#define _NOLIBC_DIRENT_H
+
+#include "stdint.h"
+#include "types.h"
+
+#include <linux/limits.h>
+
+struct dirent {
+	ino_t	d_ino;
+	char	d_name[NAME_MAX + 1];
+};
+
+/* See comment of FILE in stdio.h */
+typedef struct {
+	char dummy[1];
+} DIR;
+
+static __attribute__((unused))
+DIR *fdopendir(int fd)
+{
+	if (fd < 0) {
+		SET_ERRNO(EBADF);
+		return NULL;
+	}
+	return (DIR *)(intptr_t)~fd;
+}
+
+static __attribute__((unused))
+DIR *opendir(const char *name)
+{
+	int fd;
+
+	fd = open(name, O_RDONLY);
+	if (fd == -1)
+		return NULL;
+	return fdopendir(fd);
+}
+
+static __attribute__((unused))
+int closedir(DIR *dirp)
+{
+	intptr_t i = (intptr_t)dirp;
+
+	if (i >= 0) {
+		SET_ERRNO(EBADF);
+		return -1;
+	}
+	return close(~i);
+}
+
+static __attribute__((unused))
+int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
+{
+	char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1];
+	struct linux_dirent64 *ldir = (void *)buf;
+	intptr_t i = (intptr_t)dirp;
+	int fd, ret;
+
+	if (i >= 0)
+		return EBADF;
+
+	fd = ~i;
+
+	ret = sys_getdents64(fd, ldir, sizeof(buf));
+	if (ret < 0)
+		return -ret;
+	if (ret == 0) {
+		*result = NULL;
+		return 0;
+	}
+
+	/*
+	 * getdents64() returns as many entries as fit the buffer.
+	 * readdir() can only return one entry at a time.
+	 * Make sure the non-returned ones are not skipped.
+	 */
+	ret = lseek(fd, ldir->d_off, SEEK_SET);
+	if (ret == -1)
+		return errno;
+
+	entry->d_ino = ldir->d_ino;
+	/* the destination should always be big enough */
+	strlcpy(entry->d_name, ldir->d_name, sizeof(entry->d_name));
+	*result = entry;
+	return 0;
+}
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#endif /* _NOLIBC_DIRENT_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
index a44486ff0477..1d8d8033e8ff 100644
--- a/tools/include/nolibc/errno.h
+++ b/tools/include/nolibc/errno.h
@@ -7,7 +7,7 @@
 #ifndef _NOLIBC_ERRNO_H
 #define _NOLIBC_ERRNO_H
 
-#include <asm/errno.h>
+#include <linux/errno.h>
 
 #ifndef NOLIBC_IGNORE_ERRNO
 #define SET_ERRNO(v) do { errno = (v); } while (0)
diff --git a/tools/include/nolibc/limits.h b/tools/include/nolibc/limits.h
new file mode 100644
index 000000000000..306d4141f4d2
--- /dev/null
+++ b/tools/include/nolibc/limits.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Shim limits.h header for NOLIBC.
+ * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de>
+ */
+
+#include "nolibc.h"
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 92436b1e4441..70872401aca8 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -31,8 +31,7 @@
  *   - The third level is the libc call definition. It exposes the lower raw
  *     sys_<name>() calls in a way that looks like what a libc usually does,
  *     takes care of specific input values, and of setting errno upon error.
- *     There can be minor variations compared to standard libc calls. For
- *     example the open() call always takes 3 args here.
+ *     There can be minor variations compared to standard libc calls.
  *
  * The errno variable is declared static and unused. This way it can be
  * optimized away if not used. However this means that a program made of
@@ -105,6 +104,7 @@
 #include "string.h"
 #include "time.h"
 #include "stackprotector.h"
+#include "dirent.h"
 
 /* Used by programs to avoid std includes */
 #define NOLIBC
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
index 137552216e46..cdcc5904c51e 100644
--- a/tools/include/nolibc/signal.h
+++ b/tools/include/nolibc/signal.h
@@ -13,6 +13,7 @@
 #include "sys.h"
 
 /* This one is not marked static as it's needed by libgcc for divide by zero */
+int raise(int signal);
 __attribute__((weak,unused,section(".text.nolibc_raise")))
 int raise(int signal)
 {
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index 1d0d5259ec41..c71a2c257177 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -18,6 +18,7 @@
  * triggering stack protector errors themselves
  */
 
+void __stack_chk_fail(void);
 __attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk")))
 void __stack_chk_fail(void)
 {
@@ -28,6 +29,7 @@ void __stack_chk_fail(void)
 	for (;;);
 }
 
+void __stack_chk_fail_local(void);
 __attribute__((weak,noreturn,section(".text.nolibc_stack_chk")))
 void __stack_chk_fail_local(void)
 {
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 3892034198dd..a403351dbf60 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -350,6 +350,104 @@ int printf(const char *fmt, ...)
 }
 
 static __attribute__((unused))
+int vsscanf(const char *str, const char *format, va_list args)
+{
+	uintmax_t uval;
+	intmax_t ival;
+	int base;
+	char *endptr;
+	int matches;
+	int lpref;
+
+	matches = 0;
+
+	while (1) {
+		if (*format == '%') {
+			/* start of pattern */
+			lpref = 0;
+			format++;
+
+			if (*format == 'l') {
+				/* same as in printf() */
+				lpref = 1;
+				format++;
+				if (*format == 'l') {
+					lpref = 2;
+					format++;
+				}
+			}
+
+			if (*format == '%') {
+				/* literal % */
+				if ('%' != *str)
+					goto done;
+				str++;
+				format++;
+				continue;
+			} else if (*format == 'd') {
+				ival = strtoll(str, &endptr, 10);
+				if (lpref == 0)
+					*va_arg(args, int *) = ival;
+				else if (lpref == 1)
+					*va_arg(args, long *) = ival;
+				else if (lpref == 2)
+					*va_arg(args, long long *) = ival;
+			} else if (*format == 'u' || *format == 'x' || *format == 'X') {
+				base = *format == 'u' ? 10 : 16;
+				uval = strtoull(str, &endptr, base);
+				if (lpref == 0)
+					*va_arg(args, unsigned int *) = uval;
+				else if (lpref == 1)
+					*va_arg(args, unsigned long *) = uval;
+				else if (lpref == 2)
+					*va_arg(args, unsigned long long *) = uval;
+			} else if (*format == 'p') {
+				*va_arg(args, void **) = (void *)strtoul(str, &endptr, 16);
+			} else {
+				SET_ERRNO(EILSEQ);
+				goto done;
+			}
+
+			format++;
+			str = endptr;
+			matches++;
+
+		} else if (*format == '\0') {
+			goto done;
+		} else if (isspace(*format)) {
+			/* skip spaces in format and str */
+			while (isspace(*format))
+				format++;
+			while (isspace(*str))
+				str++;
+		} else if (*format == *str) {
+			/* literal match */
+			format++;
+			str++;
+		} else {
+			if (!matches)
+				matches = EOF;
+			goto done;
+		}
+	}
+
+done:
+	return matches;
+}
+
+static __attribute__((unused, format(scanf, 2, 3)))
+int sscanf(const char *str, const char *format, ...)
+{
+	va_list args;
+	int ret;
+
+	va_start(args, format);
+	ret = vsscanf(str, format, args);
+	va_end(args);
+	return ret;
+}
+
+static __attribute__((unused))
 void perror(const char *msg)
 {
 	fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 75aa273c23a6..86ad378ab1ea 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -30,6 +30,7 @@ static __attribute__((unused)) char itoa_buffer[21];
  */
 
 /* must be exported, as it's used by libgcc for various divide functions */
+void abort(void);
 __attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
 void abort(void)
 {
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index 9ec9c24f38c0..ba84ab700e30 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -32,6 +32,7 @@ int memcmp(const void *s1, const void *s2, size_t n)
 /* might be ignored by the compiler without -ffreestanding, then found as
  * missing.
  */
+void *memmove(void *dst, const void *src, size_t len);
 __attribute__((weak,unused,section(".text.nolibc_memmove")))
 void *memmove(void *dst, const void *src, size_t len)
 {
@@ -56,6 +57,7 @@ void *memmove(void *dst, const void *src, size_t len)
 
 #ifndef NOLIBC_ARCH_HAS_MEMCPY
 /* must be exported, as it's used by libgcc on ARM */
+void *memcpy(void *dst, const void *src, size_t len);
 __attribute__((weak,unused,section(".text.nolibc_memcpy")))
 void *memcpy(void *dst, const void *src, size_t len)
 {
@@ -73,6 +75,7 @@ void *memcpy(void *dst, const void *src, size_t len)
 /* might be ignored by the compiler without -ffreestanding, then found as
  * missing.
  */
+void *memset(void *dst, int b, size_t len);
 __attribute__((weak,unused,section(".text.nolibc_memset")))
 void *memset(void *dst, int b, size_t len)
 {
@@ -124,6 +127,7 @@ char *strcpy(char *dst, const char *src)
  * thus itself, hence the asm() statement below that's meant to disable this
  * confusing practice.
  */
+size_t strlen(const char *str);
 __attribute__((weak,unused,section(".text.nolibc_strlen")))
 size_t strlen(const char *str)
 {
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 7b82bc3cf107..08c1c074bec8 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -10,10 +10,10 @@
 #include "std.h"
 
 /* system includes */
-#include <asm/unistd.h>
-#include <asm/signal.h>  /* for SIGCHLD */
-#include <asm/ioctls.h>
-#include <asm/mman.h>
+#include <linux/unistd.h>
+#include <linux/signal.h>  /* for SIGCHLD */
+#include <linux/termios.h>
+#include <linux/mman.h>
 #include <linux/fs.h>
 #include <linux/loop.h>
 #include <linux/time.h>
@@ -531,20 +531,16 @@ uid_t getuid(void)
 
 
 /*
- * int ioctl(int fd, unsigned long req, void *value);
+ * int ioctl(int fd, unsigned long cmd, ... arg);
  */
 
 static __attribute__((unused))
-int sys_ioctl(int fd, unsigned long req, void *value)
+long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
 {
-	return my_syscall3(__NR_ioctl, fd, req, value);
+	return my_syscall3(__NR_ioctl, fd, cmd, arg);
 }
 
-static __attribute__((unused))
-int ioctl(int fd, unsigned long req, void *value)
-{
-	return __sysret(sys_ioctl(fd, req, value));
-}
+#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg)))
 
 /*
  * int kill(pid_t pid, int signal);
@@ -601,9 +597,36 @@ off_t sys_lseek(int fd, off_t offset, int whence)
 }
 
 static __attribute__((unused))
+int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low,
+	       __kernel_loff_t *result, int whence)
+{
+#ifdef __NR_llseek
+	return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence);
+#else
+	return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence);
+#endif
+}
+
+static __attribute__((unused))
 off_t lseek(int fd, off_t offset, int whence)
 {
-	return __sysret(sys_lseek(fd, offset, whence));
+	__kernel_loff_t loff = 0;
+	off_t result;
+	int ret;
+
+	result = sys_lseek(fd, offset, whence);
+	if (result == -ENOSYS) {
+		/* Only exists on 32bit where nolibc off_t is also 32bit */
+		ret = sys_llseek(fd, 0, offset, &loff, whence);
+		if (ret < 0)
+			result = ret;
+		else if (loff != (off_t)loff)
+			result = -EOVERFLOW;
+		else
+			result = loff;
+	}
+
+	return __sysret(result);
 }
 
 
@@ -741,6 +764,31 @@ int mount(const char *src, const char *tgt,
 	return __sysret(sys_mount(src, tgt, fst, flags, data));
 }
 
+/*
+ * int openat(int dirfd, const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_openat(int dirfd, const char *path, int flags, mode_t mode)
+{
+	return my_syscall4(__NR_openat, dirfd, path, flags, mode);
+}
+
+static __attribute__((unused))
+int openat(int dirfd, const char *path, int flags, ...)
+{
+	mode_t mode = 0;
+
+	if (flags & O_CREAT) {
+		va_list args;
+
+		va_start(args, flags);
+		mode = va_arg(args, mode_t);
+		va_end(args);
+	}
+
+	return __sysret(sys_openat(dirfd, path, flags, mode));
+}
 
 /*
  * int open(const char *path, int flags[, mode_t mode]);
@@ -749,13 +797,7 @@ int mount(const char *src, const char *tgt,
 static __attribute__((unused))
 int sys_open(const char *path, int flags, mode_t mode)
 {
-#ifdef __NR_openat
 	return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-#elif defined(__NR_open)
-	return my_syscall3(__NR_open, path, flags, mode);
-#else
-	return __nolibc_enosys(__func__, path, flags, mode);
-#endif
 }
 
 static __attribute__((unused))
@@ -767,7 +809,7 @@ int open(const char *path, int flags, ...)
 		va_list args;
 
 		va_start(args, flags);
-		mode = va_arg(args, int);
+		mode = va_arg(args, mode_t);
 		va_end(args);
 	}
 
@@ -1226,6 +1268,23 @@ pid_t waitpid(pid_t pid, int *status, int options)
 
 
 /*
+ * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
+ */
+
+static __attribute__((unused))
+int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage)
+{
+	return my_syscall5(__NR_waitid, which, pid, infop, options, rusage);
+}
+
+static __attribute__((unused))
+int waitid(int which, pid_t pid, siginfo_t *infop, int options)
+{
+	return __sysret(sys_waitid(which, pid, infop, options, NULL));
+}
+
+
+/*
  * ssize_t write(int fd, const void *buf, size_t count);
  */
 
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index 281df9139d2b..aa5016ff3d91 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -119,12 +119,31 @@
 
 #define SO_DETACH_REUSEPORT_BPF 68
 
+#define SO_PREFER_BUSY_POLL	69
+#define SO_BUSY_POLL_BUDGET	70
+
+#define SO_NETNS_COOKIE		71
+
+#define SO_BUF_LOCK		72
+
+#define SO_RESERVE_MEM		73
+
+#define SO_TXREHASH		74
+
 #define SO_RCVMARK		75
 
 #define SO_PASSPIDFD		76
 #define SO_PEERPIDFD		77
 
-#define SCM_TS_OPT_ID		78
+#define SO_DEVMEM_LINEAR	78
+#define SCM_DEVMEM_LINEAR	SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF	79
+#define SCM_DEVMEM_DMABUF	SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED	80
+
+#define SCM_TS_OPT_ID		81
+
+#define SO_RCVPRIORITY		82
 
 #if !defined(__KERNEL__)
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4162afc6b5d0..2acf9b336371 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1573,6 +1573,16 @@ union bpf_attr {
 		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
 		 */
 		__s32		prog_token_fd;
+		/* The fd_array_cnt can be used to pass the length of the
+		 * fd_array array. In this case all the [map] file descriptors
+		 * passed in this array will be bound to the program, even if
+		 * the maps are not referenced directly. The functionality is
+		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+		 * used by the verifier during the program load. If provided,
+		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+		 * continuous.
+		 */
+		__u32		fd_array_cnt;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h
index e16be0d37746..b8f629ef135f 100644
--- a/tools/include/uapi/linux/const.h
+++ b/tools/include/uapi/linux/const.h
@@ -33,7 +33,7 @@
  * Missing asm support
  *
  * __BIT128() would not work in the asm code, as it shifts an
- * 'unsigned __init128' data type as direct representation of
+ * 'unsigned __int128' data type as direct representation of
  * 128 bit constants is not supported in the gcc compiler, as
  * they get silently truncated.
  *
diff --git a/tools/include/uapi/linux/elf.h b/tools/include/uapi/linux/elf.h
new file mode 100644
index 000000000000..5834b83d7f9a
--- /dev/null
+++ b/tools/include/uapi/linux/elf.h
@@ -0,0 +1,524 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_ELF_H
+#define _LINUX_ELF_H
+
+#include <linux/types.h>
+#include <linux/elf-em.h>
+
+/* 32-bit ELF base types. */
+typedef __u32	Elf32_Addr;
+typedef __u16	Elf32_Half;
+typedef __u32	Elf32_Off;
+typedef __s32	Elf32_Sword;
+typedef __u32	Elf32_Word;
+typedef __u16	Elf32_Versym;
+
+/* 64-bit ELF base types. */
+typedef __u64	Elf64_Addr;
+typedef __u16	Elf64_Half;
+typedef __s16	Elf64_SHalf;
+typedef __u64	Elf64_Off;
+typedef __s32	Elf64_Sword;
+typedef __u32	Elf64_Word;
+typedef __u64	Elf64_Xword;
+typedef __s64	Elf64_Sxword;
+typedef __u16	Elf64_Versym;
+
+/* These constants are for the segment types stored in the image headers */
+#define PT_NULL    0
+#define PT_LOAD    1
+#define PT_DYNAMIC 2
+#define PT_INTERP  3
+#define PT_NOTE    4
+#define PT_SHLIB   5
+#define PT_PHDR    6
+#define PT_TLS     7		/* Thread local storage segment */
+#define PT_LOOS    0x60000000	/* OS-specific */
+#define PT_HIOS    0x6fffffff	/* OS-specific */
+#define PT_LOPROC  0x70000000
+#define PT_HIPROC  0x7fffffff
+#define PT_GNU_EH_FRAME	(PT_LOOS + 0x474e550)
+#define PT_GNU_STACK	(PT_LOOS + 0x474e551)
+#define PT_GNU_RELRO	(PT_LOOS + 0x474e552)
+#define PT_GNU_PROPERTY	(PT_LOOS + 0x474e553)
+
+
+/* ARM MTE memory tag segment type */
+#define PT_AARCH64_MEMTAG_MTE	(PT_LOPROC + 0x2)
+
+/*
+ * Extended Numbering
+ *
+ * If the real number of program header table entries is larger than
+ * or equal to PN_XNUM(0xffff), it is set to sh_info field of the
+ * section header at index 0, and PN_XNUM is set to e_phnum
+ * field. Otherwise, the section header at index 0 is zero
+ * initialized, if it exists.
+ *
+ * Specifications are available in:
+ *
+ * - Oracle: Linker and Libraries.
+ *   Part No: 817–1984–19, August 2011.
+ *   https://docs.oracle.com/cd/E18752_01/pdf/817-1984.pdf
+ *
+ * - System V ABI AMD64 Architecture Processor Supplement
+ *   Draft Version 0.99.4,
+ *   January 13, 2010.
+ *   http://www.cs.washington.edu/education/courses/cse351/12wi/supp-docs/abi.pdf
+ */
+#define PN_XNUM 0xffff
+
+/* These constants define the different elf file types */
+#define ET_NONE   0
+#define ET_REL    1
+#define ET_EXEC   2
+#define ET_DYN    3
+#define ET_CORE   4
+#define ET_LOPROC 0xff00
+#define ET_HIPROC 0xffff
+
+/* This is the info that is needed to parse the dynamic section of the file */
+#define DT_NULL		0
+#define DT_NEEDED	1
+#define DT_PLTRELSZ	2
+#define DT_PLTGOT	3
+#define DT_HASH		4
+#define DT_STRTAB	5
+#define DT_SYMTAB	6
+#define DT_RELA		7
+#define DT_RELASZ	8
+#define DT_RELAENT	9
+#define DT_STRSZ	10
+#define DT_SYMENT	11
+#define DT_INIT		12
+#define DT_FINI		13
+#define DT_SONAME	14
+#define DT_RPATH	15
+#define DT_SYMBOLIC	16
+#define DT_REL		17
+#define DT_RELSZ	18
+#define DT_RELENT	19
+#define DT_PLTREL	20
+#define DT_DEBUG	21
+#define DT_TEXTREL	22
+#define DT_JMPREL	23
+#define DT_ENCODING	32
+#define OLD_DT_LOOS	0x60000000
+#define DT_LOOS		0x6000000d
+#define DT_HIOS		0x6ffff000
+#define DT_VALRNGLO	0x6ffffd00
+#define DT_VALRNGHI	0x6ffffdff
+#define DT_ADDRRNGLO	0x6ffffe00
+#define DT_GNU_HASH	0x6ffffef5
+#define DT_ADDRRNGHI	0x6ffffeff
+#define DT_VERSYM	0x6ffffff0
+#define DT_RELACOUNT	0x6ffffff9
+#define DT_RELCOUNT	0x6ffffffa
+#define DT_FLAGS_1	0x6ffffffb
+#define DT_VERDEF	0x6ffffffc
+#define	DT_VERDEFNUM	0x6ffffffd
+#define DT_VERNEED	0x6ffffffe
+#define	DT_VERNEEDNUM	0x6fffffff
+#define OLD_DT_HIOS     0x6fffffff
+#define DT_LOPROC	0x70000000
+#define DT_HIPROC	0x7fffffff
+
+/* This info is needed when parsing the symbol table */
+#define STB_LOCAL  0
+#define STB_GLOBAL 1
+#define STB_WEAK   2
+
+#define STN_UNDEF 0
+
+#define STT_NOTYPE  0
+#define STT_OBJECT  1
+#define STT_FUNC    2
+#define STT_SECTION 3
+#define STT_FILE    4
+#define STT_COMMON  5
+#define STT_TLS     6
+
+#define VER_FLG_BASE 0x1
+#define VER_FLG_WEAK 0x2
+
+#define ELF_ST_BIND(x)		((x) >> 4)
+#define ELF_ST_TYPE(x)		((x) & 0xf)
+#define ELF32_ST_BIND(x)	ELF_ST_BIND(x)
+#define ELF32_ST_TYPE(x)	ELF_ST_TYPE(x)
+#define ELF64_ST_BIND(x)	ELF_ST_BIND(x)
+#define ELF64_ST_TYPE(x)	ELF_ST_TYPE(x)
+
+typedef struct {
+  Elf32_Sword d_tag;
+  union {
+    Elf32_Sword	d_val;
+    Elf32_Addr	d_ptr;
+  } d_un;
+} Elf32_Dyn;
+
+typedef struct {
+  Elf64_Sxword d_tag;		/* entry tag value */
+  union {
+    Elf64_Xword d_val;
+    Elf64_Addr d_ptr;
+  } d_un;
+} Elf64_Dyn;
+
+/* The following are used with relocations */
+#define ELF32_R_SYM(x) ((x) >> 8)
+#define ELF32_R_TYPE(x) ((x) & 0xff)
+
+#define ELF64_R_SYM(i)			((i) >> 32)
+#define ELF64_R_TYPE(i)			((i) & 0xffffffff)
+
+typedef struct elf32_rel {
+  Elf32_Addr	r_offset;
+  Elf32_Word	r_info;
+} Elf32_Rel;
+
+typedef struct elf64_rel {
+  Elf64_Addr r_offset;	/* Location at which to apply the action */
+  Elf64_Xword r_info;	/* index and type of relocation */
+} Elf64_Rel;
+
+typedef struct elf32_rela {
+  Elf32_Addr	r_offset;
+  Elf32_Word	r_info;
+  Elf32_Sword	r_addend;
+} Elf32_Rela;
+
+typedef struct elf64_rela {
+  Elf64_Addr r_offset;	/* Location at which to apply the action */
+  Elf64_Xword r_info;	/* index and type of relocation */
+  Elf64_Sxword r_addend;	/* Constant addend used to compute value */
+} Elf64_Rela;
+
+typedef struct elf32_sym {
+  Elf32_Word	st_name;
+  Elf32_Addr	st_value;
+  Elf32_Word	st_size;
+  unsigned char	st_info;
+  unsigned char	st_other;
+  Elf32_Half	st_shndx;
+} Elf32_Sym;
+
+typedef struct elf64_sym {
+  Elf64_Word st_name;		/* Symbol name, index in string tbl */
+  unsigned char	st_info;	/* Type and binding attributes */
+  unsigned char	st_other;	/* No defined meaning, 0 */
+  Elf64_Half st_shndx;		/* Associated section index */
+  Elf64_Addr st_value;		/* Value of the symbol */
+  Elf64_Xword st_size;		/* Associated symbol size */
+} Elf64_Sym;
+
+
+#define EI_NIDENT	16
+
+typedef struct elf32_hdr {
+  unsigned char	e_ident[EI_NIDENT];
+  Elf32_Half	e_type;
+  Elf32_Half	e_machine;
+  Elf32_Word	e_version;
+  Elf32_Addr	e_entry;  /* Entry point */
+  Elf32_Off	e_phoff;
+  Elf32_Off	e_shoff;
+  Elf32_Word	e_flags;
+  Elf32_Half	e_ehsize;
+  Elf32_Half	e_phentsize;
+  Elf32_Half	e_phnum;
+  Elf32_Half	e_shentsize;
+  Elf32_Half	e_shnum;
+  Elf32_Half	e_shstrndx;
+} Elf32_Ehdr;
+
+typedef struct elf64_hdr {
+  unsigned char	e_ident[EI_NIDENT];	/* ELF "magic number" */
+  Elf64_Half e_type;
+  Elf64_Half e_machine;
+  Elf64_Word e_version;
+  Elf64_Addr e_entry;		/* Entry point virtual address */
+  Elf64_Off e_phoff;		/* Program header table file offset */
+  Elf64_Off e_shoff;		/* Section header table file offset */
+  Elf64_Word e_flags;
+  Elf64_Half e_ehsize;
+  Elf64_Half e_phentsize;
+  Elf64_Half e_phnum;
+  Elf64_Half e_shentsize;
+  Elf64_Half e_shnum;
+  Elf64_Half e_shstrndx;
+} Elf64_Ehdr;
+
+/* These constants define the permissions on sections in the program
+   header, p_flags. */
+#define PF_R		0x4
+#define PF_W		0x2
+#define PF_X		0x1
+
+typedef struct elf32_phdr {
+  Elf32_Word	p_type;
+  Elf32_Off	p_offset;
+  Elf32_Addr	p_vaddr;
+  Elf32_Addr	p_paddr;
+  Elf32_Word	p_filesz;
+  Elf32_Word	p_memsz;
+  Elf32_Word	p_flags;
+  Elf32_Word	p_align;
+} Elf32_Phdr;
+
+typedef struct elf64_phdr {
+  Elf64_Word p_type;
+  Elf64_Word p_flags;
+  Elf64_Off p_offset;		/* Segment file offset */
+  Elf64_Addr p_vaddr;		/* Segment virtual address */
+  Elf64_Addr p_paddr;		/* Segment physical address */
+  Elf64_Xword p_filesz;		/* Segment size in file */
+  Elf64_Xword p_memsz;		/* Segment size in memory */
+  Elf64_Xword p_align;		/* Segment alignment, file & memory */
+} Elf64_Phdr;
+
+/* sh_type */
+#define SHT_NULL	0
+#define SHT_PROGBITS	1
+#define SHT_SYMTAB	2
+#define SHT_STRTAB	3
+#define SHT_RELA	4
+#define SHT_HASH	5
+#define SHT_DYNAMIC	6
+#define SHT_NOTE	7
+#define SHT_NOBITS	8
+#define SHT_REL		9
+#define SHT_SHLIB	10
+#define SHT_DYNSYM	11
+#define SHT_NUM		12
+#define SHT_LOPROC	0x70000000
+#define SHT_HIPROC	0x7fffffff
+#define SHT_LOUSER	0x80000000
+#define SHT_HIUSER	0xffffffff
+
+/* sh_flags */
+#define SHF_WRITE		0x1
+#define SHF_ALLOC		0x2
+#define SHF_EXECINSTR		0x4
+#define SHF_RELA_LIVEPATCH	0x00100000
+#define SHF_RO_AFTER_INIT	0x00200000
+#define SHF_MASKPROC		0xf0000000
+
+/* special section indexes */
+#define SHN_UNDEF	0
+#define SHN_LORESERVE	0xff00
+#define SHN_LOPROC	0xff00
+#define SHN_HIPROC	0xff1f
+#define SHN_LIVEPATCH	0xff20
+#define SHN_ABS		0xfff1
+#define SHN_COMMON	0xfff2
+#define SHN_HIRESERVE	0xffff
+
+typedef struct elf32_shdr {
+  Elf32_Word	sh_name;
+  Elf32_Word	sh_type;
+  Elf32_Word	sh_flags;
+  Elf32_Addr	sh_addr;
+  Elf32_Off	sh_offset;
+  Elf32_Word	sh_size;
+  Elf32_Word	sh_link;
+  Elf32_Word	sh_info;
+  Elf32_Word	sh_addralign;
+  Elf32_Word	sh_entsize;
+} Elf32_Shdr;
+
+typedef struct elf64_shdr {
+  Elf64_Word sh_name;		/* Section name, index in string tbl */
+  Elf64_Word sh_type;		/* Type of section */
+  Elf64_Xword sh_flags;		/* Miscellaneous section attributes */
+  Elf64_Addr sh_addr;		/* Section virtual addr at execution */
+  Elf64_Off sh_offset;		/* Section file offset */
+  Elf64_Xword sh_size;		/* Size of section in bytes */
+  Elf64_Word sh_link;		/* Index of another section */
+  Elf64_Word sh_info;		/* Additional section information */
+  Elf64_Xword sh_addralign;	/* Section alignment */
+  Elf64_Xword sh_entsize;	/* Entry size if section holds table */
+} Elf64_Shdr;
+
+#define	EI_MAG0		0		/* e_ident[] indexes */
+#define	EI_MAG1		1
+#define	EI_MAG2		2
+#define	EI_MAG3		3
+#define	EI_CLASS	4
+#define	EI_DATA		5
+#define	EI_VERSION	6
+#define	EI_OSABI	7
+#define	EI_PAD		8
+
+#define	ELFMAG0		0x7f		/* EI_MAG */
+#define	ELFMAG1		'E'
+#define	ELFMAG2		'L'
+#define	ELFMAG3		'F'
+#define	ELFMAG		"\177ELF"
+#define	SELFMAG		4
+
+#define	ELFCLASSNONE	0		/* EI_CLASS */
+#define	ELFCLASS32	1
+#define	ELFCLASS64	2
+#define	ELFCLASSNUM	3
+
+#define ELFDATANONE	0		/* e_ident[EI_DATA] */
+#define ELFDATA2LSB	1
+#define ELFDATA2MSB	2
+
+#define EV_NONE		0		/* e_version, EI_VERSION */
+#define EV_CURRENT	1
+#define EV_NUM		2
+
+#define ELFOSABI_NONE	0
+#define ELFOSABI_LINUX	3
+
+#ifndef ELF_OSABI
+#define ELF_OSABI ELFOSABI_NONE
+#endif
+
+/*
+ * Notes used in ET_CORE. Architectures export some of the arch register sets
+ * using the corresponding note types via the PTRACE_GETREGSET and
+ * PTRACE_SETREGSET requests.
+ * The note name for these types is "LINUX", except NT_PRFPREG that is named
+ * "CORE".
+ */
+#define NT_PRSTATUS	1
+#define NT_PRFPREG	2
+#define NT_PRPSINFO	3
+#define NT_TASKSTRUCT	4
+#define NT_AUXV		6
+/*
+ * Note to userspace developers: size of NT_SIGINFO note may increase
+ * in the future to accomodate more fields, don't assume it is fixed!
+ */
+#define NT_SIGINFO      0x53494749
+#define NT_FILE		0x46494c45
+#define NT_PRXFPREG     0x46e62b7f      /* copied from gdb5.1/include/elf/common.h */
+#define NT_PPC_VMX	0x100		/* PowerPC Altivec/VMX registers */
+#define NT_PPC_SPE	0x101		/* PowerPC SPE/EVR registers */
+#define NT_PPC_VSX	0x102		/* PowerPC VSX registers */
+#define NT_PPC_TAR	0x103		/* Target Address Register */
+#define NT_PPC_PPR	0x104		/* Program Priority Register */
+#define NT_PPC_DSCR	0x105		/* Data Stream Control Register */
+#define NT_PPC_EBB	0x106		/* Event Based Branch Registers */
+#define NT_PPC_PMU	0x107		/* Performance Monitor Registers */
+#define NT_PPC_TM_CGPR	0x108		/* TM checkpointed GPR Registers */
+#define NT_PPC_TM_CFPR	0x109		/* TM checkpointed FPR Registers */
+#define NT_PPC_TM_CVMX	0x10a		/* TM checkpointed VMX Registers */
+#define NT_PPC_TM_CVSX	0x10b		/* TM checkpointed VSX Registers */
+#define NT_PPC_TM_SPR	0x10c		/* TM Special Purpose Registers */
+#define NT_PPC_TM_CTAR	0x10d		/* TM checkpointed Target Address Register */
+#define NT_PPC_TM_CPPR	0x10e		/* TM checkpointed Program Priority Register */
+#define NT_PPC_TM_CDSCR	0x10f		/* TM checkpointed Data Stream Control Register */
+#define NT_PPC_PKEY	0x110		/* Memory Protection Keys registers */
+#define NT_PPC_DEXCR	0x111		/* PowerPC DEXCR registers */
+#define NT_PPC_HASHKEYR	0x112		/* PowerPC HASHKEYR register */
+#define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
+#define NT_386_IOPERM	0x201		/* x86 io permission bitmap (1=deny) */
+#define NT_X86_XSTATE	0x202		/* x86 extended state using xsave */
+/* Old binutils treats 0x203 as a CET state */
+#define NT_X86_SHSTK	0x204		/* x86 SHSTK state */
+#define NT_X86_XSAVE_LAYOUT	0x205	/* XSAVE layout description */
+#define NT_S390_HIGH_GPRS	0x300	/* s390 upper register halves */
+#define NT_S390_TIMER	0x301		/* s390 timer register */
+#define NT_S390_TODCMP	0x302		/* s390 TOD clock comparator register */
+#define NT_S390_TODPREG	0x303		/* s390 TOD programmable register */
+#define NT_S390_CTRS	0x304		/* s390 control registers */
+#define NT_S390_PREFIX	0x305		/* s390 prefix register */
+#define NT_S390_LAST_BREAK	0x306	/* s390 breaking event address */
+#define NT_S390_SYSTEM_CALL	0x307	/* s390 system call restart data */
+#define NT_S390_TDB	0x308		/* s390 transaction diagnostic block */
+#define NT_S390_VXRS_LOW	0x309	/* s390 vector registers 0-15 upper half */
+#define NT_S390_VXRS_HIGH	0x30a	/* s390 vector registers 16-31 */
+#define NT_S390_GS_CB	0x30b		/* s390 guarded storage registers */
+#define NT_S390_GS_BC	0x30c		/* s390 guarded storage broadcast control block */
+#define NT_S390_RI_CB	0x30d		/* s390 runtime instrumentation */
+#define NT_S390_PV_CPU_DATA	0x30e	/* s390 protvirt cpu dump data */
+#define NT_ARM_VFP	0x400		/* ARM VFP/NEON registers */
+#define NT_ARM_TLS	0x401		/* ARM TLS register */
+#define NT_ARM_HW_BREAK	0x402		/* ARM hardware breakpoint registers */
+#define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
+#define NT_ARM_SYSTEM_CALL	0x404	/* ARM system call number */
+#define NT_ARM_SVE	0x405		/* ARM Scalable Vector Extension registers */
+#define NT_ARM_PAC_MASK		0x406	/* ARM pointer authentication code masks */
+#define NT_ARM_PACA_KEYS	0x407	/* ARM pointer authentication address keys */
+#define NT_ARM_PACG_KEYS	0x408	/* ARM pointer authentication generic key */
+#define NT_ARM_TAGGED_ADDR_CTRL	0x409	/* arm64 tagged address control (prctl()) */
+#define NT_ARM_PAC_ENABLED_KEYS	0x40a	/* arm64 ptr auth enabled keys (prctl()) */
+#define NT_ARM_SSVE	0x40b		/* ARM Streaming SVE registers */
+#define NT_ARM_ZA	0x40c		/* ARM SME ZA registers */
+#define NT_ARM_ZT	0x40d		/* ARM SME ZT registers */
+#define NT_ARM_FPMR	0x40e		/* ARM floating point mode register */
+#define NT_ARM_POE	0x40f		/* ARM POE registers */
+#define NT_ARM_GCS	0x410		/* ARM GCS state */
+#define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
+#define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
+#define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
+#define NT_MIPS_FP_MODE	0x801		/* MIPS floating-point mode */
+#define NT_MIPS_MSA	0x802		/* MIPS SIMD registers */
+#define NT_RISCV_CSR	0x900		/* RISC-V Control and Status Registers */
+#define NT_RISCV_VECTOR	0x901		/* RISC-V vector registers */
+#define NT_RISCV_TAGGED_ADDR_CTRL 0x902	/* RISC-V tagged address control (prctl()) */
+#define NT_LOONGARCH_CPUCFG	0xa00	/* LoongArch CPU config registers */
+#define NT_LOONGARCH_CSR	0xa01	/* LoongArch control and status registers */
+#define NT_LOONGARCH_LSX	0xa02	/* LoongArch Loongson SIMD Extension registers */
+#define NT_LOONGARCH_LASX	0xa03	/* LoongArch Loongson Advanced SIMD Extension registers */
+#define NT_LOONGARCH_LBT	0xa04	/* LoongArch Loongson Binary Translation registers */
+#define NT_LOONGARCH_HW_BREAK	0xa05   /* LoongArch hardware breakpoint registers */
+#define NT_LOONGARCH_HW_WATCH	0xa06   /* LoongArch hardware watchpoint registers */
+
+/* Note types with note name "GNU" */
+#define NT_GNU_PROPERTY_TYPE_0	5
+
+/* Note header in a PT_NOTE section */
+typedef struct elf32_note {
+  Elf32_Word	n_namesz;	/* Name size */
+  Elf32_Word	n_descsz;	/* Content size */
+  Elf32_Word	n_type;		/* Content type */
+} Elf32_Nhdr;
+
+/* Note header in a PT_NOTE section */
+typedef struct elf64_note {
+  Elf64_Word n_namesz;	/* Name size */
+  Elf64_Word n_descsz;	/* Content size */
+  Elf64_Word n_type;	/* Content type */
+} Elf64_Nhdr;
+
+/* .note.gnu.property types for EM_AARCH64: */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND	0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI	(1U << 0)
+
+typedef struct {
+  Elf32_Half	vd_version;
+  Elf32_Half	vd_flags;
+  Elf32_Half	vd_ndx;
+  Elf32_Half	vd_cnt;
+  Elf32_Word	vd_hash;
+  Elf32_Word	vd_aux;
+  Elf32_Word	vd_next;
+} Elf32_Verdef;
+
+typedef struct {
+  Elf64_Half	vd_version;
+  Elf64_Half	vd_flags;
+  Elf64_Half	vd_ndx;
+  Elf64_Half	vd_cnt;
+  Elf64_Word	vd_hash;
+  Elf64_Word	vd_aux;
+  Elf64_Word	vd_next;
+} Elf64_Verdef;
+
+typedef struct {
+  Elf32_Word    vda_name;
+  Elf32_Word    vda_next;
+} Elf32_Verdaux;
+
+typedef struct {
+  Elf64_Word    vda_name;
+  Elf64_Word    vda_next;
+} Elf64_Verdaux;
+
+#endif /* _LINUX_ELF_H */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 8516c1ccd57a..7e46ca4cd31b 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -1315,6 +1315,8 @@ enum {
 	IFLA_NETKIT_MODE,
 	IFLA_NETKIT_SCRUB,
 	IFLA_NETKIT_PEER_SCRUB,
+	IFLA_NETKIT_HEADROOM,
+	IFLA_NETKIT_TAILROOM,
 	__IFLA_NETKIT_MAX,
 };
 #define IFLA_NETKIT_MAX	(__IFLA_NETKIT_MAX - 1)
diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h
index 2f082b01ff22..42ec5ddaab8d 100644
--- a/tools/include/uapi/linux/if_xdp.h
+++ b/tools/include/uapi/linux/if_xdp.h
@@ -117,12 +117,12 @@ struct xdp_options {
 	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
 
 /* Request transmit timestamp. Upon completion, put it into tx_timestamp
- * field of union xsk_tx_metadata.
+ * field of struct xsk_tx_metadata.
  */
 #define XDP_TXMD_FLAGS_TIMESTAMP		(1 << 0)
 
 /* Request transmit checksum offload. Checksum start position and offset
- * are communicated via csum_start and csum_offset fields of union
+ * are communicated via csum_start and csum_offset fields of struct
  * xsk_tx_metadata.
  */
 #define XDP_TXMD_FLAGS_CHECKSUM			(1 << 1)
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 337fde770e45..edec23406dbc 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -296,7 +296,7 @@ int filename__read_int(const char *filename, int *value)
 	int fd = open(filename, O_RDONLY), err = -1;
 
 	if (fd < 0)
-		return -1;
+		return -errno;
 
 	if (read(fd, line, sizeof(line)) > 0) {
 		*value = atoi(line);
@@ -314,7 +314,7 @@ static int filename__read_ull_base(const char *filename,
 	int fd = open(filename, O_RDONLY), err = -1;
 
 	if (fd < 0)
-		return -1;
+		return -errno;
 
 	if (read(fd, line, sizeof(line)) > 0) {
 		*value = strtoull(line, NULL, base);
@@ -372,7 +372,7 @@ int filename__write_int(const char *filename, int value)
 	char buf[64];
 
 	if (fd < 0)
-		return err;
+		return -errno;
 
 	sprintf(buf, "%d", value);
 	if (write(fd, buf, sizeof(buf)) == sizeof(buf))
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 857a5f7b413d..168140f8e646 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -53,13 +53,6 @@ include $(srctree)/tools/scripts/Makefile.include
 
 # copy a bit from Linux kbuild
 
-ifeq ("$(origin V)", "command line")
-  VERBOSE = $(V)
-endif
-ifndef VERBOSE
-  VERBOSE = 0
-endif
-
 INCLUDES = -I$(or $(OUTPUT),.) \
 	   -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \
 	   -I$(srctree)/tools/arch/$(SRCARCH)/include
@@ -96,12 +89,6 @@ override CFLAGS += $(CLANG_CROSS_FLAGS)
 # flags specific for shared library
 SHLIB_FLAGS := -DSHARED -fPIC
 
-ifeq ($(VERBOSE),1)
-  Q =
-else
-  Q = @
-endif
-
 # Disable command line variables (CFLAGS) override from top
 # level Makefile (perf), otherwise build Makefile will get
 # the same command line setup.
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index becdfa701c75..359f73ead613 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -238,7 +238,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 		  const struct bpf_insn *insns, size_t insn_cnt,
 		  struct bpf_prog_load_opts *opts)
 {
-	const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
+	const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt);
 	void *finfo = NULL, *linfo = NULL;
 	const char *func_info, *line_info;
 	__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
@@ -311,6 +311,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 	attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0);
 
 	attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL));
+	attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0);
 
 	if (log_level) {
 		attr.log_buf = ptr_to_u64(log_buf);
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index a4a7b1ad1b63..435da95d2058 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -107,9 +107,12 @@ struct bpf_prog_load_opts {
 	 */
 	__u32 log_true_size;
 	__u32 token_fd;
+
+	/* if set, provides the length of fd_array */
+	__u32 fd_array_cnt;
 	size_t :0;
 };
-#define bpf_prog_load_opts__last_field token_fd
+#define bpf_prog_load_opts__last_field fd_array_cnt
 
 LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type,
 			     const char *prog_name, const char *license,
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 12468ae0d573..48c66f3a9200 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -283,7 +283,7 @@ static int btf_parse_str_sec(struct btf *btf)
 		return -EINVAL;
 	}
 	if (!btf->base_btf && start[0]) {
-		pr_debug("Invalid BTF string section\n");
+		pr_debug("Malformed BTF string section, did you forget to provide base BTF?\n");
 		return -EINVAL;
 	}
 	return 0;
@@ -1186,6 +1186,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
 
 	elf = elf_begin(fd, ELF_C_READ, NULL);
 	if (!elf) {
+		err = -LIBBPF_ERRNO__FORMAT;
 		pr_warn("failed to open %s as ELF file\n", path);
 		goto done;
 	}
diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c
index b72f83e15156..53d1f3541bce 100644
--- a/tools/lib/bpf/btf_relocate.c
+++ b/tools/lib/bpf/btf_relocate.c
@@ -212,7 +212,7 @@ static int btf_relocate_map_distilled_base(struct btf_relocate *r)
 	 * need to match both name and size, otherwise embedding the base
 	 * struct/union in the split type is invalid.
 	 */
-	for (id = r->nr_dist_base_types; id < r->nr_split_types; id++) {
+	for (id = r->nr_dist_base_types; id < r->nr_dist_base_types + r->nr_split_types; id++) {
 		err = btf_mark_embedded_composite_type_ids(r, id);
 		if (err)
 			goto done;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 66173ddb5a2d..194809da5172 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1731,12 +1731,24 @@ static int sys_memfd_create(const char *name, unsigned flags)
 #ifndef MFD_CLOEXEC
 #define MFD_CLOEXEC 0x0001U
 #endif
+#ifndef MFD_NOEXEC_SEAL
+#define MFD_NOEXEC_SEAL 0x0008U
+#endif
 
 static int create_placeholder_fd(void)
 {
+	unsigned int flags = MFD_CLOEXEC | MFD_NOEXEC_SEAL;
+	const char *name = "libbpf-placeholder-fd";
 	int fd;
 
-	fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
+	fd = ensure_good_fd(sys_memfd_create(name, flags));
+	if (fd >= 0)
+		return fd;
+	else if (errno != EINVAL)
+		return -errno;
+
+	/* Possibly running on kernel without MFD_NOEXEC_SEAL */
+	fd = ensure_good_fd(sys_memfd_create(name, flags & ~MFD_NOEXEC_SEAL));
 	if (fd < 0)
 		return -errno;
 	return fd;
@@ -11375,9 +11387,33 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
 	struct kprobe_multi_resolve *res = data->res;
 	int err;
 
-	if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
+	if (!glob_match(sym_name, res->pattern))
 		return 0;
 
+	if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp)) {
+		/* Some versions of kernel strip out .llvm.<hash> suffix from
+		 * function names reported in available_filter_functions, but
+		 * don't do so for kallsyms. While this is clearly a kernel
+		 * bug (fixed by [0]) we try to accommodate that in libbpf to
+		 * make multi-kprobe usability a bit better: if no match is
+		 * found, we will strip .llvm. suffix and try one more time.
+		 *
+		 *   [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG")
+		 */
+		char sym_trim[256], *psym_trim = sym_trim, *sym_sfx;
+
+		if (!(sym_sfx = strstr(sym_name, ".llvm.")))
+			return 0;
+
+		/* psym_trim vs sym_trim dance is done to avoid pointer vs array
+		 * coercion differences and get proper `const char **` pointer
+		 * which avail_func_cmp() expects
+		 */
+		snprintf(sym_trim, sizeof(sym_trim), "%.*s", (int)(sym_sfx - sym_name), sym_name);
+		if (!bsearch(&psym_trim, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
+			return 0;
+	}
+
 	err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
 	if (err)
 		return err;
@@ -11522,7 +11558,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
 	struct bpf_link *link = NULL;
 	const unsigned long *addrs;
 	int err, link_fd, prog_fd;
-	bool retprobe, session;
+	bool retprobe, session, unique_match;
 	const __u64 *cookies;
 	const char **syms;
 	size_t cnt;
@@ -11541,6 +11577,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
 	addrs   = OPTS_GET(opts, addrs, false);
 	cnt     = OPTS_GET(opts, cnt, false);
 	cookies = OPTS_GET(opts, cookies, false);
+	unique_match = OPTS_GET(opts, unique_match, false);
 
 	if (!pattern && !addrs && !syms)
 		return libbpf_err_ptr(-EINVAL);
@@ -11548,6 +11585,8 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
 		return libbpf_err_ptr(-EINVAL);
 	if (!pattern && !cnt)
 		return libbpf_err_ptr(-EINVAL);
+	if (!pattern && unique_match)
+		return libbpf_err_ptr(-EINVAL);
 	if (addrs && syms)
 		return libbpf_err_ptr(-EINVAL);
 
@@ -11558,6 +11597,14 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
 			err = libbpf_available_kallsyms_parse(&res);
 		if (err)
 			goto error;
+
+		if (unique_match && res.cnt != 1) {
+			pr_warn("prog '%s': failed to find a unique match for '%s' (%zu matches)\n",
+				prog->name, pattern, res.cnt);
+			err = -EINVAL;
+			goto error;
+		}
+
 		addrs = res.addrs;
 		cnt = res.cnt;
 	}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index b2ce3a72b11d..3020ee45303a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -552,10 +552,12 @@ struct bpf_kprobe_multi_opts {
 	bool retprobe;
 	/* create session kprobes */
 	bool session;
+	/* enforce unique match */
+	bool unique_match;
 	size_t :0;
 };
 
-#define bpf_kprobe_multi_opts__last_field session
+#define bpf_kprobe_multi_opts__last_field unique_match
 
 LIBBPF_API struct bpf_link *
 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
@@ -1796,9 +1798,14 @@ struct bpf_linker_file_opts {
 struct bpf_linker;
 
 LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts);
+LIBBPF_API struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts);
 LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker,
 				    const char *filename,
 				    const struct bpf_linker_file_opts *opts);
+LIBBPF_API int bpf_linker__add_fd(struct bpf_linker *linker, int fd,
+				  const struct bpf_linker_file_opts *opts);
+LIBBPF_API int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,
+				   const struct bpf_linker_file_opts *opts);
 LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker);
 LIBBPF_API void bpf_linker__free(struct bpf_linker *linker);
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 54b6f312cfa8..a8b2936a1646 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -432,4 +432,8 @@ LIBBPF_1.5.0 {
 } LIBBPF_1.4.0;
 
 LIBBPF_1.6.0 {
+	global:
+		bpf_linker__add_buf;
+		bpf_linker__add_fd;
+		bpf_linker__new_fd;
 } LIBBPF_1.5.0;
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index cf71d149fe26..b52f71c59616 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -4,6 +4,10 @@
  *
  * Copyright (c) 2021 Facebook
  */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>
@@ -16,6 +20,7 @@
 #include <elf.h>
 #include <libelf.h>
 #include <fcntl.h>
+#include <sys/mman.h>
 #include "libbpf.h"
 #include "btf.h"
 #include "libbpf_internal.h"
@@ -152,15 +157,19 @@ struct bpf_linker {
 	/* global (including extern) ELF symbols */
 	int glob_sym_cnt;
 	struct glob_sym *glob_syms;
+
+	bool fd_is_owned;
 };
 
 #define pr_warn_elf(fmt, ...)									\
 	libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1))
 
-static int init_output_elf(struct bpf_linker *linker, const char *file);
+static int init_output_elf(struct bpf_linker *linker);
 
-static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
-				const struct bpf_linker_file_opts *opts,
+static int bpf_linker_add_file(struct bpf_linker *linker, int fd,
+			       const char *filename);
+
+static int linker_load_obj_file(struct bpf_linker *linker,
 				struct src_obj *obj);
 static int linker_sanity_check_elf(struct src_obj *obj);
 static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec);
@@ -191,7 +200,7 @@ void bpf_linker__free(struct bpf_linker *linker)
 	if (linker->elf)
 		elf_end(linker->elf);
 
-	if (linker->fd >= 0)
+	if (linker->fd >= 0 && linker->fd_is_owned)
 		close(linker->fd);
 
 	strset__free(linker->strtab_strs);
@@ -233,9 +242,63 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
 	if (!linker)
 		return errno = ENOMEM, NULL;
 
-	linker->fd = -1;
+	linker->filename = strdup(filename);
+	if (!linker->filename) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	linker->fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
+	if (linker->fd < 0) {
+		err = -errno;
+		pr_warn("failed to create '%s': %d\n", filename, err);
+		goto err_out;
+	}
+	linker->fd_is_owned = true;
+
+	err = init_output_elf(linker);
+	if (err)
+		goto err_out;
+
+	return linker;
+
+err_out:
+	bpf_linker__free(linker);
+	return errno = -err, NULL;
+}
+
+struct bpf_linker *bpf_linker__new_fd(int fd, struct bpf_linker_opts *opts)
+{
+	struct bpf_linker *linker;
+	char filename[32];
+	int err;
+
+	if (fd < 0)
+		return errno = EINVAL, NULL;
+
+	if (!OPTS_VALID(opts, bpf_linker_opts))
+		return errno = EINVAL, NULL;
+
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		pr_warn_elf("libelf initialization failed");
+		return errno = EINVAL, NULL;
+	}
 
-	err = init_output_elf(linker, filename);
+	linker = calloc(1, sizeof(*linker));
+	if (!linker)
+		return errno = ENOMEM, NULL;
+
+	snprintf(filename, sizeof(filename), "fd:%d", fd);
+	linker->filename = strdup(filename);
+	if (!linker->filename) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	linker->fd = fd;
+	linker->fd_is_owned = false;
+
+	err = init_output_elf(linker);
 	if (err)
 		goto err_out;
 
@@ -294,23 +357,12 @@ static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx)
 	return sym;
 }
 
-static int init_output_elf(struct bpf_linker *linker, const char *file)
+static int init_output_elf(struct bpf_linker *linker)
 {
 	int err, str_off;
 	Elf64_Sym *init_sym;
 	struct dst_sec *sec;
 
-	linker->filename = strdup(file);
-	if (!linker->filename)
-		return -ENOMEM;
-
-	linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644);
-	if (linker->fd < 0) {
-		err = -errno;
-		pr_warn("failed to create '%s': %s\n", file, errstr(err));
-		return err;
-	}
-
 	linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL);
 	if (!linker->elf) {
 		pr_warn_elf("failed to create ELF object");
@@ -436,19 +488,16 @@ static int init_output_elf(struct bpf_linker *linker, const char *file)
 	return 0;
 }
 
-int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
-			 const struct bpf_linker_file_opts *opts)
+static int bpf_linker_add_file(struct bpf_linker *linker, int fd,
+			       const char *filename)
 {
 	struct src_obj obj = {};
 	int err = 0;
 
-	if (!OPTS_VALID(opts, bpf_linker_file_opts))
-		return libbpf_err(-EINVAL);
-
-	if (!linker->elf)
-		return libbpf_err(-EINVAL);
+	obj.filename = filename;
+	obj.fd = fd;
 
-	err = err ?: linker_load_obj_file(linker, filename, opts, &obj);
+	err = err ?: linker_load_obj_file(linker, &obj);
 	err = err ?: linker_append_sec_data(linker, &obj);
 	err = err ?: linker_append_elf_syms(linker, &obj);
 	err = err ?: linker_append_elf_relos(linker, &obj);
@@ -463,12 +512,91 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
 	free(obj.sym_map);
 	if (obj.elf)
 		elf_end(obj.elf);
-	if (obj.fd >= 0)
-		close(obj.fd);
 
+	return err;
+}
+
+int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
+			 const struct bpf_linker_file_opts *opts)
+{
+	int fd, err;
+
+	if (!OPTS_VALID(opts, bpf_linker_file_opts))
+		return libbpf_err(-EINVAL);
+
+	if (!linker->elf)
+		return libbpf_err(-EINVAL);
+
+	fd = open(filename, O_RDONLY | O_CLOEXEC);
+	if (fd < 0) {
+		err = -errno;
+		pr_warn("failed to open file '%s': %s\n", filename, errstr(err));
+		return libbpf_err(err);
+	}
+
+	err = bpf_linker_add_file(linker, fd, filename);
+	close(fd);
 	return libbpf_err(err);
 }
 
+int bpf_linker__add_fd(struct bpf_linker *linker, int fd,
+		       const struct bpf_linker_file_opts *opts)
+{
+	char filename[32];
+	int err;
+
+	if (!OPTS_VALID(opts, bpf_linker_file_opts))
+		return libbpf_err(-EINVAL);
+
+	if (!linker->elf)
+		return libbpf_err(-EINVAL);
+
+	if (fd < 0)
+		return libbpf_err(-EINVAL);
+
+	snprintf(filename, sizeof(filename), "fd:%d", fd);
+	err = bpf_linker_add_file(linker, fd, filename);
+	return libbpf_err(err);
+}
+
+int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,
+			const struct bpf_linker_file_opts *opts)
+{
+	char filename[32];
+	int fd, written, ret;
+
+	if (!OPTS_VALID(opts, bpf_linker_file_opts))
+		return libbpf_err(-EINVAL);
+
+	if (!linker->elf)
+		return libbpf_err(-EINVAL);
+
+	snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz);
+
+	fd = memfd_create(filename, 0);
+	if (fd < 0) {
+		ret = -errno;
+		pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret));
+		return libbpf_err(ret);
+	}
+
+	written = 0;
+	while (written < buf_sz) {
+		ret = write(fd, buf, buf_sz);
+		if (ret < 0) {
+			ret = -errno;
+			pr_warn("failed to write '%s': %s\n", filename, errstr(ret));
+			goto err_out;
+		}
+		written += ret;
+	}
+
+	ret = bpf_linker_add_file(linker, fd, filename);
+err_out:
+	close(fd);
+	return libbpf_err(ret);
+}
+
 static bool is_dwarf_sec_name(const char *name)
 {
 	/* approximation, but the actual list is too long */
@@ -534,8 +662,7 @@ static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name)
 	return sec;
 }
 
-static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
-				const struct bpf_linker_file_opts *opts,
+static int linker_load_obj_file(struct bpf_linker *linker,
 				struct src_obj *obj)
 {
 	int err = 0;
@@ -554,36 +681,26 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 #error "Unknown __BYTE_ORDER__"
 #endif
 
-	pr_debug("linker: adding object file '%s'...\n", filename);
-
-	obj->filename = filename;
+	pr_debug("linker: adding object file '%s'...\n", obj->filename);
 
-	obj->fd = open(filename, O_RDONLY | O_CLOEXEC);
-	if (obj->fd < 0) {
-		err = -errno;
-		pr_warn("failed to open file '%s': %s\n", filename, errstr(err));
-		return err;
-	}
 	obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL);
 	if (!obj->elf) {
-		err = -errno;
-		pr_warn_elf("failed to parse ELF file '%s'", filename);
-		return err;
+		pr_warn_elf("failed to parse ELF file '%s'", obj->filename);
+		return -EINVAL;
 	}
 
 	/* Sanity check ELF file high-level properties */
 	ehdr = elf64_getehdr(obj->elf);
 	if (!ehdr) {
-		err = -errno;
-		pr_warn_elf("failed to get ELF header for %s", filename);
-		return err;
+		pr_warn_elf("failed to get ELF header for %s", obj->filename);
+		return -EINVAL;
 	}
 
 	/* Linker output endianness set by first input object */
 	obj_byteorder = ehdr->e_ident[EI_DATA];
 	if (obj_byteorder != ELFDATA2LSB && obj_byteorder != ELFDATA2MSB) {
 		err = -EOPNOTSUPP;
-		pr_warn("unknown byte order of ELF file %s\n", filename);
+		pr_warn("unknown byte order of ELF file %s\n", obj->filename);
 		return err;
 	}
 	if (link_byteorder == ELFDATANONE) {
@@ -593,7 +710,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 			 obj_byteorder == ELFDATA2MSB ? "big" : "little");
 	} else if (link_byteorder != obj_byteorder) {
 		err = -EOPNOTSUPP;
-		pr_warn("byte order mismatch with ELF file %s\n", filename);
+		pr_warn("byte order mismatch with ELF file %s\n", obj->filename);
 		return err;
 	}
 
@@ -601,14 +718,13 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 	    || ehdr->e_machine != EM_BPF
 	    || ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
 		err = -EOPNOTSUPP;
-		pr_warn_elf("unsupported kind of ELF file %s", filename);
+		pr_warn_elf("unsupported kind of ELF file %s", obj->filename);
 		return err;
 	}
 
 	if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) {
-		err = -errno;
-		pr_warn_elf("failed to get SHSTRTAB section index for %s", filename);
-		return err;
+		pr_warn_elf("failed to get SHSTRTAB section index for %s", obj->filename);
+		return -EINVAL;
 	}
 
 	scn = NULL;
@@ -618,26 +734,23 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 
 		shdr = elf64_getshdr(scn);
 		if (!shdr) {
-			err = -errno;
 			pr_warn_elf("failed to get section #%zu header for %s",
-				    sec_idx, filename);
-			return err;
+				    sec_idx, obj->filename);
+			return -EINVAL;
 		}
 
 		sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name);
 		if (!sec_name) {
-			err = -errno;
 			pr_warn_elf("failed to get section #%zu name for %s",
-				    sec_idx, filename);
-			return err;
+				    sec_idx, obj->filename);
+			return -EINVAL;
 		}
 
 		data = elf_getdata(scn, 0);
 		if (!data) {
-			err = -errno;
 			pr_warn_elf("failed to get section #%zu (%s) data from %s",
-				    sec_idx, sec_name, filename);
-			return err;
+				    sec_idx, sec_name, obj->filename);
+			return -EINVAL;
 		}
 
 		sec = add_src_sec(obj, sec_name);
@@ -672,7 +785,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 				err = libbpf_get_error(obj->btf);
 				if (err) {
 					pr_warn("failed to parse .BTF from %s: %s\n",
-						filename, errstr(err));
+						obj->filename, errstr(err));
 					return err;
 				}
 				sec->skipped = true;
@@ -683,7 +796,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 				err = libbpf_get_error(obj->btf_ext);
 				if (err) {
 					pr_warn("failed to parse .BTF.ext from '%s': %s\n",
-						filename, errstr(err));
+						obj->filename, errstr(err));
 					return err;
 				}
 				sec->skipped = true;
@@ -700,7 +813,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename,
 			break;
 		default:
 			pr_warn("unrecognized section #%zu (%s) in %s\n",
-				sec_idx, sec_name, filename);
+				sec_idx, sec_name, obj->filename);
 			err = -EINVAL;
 			return err;
 		}
@@ -2680,22 +2793,23 @@ int bpf_linker__finalize(struct bpf_linker *linker)
 
 	/* Finalize ELF layout */
 	if (elf_update(linker->elf, ELF_C_NULL) < 0) {
-		err = -errno;
+		err = -EINVAL;
 		pr_warn_elf("failed to finalize ELF layout");
 		return libbpf_err(err);
 	}
 
 	/* Write out final ELF contents */
 	if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
-		err = -errno;
+		err = -EINVAL;
 		pr_warn_elf("failed to write ELF contents");
 		return libbpf_err(err);
 	}
 
 	elf_end(linker->elf);
-	close(linker->fd);
-
 	linker->elf = NULL;
+
+	if (linker->fd_is_owned)
+		close(linker->fd);
 	linker->fd = -1;
 
 	return 0;
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 5f085736c6c4..4e4a52742b01 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -661,7 +661,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
 		 *   [0] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
 		 */
 		usdt_abs_ip = note.loc_addr;
-		if (base_addr)
+		if (base_addr && note.base_addr)
 			usdt_abs_ip += base_addr - note.base_addr;
 
 		/* When attaching uprobes (which is what USDTs basically are)
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index fcfb9499ef9c..59aabdd3cabf 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -39,7 +39,6 @@ SYNOPSIS
 
   struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
   struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
-  struct perf_cpu_map *perf_cpu_map__read(FILE *file);
   struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
   struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
                                            struct perf_cpu_map *other);
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 3a9b2140aa04..e9a7ac2c062e 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative)
 libdir_SQ = $(subst ','\'',$(libdir))
 libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
 
-ifeq ("$(origin V)", "command line")
-  VERBOSE = $(V)
-endif
-ifndef VERBOSE
-  VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
-  Q =
-else
-  Q = @
-endif
-
 TEST_ARGS := $(if $(V),-v)
 
 # Set compile option CFLAGS
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index cae799ad44e1..fcc47214062a 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
 #include <perf/cpumap.h>
 #include <stdlib.h>
 #include <linux/refcount.h>
@@ -10,6 +11,9 @@
 #include <ctype.h>
 #include <limits.h>
 #include "internal.h"
+#include <api/fs/fs.h>
+
+#define MAX_NR_CPUS 4096
 
 void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
 {
@@ -100,12 +104,12 @@ static struct perf_cpu_map *cpu_map__new_sysconf(void)
 static struct perf_cpu_map *cpu_map__new_sysfs_online(void)
 {
 	struct perf_cpu_map *cpus = NULL;
-	FILE *onlnf;
+	char *buf = NULL;
+	size_t buf_len;
 
-	onlnf = fopen("/sys/devices/system/cpu/online", "r");
-	if (onlnf) {
-		cpus = perf_cpu_map__read(onlnf);
-		fclose(onlnf);
+	if (sysfs__read_str("devices/system/cpu/online", &buf, &buf_len) >= 0) {
+		cpus = perf_cpu_map__new(buf);
+		free(buf);
 	}
 	return cpus;
 }
@@ -158,62 +162,6 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu
 	return cpus;
 }
 
-struct perf_cpu_map *perf_cpu_map__read(FILE *file)
-{
-	struct perf_cpu_map *cpus = NULL;
-	int nr_cpus = 0;
-	struct perf_cpu *tmp_cpus = NULL, *tmp;
-	int max_entries = 0;
-	int n, cpu, prev;
-	char sep;
-
-	sep = 0;
-	prev = -1;
-	for (;;) {
-		n = fscanf(file, "%u%c", &cpu, &sep);
-		if (n <= 0)
-			break;
-		if (prev >= 0) {
-			int new_max = nr_cpus + cpu - prev - 1;
-
-			WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. "
-							  "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS);
-
-			if (new_max >= max_entries) {
-				max_entries = new_max + MAX_NR_CPUS / 2;
-				tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
-				if (tmp == NULL)
-					goto out_free_tmp;
-				tmp_cpus = tmp;
-			}
-
-			while (++prev < cpu)
-				tmp_cpus[nr_cpus++].cpu = prev;
-		}
-		if (nr_cpus == max_entries) {
-			max_entries += MAX_NR_CPUS;
-			tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
-			if (tmp == NULL)
-				goto out_free_tmp;
-			tmp_cpus = tmp;
-		}
-
-		tmp_cpus[nr_cpus++].cpu = cpu;
-		if (n == 2 && sep == '-')
-			prev = cpu;
-		else
-			prev = -1;
-		if (n == 1 || sep == '\n')
-			break;
-	}
-
-	if (nr_cpus > 0)
-		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
-out_free_tmp:
-	free(tmp_cpus);
-	return cpus;
-}
-
 struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 {
 	struct perf_cpu_map *cpus = NULL;
@@ -238,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 		p = NULL;
 		start_cpu = strtoul(cpu_list, &p, 0);
 		if (start_cpu >= INT_MAX
-		    || (*p != '\0' && *p != ',' && *p != '-'))
+		    || (*p != '\0' && *p != ',' && *p != '-' && *p != '\n'))
 			goto invalid;
 
 		if (*p == '-') {
@@ -246,7 +194,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 			p = NULL;
 			end_cpu = strtoul(cpu_list, &p, 0);
 
-			if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
+			if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
 				goto invalid;
 
 			if (end_cpu < start_cpu)
@@ -265,7 +213,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 					goto invalid;
 
 			if (nr_cpus == max_entries) {
-				max_entries += MAX_NR_CPUS;
+				max_entries += max(end_cpu - start_cpu + 1, 16UL);
 				tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
 				if (tmp == NULL)
 					goto invalid;
@@ -279,14 +227,15 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
 		cpu_list = p;
 	}
 
-	if (nr_cpus > 0)
+	if (nr_cpus > 0) {
 		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
-	else if (*cpu_list != '\0') {
+	} else if (*cpu_list != '\0') {
 		pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.",
 			   cpu_list);
 		cpus = perf_cpu_map__new_online_cpus();
-	} else
+	} else {
 		cpus = perf_cpu_map__new_any_cpu();
+	}
 invalid:
 	free(tmp_cpus);
 out:
@@ -436,46 +385,49 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
 }
 
 /*
- * Merge two cpumaps
+ * Merge two cpumaps.
+ *
+ * If 'other' is subset of '*orig', '*orig' keeps itself with no reference count
+ * change (similar to "realloc").
+ *
+ * If '*orig' is subset of 'other', '*orig' reuses 'other' with its reference
+ * count increased.
  *
- * orig either gets freed and replaced with a new map, or reused
- * with no reference count change (similar to "realloc")
- * other has its reference count increased.
+ * Otherwise, '*orig' gets freed and replaced with a new map.
  */
-
-struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
-					 struct perf_cpu_map *other)
+int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other)
 {
 	struct perf_cpu *tmp_cpus;
 	int tmp_len;
 	int i, j, k;
 	struct perf_cpu_map *merged;
 
-	if (perf_cpu_map__is_subset(orig, other))
-		return orig;
-	if (perf_cpu_map__is_subset(other, orig)) {
-		perf_cpu_map__put(orig);
-		return perf_cpu_map__get(other);
+	if (perf_cpu_map__is_subset(*orig, other))
+		return 0;
+	if (perf_cpu_map__is_subset(other, *orig)) {
+		perf_cpu_map__put(*orig);
+		*orig = perf_cpu_map__get(other);
+		return 0;
 	}
 
-	tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other);
+	tmp_len = __perf_cpu_map__nr(*orig) + __perf_cpu_map__nr(other);
 	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
 	if (!tmp_cpus)
-		return NULL;
+		return -ENOMEM;
 
 	/* Standard merge algorithm from wikipedia */
 	i = j = k = 0;
-	while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
-		if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
-			if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
+	while (i < __perf_cpu_map__nr(*orig) && j < __perf_cpu_map__nr(other)) {
+		if (__perf_cpu_map__cpu(*orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
+			if (__perf_cpu_map__cpu(*orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
 				j++;
-			tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+			tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++);
 		} else
 			tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
 	}
 
-	while (i < __perf_cpu_map__nr(orig))
-		tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+	while (i < __perf_cpu_map__nr(*orig))
+		tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++);
 
 	while (j < __perf_cpu_map__nr(other))
 		tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
@@ -483,8 +435,9 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
 
 	merged = cpu_map__trim_new(k, tmp_cpus);
 	free(tmp_cpus);
-	perf_cpu_map__put(orig);
-	return merged;
+	perf_cpu_map__put(*orig);
+	*orig = merged;
+	return 0;
 }
 
 struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 83c43dc13313..b1f4c8176b32 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -89,7 +89,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
 		evsel->threads = perf_thread_map__get(evlist->threads);
 	}
 
-	evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
+	perf_cpu_map__merge(&evlist->all_cpus, evsel->cpus);
 }
 
 static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index 49649eb51ce4..e2be2d17c32b 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -21,10 +21,6 @@ DECLARE_RC_STRUCT(perf_cpu_map) {
 	struct perf_cpu	map[];
 };
 
-#ifndef MAX_NR_CPUS
-#define MAX_NR_CPUS	2048
-#endif
-
 struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus);
 int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
 bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 90457d17fb2f..188a667babc6 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -3,7 +3,6 @@
 #define __LIBPERF_CPUMAP_H
 
 #include <perf/core.h>
-#include <stdio.h>
 #include <stdbool.h>
 
 /** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
@@ -37,10 +36,9 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);
  *                     perf_cpu_map__new_online_cpus is returned.
  */
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
-						     struct perf_cpu_map *other);
+LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig,
+				    struct perf_cpu_map *other);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
 							 struct perf_cpu_map *other);
 LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 2aa79b696032..fdd8304fe9d0 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -6,7 +6,6 @@ LIBPERF_0.0.1 {
 		perf_cpu_map__get;
 		perf_cpu_map__put;
 		perf_cpu_map__new;
-		perf_cpu_map__read;
 		perf_cpu_map__nr;
 		perf_cpu_map__cpu;
 		perf_cpu_map__has_any_cpu_or_is_empty;
diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile
index 8890fd57b110..a1f5e388644d 100644
--- a/tools/lib/thermal/Makefile
+++ b/tools/lib/thermal/Makefile
@@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative)
 libdir_SQ = $(subst ','\'',$(libdir))
 libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
 
-ifeq ("$(origin V)", "command line")
-  VERBOSE = $(V)
-endif
-ifndef VERBOSE
-  VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
-  Q =
-else
-  Q = @
-endif
-
 # Set compile option CFLAGS
 ifdef EXTRA_CFLAGS
   CFLAGS := $(EXTRA_CFLAGS)
diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt
index 6f3d16dbf467..7ead94bffa4e 100644
--- a/tools/memory-model/Documentation/glossary.txt
+++ b/tools/memory-model/Documentation/glossary.txt
@@ -15,14 +15,14 @@ Address Dependency:  When the address of a later memory access is computed
 	 3 do_something(p->a);
 	 4 rcu_read_unlock();
 
-	 In this case, because the address of "p->a" on line 3 is computed
-	 from the value returned by the rcu_dereference() on line 2, the
-	 address dependency extends from that rcu_dereference() to that
-	 "p->a".  In rare cases, optimizing compilers can destroy address
-	 dependencies.	Please see Documentation/RCU/rcu_dereference.rst
-	 for more information.
+	In this case, because the address of "p->a" on line 3 is computed
+	from the value returned by the rcu_dereference() on line 2, the
+	address dependency extends from that rcu_dereference() to that
+	"p->a".  In rare cases, optimizing compilers can destroy address
+	dependencies.	Please see Documentation/RCU/rcu_dereference.rst
+	for more information.
 
-	 See also "Control Dependency" and "Data Dependency".
+	See also "Control Dependency" and "Data Dependency".
 
 Acquire:  With respect to a lock, acquiring that lock, for example,
 	using spin_lock().  With respect to a non-lock shared variable,
@@ -59,12 +59,12 @@ Control Dependency:  When a later store's execution depends on a test
 	 1 if (READ_ONCE(x))
 	 2   WRITE_ONCE(y, 1);
 
-	 Here, the control dependency extends from the READ_ONCE() on
-	 line 1 to the WRITE_ONCE() on line 2.	Control dependencies are
-	 fragile, and can be easily destroyed by optimizing compilers.
-	 Please see control-dependencies.txt for more information.
+	Here, the control dependency extends from the READ_ONCE() on
+	line 1 to the WRITE_ONCE() on line 2.	Control dependencies are
+	fragile, and can be easily destroyed by optimizing compilers.
+	Please see control-dependencies.txt for more information.
 
-	 See also "Address Dependency" and "Data Dependency".
+	See also "Address Dependency" and "Data Dependency".
 
 Cycle:	Memory-barrier pairing is restricted to a pair of CPUs, as the
 	name suggests.	And in a great many cases, a pair of CPUs is all
@@ -72,10 +72,10 @@ Cycle:	Memory-barrier pairing is restricted to a pair of CPUs, as the
 	extended to additional CPUs, and the result is called a "cycle".
 	In a cycle, each CPU's ordering interacts with that of the next:
 
-	CPU 0                CPU 1                CPU 2
-	WRITE_ONCE(x, 1);    WRITE_ONCE(y, 1);    WRITE_ONCE(z, 1);
-	smp_mb();            smp_mb();            smp_mb();
-	r0 = READ_ONCE(y);   r1 = READ_ONCE(z);   r2 = READ_ONCE(x);
+	 CPU 0                CPU 1                CPU 2
+	 WRITE_ONCE(x, 1);    WRITE_ONCE(y, 1);    WRITE_ONCE(z, 1);
+	 smp_mb();            smp_mb();            smp_mb();
+	 r0 = READ_ONCE(y);   r1 = READ_ONCE(z);   r2 = READ_ONCE(x);
 
 	CPU 0's smp_mb() interacts with that of CPU 1, which interacts
 	with that of CPU 2, which in turn interacts with that of CPU 0
diff --git a/tools/memory-model/Documentation/herd-representation.txt b/tools/memory-model/Documentation/herd-representation.txt
index ed988906f2b7..4e19b4f2a476 100644
--- a/tools/memory-model/Documentation/herd-representation.txt
+++ b/tools/memory-model/Documentation/herd-representation.txt
@@ -18,6 +18,11 @@
 #
 # By convention, a blank line in a cell means "same as the preceding line".
 #
+# Note that the syntactic representation does not always match the sets and
+# relations in linux-kernel.cat, due to redefinitions in linux-kernel.bell and
+# lock.cat. For example, the po link between LKR and LKW is upgraded to an rmw
+# link, and W[ACQUIRE] are not included in the Acquire set.
+#
 # Disclaimer.  The table includes representations of "add" and "and" operations;
 # corresponding/identical representations of "sub", "inc", "dec" and "or", "xor",
 # "andnot" operations are omitted.
@@ -27,16 +32,16 @@
     ------------------------------------------------------------------------------
     |                    Non-RMW ops |                                           |
     ------------------------------------------------------------------------------
-    |                      READ_ONCE | R[once]                                   |
+    |                      READ_ONCE | R[ONCE]                                   |
     |                    atomic_read |                                           |
-    |                     WRITE_ONCE | W[once]                                   |
+    |                     WRITE_ONCE | W[ONCE]                                   |
     |                     atomic_set |                                           |
-    |               smp_load_acquire | R[acquire]                                |
+    |               smp_load_acquire | R[ACQUIRE]                                |
     |            atomic_read_acquire |                                           |
-    |              smp_store_release | W[release]                                |
+    |              smp_store_release | W[RELEASE]                                |
     |             atomic_set_release |                                           |
-    |                   smp_store_mb | W[once] ->po F[mb]                        |
-    |                         smp_mb | F[mb]                                     |
+    |                   smp_store_mb | W[ONCE] ->po F[MB]                        |
+    |                         smp_mb | F[MB]                                     |
     |                        smp_rmb | F[rmb]                                    |
     |                        smp_wmb | F[wmb]                                    |
     |          smp_mb__before_atomic | F[before-atomic]                          |
@@ -49,8 +54,8 @@
     |                  rcu_read_lock | F[rcu-lock]                               |
     |                rcu_read_unlock | F[rcu-unlock]                             |
     |                synchronize_rcu | F[sync-rcu]                               |
-    |                rcu_dereference | R[once]                                   |
-    |             rcu_assign_pointer | W[release]                                |
+    |                rcu_dereference | R[ONCE]                                   |
+    |             rcu_assign_pointer | W[RELEASE]                                |
     |                 srcu_read_lock | R[srcu-lock]                              |
     |                 srcu_down_read |                                           |
     |               srcu_read_unlock | W[srcu-unlock]                            |
@@ -60,32 +65,31 @@
     ------------------------------------------------------------------------------
     |       RMW ops w/o return value |                                           |
     ------------------------------------------------------------------------------
-    |                     atomic_add | R*[noreturn] ->rmw W*[once]               |
+    |                     atomic_add | R*[NORETURN] ->rmw W*[NORETURN]           |
     |                     atomic_and |                                           |
     |                      spin_lock | LKR ->po LKW                              |
     ------------------------------------------------------------------------------
     |        RMW ops w/ return value |                                           |
     ------------------------------------------------------------------------------
-    |              atomic_add_return | F[mb] ->po R*[once]                       |
-    |                                |     ->rmw W*[once] ->po F[mb]             |
+    |              atomic_add_return | R*[MB] ->rmw W*[MB]                       |
     |               atomic_fetch_add |                                           |
     |               atomic_fetch_and |                                           |
     |                    atomic_xchg |                                           |
     |                           xchg |                                           |
     |            atomic_add_negative |                                           |
-    |      atomic_add_return_relaxed | R*[once] ->rmw W*[once]                   |
+    |      atomic_add_return_relaxed | R*[ONCE] ->rmw W*[ONCE]                   |
     |       atomic_fetch_add_relaxed |                                           |
     |       atomic_fetch_and_relaxed |                                           |
     |            atomic_xchg_relaxed |                                           |
     |                   xchg_relaxed |                                           |
     |    atomic_add_negative_relaxed |                                           |
-    |      atomic_add_return_acquire | R*[acquire] ->rmw W*[once]                |
+    |      atomic_add_return_acquire | R*[ACQUIRE] ->rmw W*[ACQUIRE]             |
     |       atomic_fetch_add_acquire |                                           |
     |       atomic_fetch_and_acquire |                                           |
     |            atomic_xchg_acquire |                                           |
     |                   xchg_acquire |                                           |
     |    atomic_add_negative_acquire |                                           |
-    |      atomic_add_return_release | R*[once] ->rmw W*[release]                |
+    |      atomic_add_return_release | R*[RELEASE] ->rmw W*[RELEASE]             |
     |       atomic_fetch_add_release |                                           |
     |       atomic_fetch_and_release |                                           |
     |            atomic_xchg_release |                                           |
@@ -94,17 +98,16 @@
     ------------------------------------------------------------------------------
     |            Conditional RMW ops |                                           |
     ------------------------------------------------------------------------------
-    |                 atomic_cmpxchg | On success: F[mb] ->po R*[once]           |
-    |                                |                 ->rmw W*[once] ->po F[mb] |
-    |                                | On failure: R*[once]                      |
+    |                 atomic_cmpxchg | On success: R*[MB] ->rmw W*[MB]           |
+    |                                | On failure: R*[MB]                        |
     |                        cmpxchg |                                           |
     |              atomic_add_unless |                                           |
-    |         atomic_cmpxchg_relaxed | On success: R*[once] ->rmw W*[once]       |
-    |                                | On failure: R*[once]                      |
-    |         atomic_cmpxchg_acquire | On success: R*[acquire] ->rmw W*[once]    |
-    |                                | On failure: R*[once]                      |
-    |         atomic_cmpxchg_release | On success: R*[once] ->rmw W*[release]    |
-    |                                | On failure: R*[once]                      |
+    |         atomic_cmpxchg_relaxed | On success: R*[ONCE] ->rmw W*[ONCE]       |
+    |                                | On failure: R*[ONCE]                      |
+    |         atomic_cmpxchg_acquire | On success: R*[ACQUIRE] ->rmw W*[ACQUIRE] |
+    |                                | On failure: R*[ACQUIRE]                   |
+    |         atomic_cmpxchg_release | On success: R*[RELEASE] ->rmw W*[RELEASE] |
+    |                                | On failure: R*[RELEASE]                   |
     |                   spin_trylock | On success: LKR ->po LKW                  |
     |                                | On failure: LF                            |
     ------------------------------------------------------------------------------
diff --git a/tools/memory-model/README b/tools/memory-model/README
index dab38904206a..64c860863aa9 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -20,7 +20,7 @@ that litmus test to be exercised within the Linux kernel.
 REQUIREMENTS
 ============
 
-Version 7.52 or higher of the "herd7" and "klitmus7" tools must be
+Version 7.58 or higher of the "herd7" and "klitmus7" tools must be
 downloaded separately:
 
   https://github.com/herd/herdtools7
@@ -79,7 +79,7 @@ Several thousand more example litmus tests are available here:
 	https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
 	https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
 
-Documentation describing litmus tests and now to use them may be found
+Documentation describing litmus tests and how to use them may be found
 here:
 
 	tools/memory-model/Documentation/litmus-tests.txt
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index ce068700939c..fe65998002b9 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -13,17 +13,18 @@
 
 "Linux-kernel memory consistency model"
 
-enum Accesses = 'once (*READ_ONCE,WRITE_ONCE*) ||
-		'release (*smp_store_release*) ||
-		'acquire (*smp_load_acquire*) ||
-		'noreturn (* R of non-return RMW *)
-instructions R[{'once,'acquire,'noreturn}]
-instructions W[{'once,'release}]
-instructions RMW[{'once,'acquire,'release}]
+enum Accesses = 'ONCE (*READ_ONCE,WRITE_ONCE*) ||
+		'RELEASE (*smp_store_release*) ||
+		'ACQUIRE (*smp_load_acquire*) ||
+		'NORETURN (* R of non-return RMW *) ||
+		'MB (*xchg(),cmpxchg(),...*)
+instructions R[Accesses]
+instructions W[Accesses]
+instructions RMW[Accesses]
 
 enum Barriers = 'wmb (*smp_wmb*) ||
 		'rmb (*smp_rmb*) ||
-		'mb (*smp_mb*) ||
+		'MB (*smp_mb*) ||
 		'barrier (*barrier*) ||
 		'rcu-lock (*rcu_read_lock*)  ||
 		'rcu-unlock (*rcu_read_unlock*) ||
@@ -35,6 +36,17 @@ enum Barriers = 'wmb (*smp_wmb*) ||
 		'after-srcu-read-unlock (*smp_mb__after_srcu_read_unlock*)
 instructions F[Barriers]
 
+
+(*
+ * Filter out syntactic annotations that do not provide the corresponding
+ * semantic ordering, such as Acquire on a store or Mb on a failed RMW.
+ *)
+let FailedRMW = RMW \ (domain(rmw) | range(rmw))
+let Acquire = ACQUIRE \ W \ FailedRMW
+let Release = RELEASE \ R \ FailedRMW
+let Mb = MB \ FailedRMW
+let Noreturn = NORETURN \ W
+
 (* SRCU *)
 enum SRCU = 'srcu-lock || 'srcu-unlock || 'sync-srcu
 instructions SRCU[SRCU]
@@ -73,7 +85,7 @@ flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
 flag ~empty different-values(srcu-rscs) as srcu-bad-value-match
 
 (* Compute marked and plain memory accesses *)
-let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) |
+let Marked = (~M) | IW | ONCE | RELEASE | ACQUIRE | MB | RMW |
 		LKR | LKW | UL | LF | RL | RU | Srcu-lock | Srcu-unlock
 let Plain = M \ Marked
 
@@ -82,3 +94,6 @@ let carry-dep = (data ; [~ Srcu-unlock] ; rfi)*
 let addr = carry-dep ; addr
 let ctrl = carry-dep ; ctrl
 let data = carry-dep ; data
+
+flag ~empty (if "lkmmv2" then 0 else _)
+  as this-model-requires-variant-higher-than-lkmmv1
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index adf3c4f41229..d7e7bf13c831 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -34,6 +34,16 @@ let R4rmb = R \ Noreturn	(* Reads for which rmb works *)
 let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb]
 let wmb = [W] ; fencerel(Wmb) ; [W]
 let mb = ([M] ; fencerel(Mb) ; [M]) |
+	(*
+	 * full-barrier RMWs (successful cmpxchg(), xchg(), etc.) act as
+	 * though there were enclosed by smp_mb().
+	 * The effect of these virtual smp_mb() is formalized by adding
+	 * Mb tags to the read and write of the operation, and providing
+	 * the same ordering as though there were additional po edges
+	 * between the Mb tag and the read resp. write.
+	 *)
+	([M] ; po ; [Mb & R]) |
+	([Mb & W] ; po ; [M]) |
 	([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
 	([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) |
 	([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
diff --git a/tools/memory-model/linux-kernel.cfg b/tools/memory-model/linux-kernel.cfg
index 3c8098e99f41..69b04f3aad73 100644
--- a/tools/memory-model/linux-kernel.cfg
+++ b/tools/memory-model/linux-kernel.cfg
@@ -1,6 +1,7 @@
 macros linux-kernel.def
 bell linux-kernel.bell
 model linux-kernel.cat
+variant lkmmv2
 graph columns
 squished true
 showevents noregs
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 88a39601f525..49e402782e49 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -6,18 +6,18 @@
 // which appeared in ASPLOS 2018.
 
 // ONCE
-READ_ONCE(X) __load{once}(X)
-WRITE_ONCE(X,V) { __store{once}(X,V); }
+READ_ONCE(X) __load{ONCE}(X)
+WRITE_ONCE(X,V) { __store{ONCE}(X,V); }
 
 // Release Acquire and friends
-smp_store_release(X,V) { __store{release}(*X,V); }
-smp_load_acquire(X) __load{acquire}(*X)
-rcu_assign_pointer(X,V) { __store{release}(X,V); }
-rcu_dereference(X) __load{once}(X)
-smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; }
+smp_store_release(X,V) { __store{RELEASE}(*X,V); }
+smp_load_acquire(X) __load{ACQUIRE}(*X)
+rcu_assign_pointer(X,V) { __store{RELEASE}(X,V); }
+rcu_dereference(X) __load{ONCE}(X)
+smp_store_mb(X,V) { __store{ONCE}(X,V); __fence{MB}; }
 
 // Fences
-smp_mb() { __fence{mb}; }
+smp_mb() { __fence{MB}; }
 smp_rmb() { __fence{rmb}; }
 smp_wmb() { __fence{wmb}; }
 smp_mb__before_atomic() { __fence{before-atomic}; }
@@ -28,14 +28,14 @@ smp_mb__after_srcu_read_unlock() { __fence{after-srcu-read-unlock}; }
 barrier() { __fence{barrier}; }
 
 // Exchange
-xchg(X,V)  __xchg{mb}(X,V)
-xchg_relaxed(X,V) __xchg{once}(X,V)
-xchg_release(X,V) __xchg{release}(X,V)
-xchg_acquire(X,V) __xchg{acquire}(X,V)
-cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
-cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
-cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
-cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
+xchg(X,V)  __xchg{MB}(X,V)
+xchg_relaxed(X,V) __xchg{ONCE}(X,V)
+xchg_release(X,V) __xchg{RELEASE}(X,V)
+xchg_acquire(X,V) __xchg{ACQUIRE}(X,V)
+cmpxchg(X,V,W) __cmpxchg{MB}(X,V,W)
+cmpxchg_relaxed(X,V,W) __cmpxchg{ONCE}(X,V,W)
+cmpxchg_acquire(X,V,W) __cmpxchg{ACQUIRE}(X,V,W)
+cmpxchg_release(X,V,W) __cmpxchg{RELEASE}(X,V,W)
 
 // Spinlocks
 spin_lock(X) { __lock(X); }
@@ -63,57 +63,86 @@ atomic_set(X,V) { WRITE_ONCE(*X,V); }
 atomic_read_acquire(X) smp_load_acquire(X)
 atomic_set_release(X,V) { smp_store_release(X,V); }
 
-atomic_add(V,X) { __atomic_op(X,+,V); }
-atomic_sub(V,X) { __atomic_op(X,-,V); }
-atomic_inc(X)   { __atomic_op(X,+,1); }
-atomic_dec(X)   { __atomic_op(X,-,1); }
-
-atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V)
-atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V)
-atomic_add_return_acquire(V,X) __atomic_op_return{acquire}(X,+,V)
-atomic_add_return_release(V,X) __atomic_op_return{release}(X,+,V)
-atomic_fetch_add(V,X) __atomic_fetch_op{mb}(X,+,V)
-atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{once}(X,+,V)
-atomic_fetch_add_acquire(V,X) __atomic_fetch_op{acquire}(X,+,V)
-atomic_fetch_add_release(V,X) __atomic_fetch_op{release}(X,+,V)
-
-atomic_inc_return(X) __atomic_op_return{mb}(X,+,1)
-atomic_inc_return_relaxed(X) __atomic_op_return{once}(X,+,1)
-atomic_inc_return_acquire(X) __atomic_op_return{acquire}(X,+,1)
-atomic_inc_return_release(X) __atomic_op_return{release}(X,+,1)
-atomic_fetch_inc(X) __atomic_fetch_op{mb}(X,+,1)
-atomic_fetch_inc_relaxed(X) __atomic_fetch_op{once}(X,+,1)
-atomic_fetch_inc_acquire(X) __atomic_fetch_op{acquire}(X,+,1)
-atomic_fetch_inc_release(X) __atomic_fetch_op{release}(X,+,1)
-
-atomic_sub_return(V,X) __atomic_op_return{mb}(X,-,V)
-atomic_sub_return_relaxed(V,X) __atomic_op_return{once}(X,-,V)
-atomic_sub_return_acquire(V,X) __atomic_op_return{acquire}(X,-,V)
-atomic_sub_return_release(V,X) __atomic_op_return{release}(X,-,V)
-atomic_fetch_sub(V,X) __atomic_fetch_op{mb}(X,-,V)
-atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{once}(X,-,V)
-atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{acquire}(X,-,V)
-atomic_fetch_sub_release(V,X) __atomic_fetch_op{release}(X,-,V)
-
-atomic_dec_return(X) __atomic_op_return{mb}(X,-,1)
-atomic_dec_return_relaxed(X) __atomic_op_return{once}(X,-,1)
-atomic_dec_return_acquire(X) __atomic_op_return{acquire}(X,-,1)
-atomic_dec_return_release(X) __atomic_op_return{release}(X,-,1)
-atomic_fetch_dec(X) __atomic_fetch_op{mb}(X,-,1)
-atomic_fetch_dec_relaxed(X) __atomic_fetch_op{once}(X,-,1)
-atomic_fetch_dec_acquire(X) __atomic_fetch_op{acquire}(X,-,1)
-atomic_fetch_dec_release(X) __atomic_fetch_op{release}(X,-,1)
-
-atomic_xchg(X,V) __xchg{mb}(X,V)
-atomic_xchg_relaxed(X,V) __xchg{once}(X,V)
-atomic_xchg_release(X,V) __xchg{release}(X,V)
-atomic_xchg_acquire(X,V) __xchg{acquire}(X,V)
-atomic_cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
-atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
-atomic_cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
-atomic_cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
-
-atomic_sub_and_test(V,X) __atomic_op_return{mb}(X,-,V) == 0
-atomic_dec_and_test(X)  __atomic_op_return{mb}(X,-,1) == 0
-atomic_inc_and_test(X)  __atomic_op_return{mb}(X,+,1) == 0
-atomic_add_negative(V,X) __atomic_op_return{mb}(X,+,V) < 0
+atomic_add(V,X) { __atomic_op{NORETURN}(X,+,V); }
+atomic_sub(V,X) { __atomic_op{NORETURN}(X,-,V); }
+atomic_and(V,X) { __atomic_op{NORETURN}(X,&,V); }
+atomic_or(V,X)  { __atomic_op{NORETURN}(X,|,V); }
+atomic_xor(V,X) { __atomic_op{NORETURN}(X,^,V); }
+atomic_inc(X)   { __atomic_op{NORETURN}(X,+,1); }
+atomic_dec(X)   { __atomic_op{NORETURN}(X,-,1); }
+atomic_andnot(V,X) { __atomic_op{NORETURN}(X,&~,V); }
+
+atomic_add_return(V,X) __atomic_op_return{MB}(X,+,V)
+atomic_add_return_relaxed(V,X) __atomic_op_return{ONCE}(X,+,V)
+atomic_add_return_acquire(V,X) __atomic_op_return{ACQUIRE}(X,+,V)
+atomic_add_return_release(V,X) __atomic_op_return{RELEASE}(X,+,V)
+atomic_fetch_add(V,X) __atomic_fetch_op{MB}(X,+,V)
+atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{ONCE}(X,+,V)
+atomic_fetch_add_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,+,V)
+atomic_fetch_add_release(V,X) __atomic_fetch_op{RELEASE}(X,+,V)
+
+atomic_fetch_and(V,X) __atomic_fetch_op{MB}(X,&,V)
+atomic_fetch_and_relaxed(V,X) __atomic_fetch_op{ONCE}(X,&,V)
+atomic_fetch_and_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,&,V)
+atomic_fetch_and_release(V,X) __atomic_fetch_op{RELEASE}(X,&,V)
+
+atomic_fetch_or(V,X) __atomic_fetch_op{MB}(X,|,V)
+atomic_fetch_or_relaxed(V,X) __atomic_fetch_op{ONCE}(X,|,V)
+atomic_fetch_or_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,|,V)
+atomic_fetch_or_release(V,X) __atomic_fetch_op{RELEASE}(X,|,V)
+
+atomic_fetch_xor(V,X) __atomic_fetch_op{MB}(X,^,V)
+atomic_fetch_xor_relaxed(V,X) __atomic_fetch_op{ONCE}(X,^,V)
+atomic_fetch_xor_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,^,V)
+atomic_fetch_xor_release(V,X) __atomic_fetch_op{RELEASE}(X,^,V)
+
+atomic_inc_return(X) __atomic_op_return{MB}(X,+,1)
+atomic_inc_return_relaxed(X) __atomic_op_return{ONCE}(X,+,1)
+atomic_inc_return_acquire(X) __atomic_op_return{ACQUIRE}(X,+,1)
+atomic_inc_return_release(X) __atomic_op_return{RELEASE}(X,+,1)
+atomic_fetch_inc(X) __atomic_fetch_op{MB}(X,+,1)
+atomic_fetch_inc_relaxed(X) __atomic_fetch_op{ONCE}(X,+,1)
+atomic_fetch_inc_acquire(X) __atomic_fetch_op{ACQUIRE}(X,+,1)
+atomic_fetch_inc_release(X) __atomic_fetch_op{RELEASE}(X,+,1)
+
+atomic_sub_return(V,X) __atomic_op_return{MB}(X,-,V)
+atomic_sub_return_relaxed(V,X) __atomic_op_return{ONCE}(X,-,V)
+atomic_sub_return_acquire(V,X) __atomic_op_return{ACQUIRE}(X,-,V)
+atomic_sub_return_release(V,X) __atomic_op_return{RELEASE}(X,-,V)
+atomic_fetch_sub(V,X) __atomic_fetch_op{MB}(X,-,V)
+atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{ONCE}(X,-,V)
+atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,-,V)
+atomic_fetch_sub_release(V,X) __atomic_fetch_op{RELEASE}(X,-,V)
+
+atomic_dec_return(X) __atomic_op_return{MB}(X,-,1)
+atomic_dec_return_relaxed(X) __atomic_op_return{ONCE}(X,-,1)
+atomic_dec_return_acquire(X) __atomic_op_return{ACQUIRE}(X,-,1)
+atomic_dec_return_release(X) __atomic_op_return{RELEASE}(X,-,1)
+atomic_fetch_dec(X) __atomic_fetch_op{MB}(X,-,1)
+atomic_fetch_dec_relaxed(X) __atomic_fetch_op{ONCE}(X,-,1)
+atomic_fetch_dec_acquire(X) __atomic_fetch_op{ACQUIRE}(X,-,1)
+atomic_fetch_dec_release(X) __atomic_fetch_op{RELEASE}(X,-,1)
+
+atomic_xchg(X,V) __xchg{MB}(X,V)
+atomic_xchg_relaxed(X,V) __xchg{ONCE}(X,V)
+atomic_xchg_release(X,V) __xchg{RELEASE}(X,V)
+atomic_xchg_acquire(X,V) __xchg{ACQUIRE}(X,V)
+atomic_cmpxchg(X,V,W) __cmpxchg{MB}(X,V,W)
+atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{ONCE}(X,V,W)
+atomic_cmpxchg_acquire(X,V,W) __cmpxchg{ACQUIRE}(X,V,W)
+atomic_cmpxchg_release(X,V,W) __cmpxchg{RELEASE}(X,V,W)
+
+atomic_sub_and_test(V,X) __atomic_op_return{MB}(X,-,V) == 0
+atomic_dec_and_test(X)  __atomic_op_return{MB}(X,-,1) == 0
+atomic_inc_and_test(X)  __atomic_op_return{MB}(X,+,1) == 0
+atomic_add_negative(V,X) __atomic_op_return{MB}(X,+,V) < 0
+atomic_add_negative_relaxed(V,X) __atomic_op_return{ONCE}(X,+,V) < 0
+atomic_add_negative_acquire(V,X) __atomic_op_return{ACQUIRE}(X,+,V) < 0
+atomic_add_negative_release(V,X) __atomic_op_return{RELEASE}(X,+,V) < 0
+
+atomic_fetch_andnot(V,X) __atomic_fetch_op{MB}(X,&~,V)
+atomic_fetch_andnot_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,&~,V)
+atomic_fetch_andnot_release(V,X) __atomic_fetch_op{RELEASE}(X,&~,V)
+atomic_fetch_andnot_relaxed(V,X) __atomic_fetch_op{ONCE}(X,&~,V)
+
+atomic_add_unless(X,V,W) __atomic_add_unless{MB}(X,V,W)
diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c
index bcac7ebfb51f..d7e5e8902af8 100644
--- a/tools/mm/page-types.c
+++ b/tools/mm/page-types.c
@@ -24,8 +24,8 @@
 #include <signal.h>
 #include <inttypes.h>
 #include <sys/types.h>
-#include <sys/errno.h>
-#include <sys/fcntl.h>
+#include <errno.h>
+#include <fcntl.h>
 #include <sys/mount.h>
 #include <sys/statfs.h>
 #include <sys/mman.h>
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index d1cdf2a8f826..211df5a93ad9 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -1,5 +1,17 @@
 # SPDX-License-Identifier: GPL-2.0
 
+include ../../scripts/Makefile.arch
+
+INSTALL	?= install
+prefix  ?= /usr
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+libdir  ?= $(prefix)/$(libdir_relative)
+includedir ?= $(prefix)/include
+
 SUBDIRS = lib generated samples
 
 all: $(SUBDIRS) libynl.a
@@ -21,5 +33,20 @@ clean distclean:
 		fi \
 	done
 	rm -f libynl.a
+	rm -rf pyynl/__pycache__
+	rm -rf pyynl/lib/__pycache__
+	rm -rf pyynl.egg-info
+	rm -rf build
+
+install: libynl.a lib/*.h
+	@echo -e "\tINSTALL libynl.a"
+	@$(INSTALL) -d $(DESTDIR)$(libdir)
+	@$(INSTALL) -m 0644 libynl.a $(DESTDIR)$(libdir)/libynl.a
+	@echo -e "\tINSTALL libynl headers"
+	@$(INSTALL) -d $(DESTDIR)$(includedir)/ynl
+	@$(INSTALL) -m 0644 lib/*.h $(DESTDIR)$(includedir)/ynl/
+	@echo -e "\tINSTALL pyynl"
+	@pip install --prefix=$(DESTDIR)$(prefix) .
+	@make -C generated install
 
-.PHONY: all clean distclean $(SUBDIRS)
+.PHONY: all clean distclean install $(SUBDIRS)
diff --git a/tools/net/ynl/generated/.gitignore b/tools/net/ynl/generated/.gitignore
index ade488626d26..859a6fb446e1 100644
--- a/tools/net/ynl/generated/.gitignore
+++ b/tools/net/ynl/generated/.gitignore
@@ -1,2 +1,3 @@
 *-user.c
 *-user.h
+*.rst
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 7db5240de58a..21f9e299dc75 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -7,32 +7,44 @@ ifeq ("$(DEBUG)","1")
   CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
 endif
 
+INSTALL     ?= install
+prefix      ?= /usr
+datarootdir ?= $(prefix)/share
+docdir      ?= $(datarootdir)/doc
+includedir  ?= $(prefix)/include
+
 include ../Makefile.deps
 
 YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \
 	--exclude-op stats-get
 
-TOOL:=../ynl-gen-c.py
+TOOL:=../pyynl/ynl_gen_c.py
+TOOL_RST:=../pyynl/ynl_gen_rst.py
 
+SPECS_DIR:=../../../../Documentation/netlink/specs
 GENS_PATHS=$(shell grep -nrI --files-without-match \
 		'protocol: netlink' \
-		../../../../Documentation/netlink/specs/)
-GENS=$(patsubst ../../../../Documentation/netlink/specs/%.yaml,%,${GENS_PATHS})
+		$(SPECS_DIR))
+GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS})
 SRCS=$(patsubst %,%-user.c,${GENS})
 HDRS=$(patsubst %,%-user.h,${GENS})
 OBJS=$(patsubst %,%-user.o,${GENS})
 
-all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI)
+SPECS_PATHS=$(wildcard $(SPECS_DIR)/*.yaml)
+SPECS=$(patsubst $(SPECS_DIR)/%.yaml,%,${SPECS_PATHS})
+RSTS=$(patsubst %,%.rst,${SPECS})
+
+all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) $(RSTS)
 
 protos.a: $(OBJS)
 	@echo -e "\tAR $@"
 	@ar rcs $@ $(OBJS)
 
-%-user.h: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+%-user.h: $(SPECS_DIR)/%.yaml $(TOOL)
 	@echo -e "\tGEN $@"
 	@$(TOOL) --mode user --header --spec $< -o $@ $(YNL_GEN_ARG_$*)
 
-%-user.c: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+%-user.c: $(SPECS_DIR)/%.yaml $(TOOL)
 	@echo -e "\tGEN $@"
 	@$(TOOL) --mode user --source --spec $< -o $@ $(YNL_GEN_ARG_$*)
 
@@ -40,14 +52,37 @@ protos.a: $(OBJS)
 	@echo -e "\tCC $@"
 	@$(COMPILE.c) $(CFLAGS_$*) -o $@ $<
 
+%.rst: $(SPECS_DIR)/%.yaml $(TOOL_RST)
+	@echo -e "\tGEN_RST $@"
+	@$(TOOL_RST) -o $@ -i $<
+
 clean:
 	rm -f *.o
 
 distclean: clean
-	rm -f *.c *.h *.a
+	rm -f *.c *.h *.a *.rst
 
 regen:
 	@../ynl-regen.sh
 
-.PHONY: all clean distclean regen
+install-headers: $(HDRS)
+	@echo -e "\tINSTALL generated headers"
+	@$(INSTALL) -d $(DESTDIR)$(includedir)/ynl
+	@$(INSTALL) -m 0644 *.h $(DESTDIR)$(includedir)/ynl/
+
+install-rsts: $(RSTS)
+	@echo -e "\tINSTALL generated docs"
+	@$(INSTALL) -d $(DESTDIR)$(docdir)/ynl
+	@$(INSTALL) -m 0644 $(RSTS) $(DESTDIR)$(docdir)/ynl/
+
+install-specs:
+	@echo -e "\tINSTALL specs"
+	@$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl
+	@$(INSTALL) -m 0644 ../../../../Documentation/netlink/*.yaml $(DESTDIR)$(datarootdir)/ynl/
+	@$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl/specs
+	@$(INSTALL) -m 0644 $(SPECS_DIR)/*.yaml $(DESTDIR)$(datarootdir)/ynl/specs/
+
+install: install-headers install-rsts install-specs
+
+.PHONY: all clean distclean regen install install-headers install-rsts install-specs
 .DEFAULT_GOAL: all
diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore
index 296c4035dbf2..a4383358ec72 100644
--- a/tools/net/ynl/lib/.gitignore
+++ b/tools/net/ynl/lib/.gitignore
@@ -1,2 +1 @@
-__pycache__/
 *.d
diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile
index 94c49cca3dca..4b2b98704ff9 100644
--- a/tools/net/ynl/lib/Makefile
+++ b/tools/net/ynl/lib/Makefile
@@ -19,7 +19,6 @@ ynl.a: $(OBJS)
 
 clean:
 	rm -f *.o *.d *~
-	rm -rf __pycache__
 
 distclean: clean
 	rm -f *.a
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index e16cef160bc2..ce32cb35007d 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -95,7 +95,7 @@ ynl_err_walk(struct ynl_sock *ys, void *start, void *end, unsigned int off,
 
 	ynl_attr_for_each_payload(start, data_len, attr) {
 		astart_off = (char *)attr - (char *)start;
-		aend_off = astart_off + ynl_attr_data_len(attr);
+		aend_off = (char *)ynl_attr_data_end(attr) - (char *)start;
 		if (aend_off <= off)
 			continue;
 
diff --git a/tools/net/ynl/pyproject.toml b/tools/net/ynl/pyproject.toml
new file mode 100644
index 000000000000..a81d8779b0e0
--- /dev/null
+++ b/tools/net/ynl/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pyynl"
+authors = [
+    {name = "Donald Hunter", email = "donald.hunter@gmail.com"},
+    {name = "Jakub Kicinski", email = "kuba@kernel.org"},
+]
+description = "yaml netlink (ynl)"
+version = "0.0.1"
+requires-python = ">=3.9"
+dependencies = [
+    "pyyaml==6.*",
+    "jsonschema==4.*"
+]
+
+[tool.setuptools.packages.find]
+include = ["pyynl", "pyynl.lib"]
+
+[project.scripts]
+ynl = "pyynl.cli:main"
+ynl-ethtool = "pyynl.ethtool:main"
diff --git a/tools/net/ynl/pyynl/.gitignore b/tools/net/ynl/pyynl/.gitignore
new file mode 100644
index 000000000000..b801cd2d016e
--- /dev/null
+++ b/tools/net/ynl/pyynl/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+lib/__pycache__/
diff --git a/tools/net/ynl/pyynl/__init__.py b/tools/net/ynl/pyynl/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/net/ynl/pyynl/__init__.py
diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/pyynl/cli.py
index 41d9fa5c818d..794e3c7dcc65 100755
--- a/tools/net/ynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -3,6 +3,7 @@
 
 import argparse
 import json
+import os
 import pathlib
 import pprint
 import sys
@@ -10,6 +11,24 @@ import sys
 sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
 from lib import YnlFamily, Netlink, NlError
 
+sys_schema_dir='/usr/share/ynl'
+relative_schema_dir='../../../../Documentation/netlink'
+
+def schema_dir():
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    schema_dir = os.path.abspath(f"{script_dir}/{relative_schema_dir}")
+    if not os.path.isdir(schema_dir):
+        schema_dir = sys_schema_dir
+    if not os.path.isdir(schema_dir):
+        raise Exception(f"Schema directory {schema_dir} does not exist")
+    return schema_dir
+
+def spec_dir():
+    spec_dir = schema_dir() + '/specs'
+    if not os.path.isdir(spec_dir):
+        raise Exception(f"Spec directory {spec_dir} does not exist")
+    return spec_dir
+
 
 class YnlEncoder(json.JSONEncoder):
     def default(self, obj):
@@ -32,7 +51,14 @@ def main():
 
     parser = argparse.ArgumentParser(description=description,
                                      epilog=epilog)
-    parser.add_argument('--spec', dest='spec', type=str, required=True)
+    spec_group = parser.add_mutually_exclusive_group(required=True)
+    spec_group.add_argument('--family', dest='family', type=str,
+                            help='name of the netlink FAMILY')
+    spec_group.add_argument('--list-families', action='store_true',
+                            help='list all netlink families supported by YNL (has spec)')
+    spec_group.add_argument('--spec', dest='spec', type=str,
+                            help='choose the family by SPEC file path')
+
     parser.add_argument('--schema', dest='schema', type=str)
     parser.add_argument('--no-schema', action='store_true')
     parser.add_argument('--json', dest='json_text', type=str)
@@ -70,6 +96,12 @@ def main():
         else:
             pprint.PrettyPrinter().pprint(msg)
 
+    if args.list_families:
+        for filename in sorted(os.listdir(spec_dir())):
+            if filename.endswith('.yaml'):
+                print(filename.removesuffix('.yaml'))
+        return
+
     if args.no_schema:
         args.schema = ''
 
@@ -77,7 +109,16 @@ def main():
     if args.json_text:
         attrs = json.loads(args.json_text)
 
-    ynl = YnlFamily(args.spec, args.schema, args.process_unknown,
+    if args.family:
+        spec = f"{spec_dir()}/{args.family}.yaml"
+        if args.schema is None and spec.startswith(sys_schema_dir):
+            args.schema = '' # disable schema validation when installed
+    else:
+        spec = args.spec
+    if not os.path.isfile(spec):
+        raise Exception(f"Spec file {spec} does not exist")
+
+    ynl = YnlFamily(spec, args.schema, args.process_unknown,
                     recv_size=args.dbg_small_recv)
     if args.dbg_small_recv:
         ynl.set_recv_dbg(True)
diff --git a/tools/net/ynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index ebb0a11f67bf..af7fddd7b085 100755
--- a/tools/net/ynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -11,6 +11,7 @@ import os
 
 sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
 from lib import YnlFamily
+from cli import schema_dir, spec_dir
 
 def args_to_req(ynl, op_name, args, req):
     """
@@ -156,10 +157,8 @@ def main():
     args = parser.parse_args()
 
     script_abs_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
-    spec = os.path.join(script_abs_dir,
-                        '../../../Documentation/netlink/specs/ethtool.yaml')
-    schema = os.path.join(script_abs_dir,
-                          '../../../Documentation/netlink/genetlink-legacy.yaml')
+    spec = os.path.join(spec_dir(), 'ethtool.yaml')
+    schema = os.path.join(schema_dir(), 'genetlink-legacy.yaml')
 
     ynl = YnlFamily(spec, schema)
 
diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index 9137b83e580a..9137b83e580a 100644
--- a/tools/net/ynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/pyynl/lib/nlspec.py
index a745739655ad..314ec8007496 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/pyynl/lib/nlspec.py
@@ -219,7 +219,10 @@ class SpecAttrSet(SpecElement):
         else:
             real_set = family.attr_sets[self.subset_of]
             for elem in self.yaml['attributes']:
-                attr = real_set[elem['name']]
+                real_attr = real_set[elem['name']]
+                combined_elem = real_attr.yaml | elem
+                attr = self.new_attr(combined_elem, real_attr.value)
+
                 self.attrs[attr.name] = attr
                 self.attrs_by_val[attr.value] = attr
 
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index eea29359a899..08f8bf89cfc2 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -733,41 +733,45 @@ class YnlFamily(SpecFamily):
                 self._rsp_add(rsp, attr_name, None, self._decode_unknown(attr))
                 continue
 
-            if attr_spec["type"] == 'nest':
-                subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs)
-                decoded = subdict
-            elif attr_spec["type"] == 'string':
-                decoded = attr.as_strz()
-            elif attr_spec["type"] == 'binary':
-                decoded = self._decode_binary(attr, attr_spec)
-            elif attr_spec["type"] == 'flag':
-                decoded = True
-            elif attr_spec.is_auto_scalar:
-                decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order)
-            elif attr_spec["type"] in NlAttr.type_formats:
-                decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
-                if 'enum' in attr_spec:
-                    decoded = self._decode_enum(decoded, attr_spec)
-                elif attr_spec.display_hint:
-                    decoded = self._formatted_string(decoded, attr_spec.display_hint)
-            elif attr_spec["type"] == 'indexed-array':
-                decoded = self._decode_array_attr(attr, attr_spec)
-            elif attr_spec["type"] == 'bitfield32':
-                value, selector = struct.unpack("II", attr.raw)
-                if 'enum' in attr_spec:
-                    value = self._decode_enum(value, attr_spec)
-                    selector = self._decode_enum(selector, attr_spec)
-                decoded = {"value": value, "selector": selector}
-            elif attr_spec["type"] == 'sub-message':
-                decoded = self._decode_sub_msg(attr, attr_spec, search_attrs)
-            elif attr_spec["type"] == 'nest-type-value':
-                decoded = self._decode_nest_type_value(attr, attr_spec)
-            else:
-                if not self.process_unknown:
-                    raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
-                decoded = self._decode_unknown(attr)
-
-            self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded)
+            try:
+                if attr_spec["type"] == 'nest':
+                    subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs)
+                    decoded = subdict
+                elif attr_spec["type"] == 'string':
+                    decoded = attr.as_strz()
+                elif attr_spec["type"] == 'binary':
+                    decoded = self._decode_binary(attr, attr_spec)
+                elif attr_spec["type"] == 'flag':
+                    decoded = True
+                elif attr_spec.is_auto_scalar:
+                    decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order)
+                elif attr_spec["type"] in NlAttr.type_formats:
+                    decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
+                    if 'enum' in attr_spec:
+                        decoded = self._decode_enum(decoded, attr_spec)
+                    elif attr_spec.display_hint:
+                        decoded = self._formatted_string(decoded, attr_spec.display_hint)
+                elif attr_spec["type"] == 'indexed-array':
+                    decoded = self._decode_array_attr(attr, attr_spec)
+                elif attr_spec["type"] == 'bitfield32':
+                    value, selector = struct.unpack("II", attr.raw)
+                    if 'enum' in attr_spec:
+                        value = self._decode_enum(value, attr_spec)
+                        selector = self._decode_enum(selector, attr_spec)
+                    decoded = {"value": value, "selector": selector}
+                elif attr_spec["type"] == 'sub-message':
+                    decoded = self._decode_sub_msg(attr, attr_spec, search_attrs)
+                elif attr_spec["type"] == 'nest-type-value':
+                    decoded = self._decode_nest_type_value(attr, attr_spec)
+                else:
+                    if not self.process_unknown:
+                        raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
+                    decoded = self._decode_unknown(attr)
+
+                self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded)
+            except:
+                print(f"Error decoding '{attr_spec.name}' from '{space}'")
+                raise
 
         return rsp
 
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index 6750fdb42564..c2eabc90dce8 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -79,6 +79,20 @@ class Type(SpecAttr):
         self.enum_name = None
         delattr(self, "enum_name")
 
+    def _get_real_attr(self):
+        # if the attr is for a subset return the "real" attr (just one down, does not recurse)
+        return self.family.attr_sets[self.attr_set.subset_of][self.name]
+
+    def set_request(self):
+        self.request = True
+        if self.attr_set.subset_of:
+            self._get_real_attr().set_request()
+
+    def set_reply(self):
+        self.reply = True
+        if self.attr_set.subset_of:
+            self._get_real_attr().set_reply()
+
     def get_limit(self, limit, default=None):
         value = self.checks.get(limit, default)
         if value is None:
@@ -106,6 +120,10 @@ class Type(SpecAttr):
             enum_name = f"{self.attr_set.name_prefix}{self.name}"
         self.enum_name = c_upper(enum_name)
 
+        if self.attr_set.subset_of:
+            if self.checks != self._get_real_attr().checks:
+                raise Exception("Overriding checks not supported by codegen, yet")
+
     def is_multi_val(self):
         return None
 
@@ -801,6 +819,8 @@ class EnumSet(SpecEnumSet):
             self.user_type = 'int'
 
         self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-")
+        self.header = yaml.get('header', None)
+        self.enum_cnt_name = yaml.get('enum-cnt-name', None)
 
         super().__init__(family, yaml)
 
@@ -1117,17 +1137,17 @@ class Family(SpecFamily):
         for _, struct in self.pure_nested_structs.items():
             if struct.request:
                 for _, arg in struct.member_list():
-                    arg.request = True
+                    arg.set_request()
             if struct.reply:
                 for _, arg in struct.member_list():
-                    arg.reply = True
+                    arg.set_reply()
 
         for root_set, rs_members in self.root_sets.items():
             for attr, spec in self.attr_sets[root_set].items():
                 if attr in rs_members['request']:
-                    spec.request = True
+                    spec.set_request()
                 if attr in rs_members['reply']:
-                    spec.reply = True
+                    spec.set_reply()
 
     def _load_global_policy(self):
         global_set = set()
@@ -1763,7 +1783,14 @@ def parse_rsp_nested(ri, struct):
                   f'{struct.ptr_name}dst = yarg->data;']
     init_lines = []
 
-    _multi_parse(ri, struct, init_lines, local_vars)
+    if struct.member_list():
+        _multi_parse(ri, struct, init_lines, local_vars)
+    else:
+        # Empty nest
+        ri.cw.block_start()
+        ri.cw.p('return 0;')
+        ri.cw.block_end()
+        ri.cw.nl()
 
 
 def parse_rsp_msg(ri, deref=False):
@@ -2427,6 +2454,87 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'):
     cw.block_start(line=start_line)
 
 
+def render_uapi_unified(family, cw, max_by_define, separate_ntf):
+    max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX"))
+    cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX"))
+    max_value = f"({cnt_name} - 1)"
+
+    uapi_enum_start(family, cw, family['operations'], 'enum-name')
+    val = 0
+    for op in family.msgs.values():
+        if separate_ntf and ('notify' in op or 'event' in op):
+            continue
+
+        suffix = ','
+        if op.value != val:
+            suffix = f" = {op.value},"
+            val = op.value
+        cw.p(op.enum_name + suffix)
+        val += 1
+    cw.nl()
+    cw.p(cnt_name + ('' if max_by_define else ','))
+    if not max_by_define:
+        cw.p(f"{max_name} = {max_value}")
+    cw.block_end(line=';')
+    if max_by_define:
+        cw.p(f"#define {max_name} {max_value}")
+    cw.nl()
+
+
+def render_uapi_directional(family, cw, max_by_define):
+    max_name = f"{family.op_prefix}USER_MAX"
+    cnt_name = f"__{family.op_prefix}USER_CNT"
+    max_value = f"({cnt_name} - 1)"
+
+    cw.block_start(line='enum')
+    cw.p(c_upper(f'{family.name}_MSG_USER_NONE = 0,'))
+    val = 0
+    for op in family.msgs.values():
+        if 'do' in op and 'event' not in op:
+            suffix = ','
+            if op.value and op.value != val:
+                suffix = f" = {op.value},"
+                val = op.value
+            cw.p(op.enum_name + suffix)
+            val += 1
+    cw.nl()
+    cw.p(cnt_name + ('' if max_by_define else ','))
+    if not max_by_define:
+        cw.p(f"{max_name} = {max_value}")
+    cw.block_end(line=';')
+    if max_by_define:
+        cw.p(f"#define {max_name} {max_value}")
+    cw.nl()
+
+    max_name = f"{family.op_prefix}KERNEL_MAX"
+    cnt_name = f"__{family.op_prefix}KERNEL_CNT"
+    max_value = f"({cnt_name} - 1)"
+
+    cw.block_start(line='enum')
+    cw.p(c_upper(f'{family.name}_MSG_KERNEL_NONE = 0,'))
+    val = 0
+    for op in family.msgs.values():
+        if ('do' in op and 'reply' in op['do']) or 'notify' in op or 'event' in op:
+            enum_name = op.enum_name
+            if 'event' not in op and 'notify' not in op:
+                enum_name = f'{enum_name}_REPLY'
+
+            suffix = ','
+            if op.value and op.value != val:
+                suffix = f" = {op.value},"
+                val = op.value
+            cw.p(enum_name + suffix)
+            val += 1
+    cw.nl()
+    cw.p(cnt_name + ('' if max_by_define else ','))
+    if not max_by_define:
+        cw.p(f"{max_name} = {max_value}")
+    cw.block_end(line=';')
+    if max_by_define:
+        cw.p(f"#define {max_name} {max_value}")
+    cw.nl()
+
+
 def render_uapi(family, cw):
     hdr_prot = f"_UAPI_LINUX_{c_upper(family.uapi_header_name)}_H"
     hdr_prot = hdr_prot.replace('/', '_')
@@ -2450,6 +2558,9 @@ def render_uapi(family, cw):
         if const['type'] == 'enum' or const['type'] == 'flags':
             enum = family.consts[const['name']]
 
+            if enum.header:
+                continue
+
             if enum.has_doc():
                 if enum.has_entry_doc():
                     cw.p('/**')
@@ -2482,9 +2593,12 @@ def render_uapi(family, cw):
                     max_val = f' = {enum.get_mask()},'
                     cw.p(max_name + max_val)
                 else:
+                    cnt_name = enum.enum_cnt_name
                     max_name = c_upper(name_pfx + 'max')
-                    cw.p('__' + max_name + ',')
-                    cw.p(max_name + ' = (__' + max_name + ' - 1)')
+                    if not cnt_name:
+                        cnt_name = '__' + name_pfx + 'max'
+                    cw.p(c_upper(cnt_name) + ',')
+                    cw.p(max_name + ' = (' + c_upper(cnt_name) + ' - 1)')
             cw.block_end(line=';')
             cw.nl()
         elif const['type'] == 'const':
@@ -2513,7 +2627,8 @@ def render_uapi(family, cw):
                 val = attr.value
             val += 1
             cw.p(attr.enum_name + suffix)
-        cw.nl()
+        if attr_set.items():
+            cw.nl()
         cw.p(attr_set.cnt_name + ('' if max_by_define else ','))
         if not max_by_define:
             cw.p(f"{attr_set.max_name} = {max_value}")
@@ -2525,30 +2640,12 @@ def render_uapi(family, cw):
     # Commands
     separate_ntf = 'async-prefix' in family['operations']
 
-    max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX"))
-    cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX"))
-    max_value = f"({cnt_name} - 1)"
-
-    uapi_enum_start(family, cw, family['operations'], 'enum-name')
-    val = 0
-    for op in family.msgs.values():
-        if separate_ntf and ('notify' in op or 'event' in op):
-            continue
-
-        suffix = ','
-        if op.value != val:
-            suffix = f" = {op.value},"
-            val = op.value
-        cw.p(op.enum_name + suffix)
-        val += 1
-    cw.nl()
-    cw.p(cnt_name + ('' if max_by_define else ','))
-    if not max_by_define:
-        cw.p(f"{max_name} = {max_value}")
-    cw.block_end(line=';')
-    if max_by_define:
-        cw.p(f"#define {max_name} {max_value}")
-    cw.nl()
+    if family.msg_id_model == 'unified':
+        render_uapi_unified(family, cw, max_by_define, separate_ntf)
+    elif family.msg_id_model == 'directional':
+        render_uapi_directional(family, cw, max_by_define)
+    else:
+        raise Exception(f'Unsupported message enum-model {family.msg_id_model}')
 
     if separate_ntf:
         uapi_enum_start(family, cw, family['operations'], enum_name='async-enum')
@@ -2645,7 +2742,8 @@ def find_kernel_root(full_path):
 
 def main():
     parser = argparse.ArgumentParser(description='Netlink simple parsing generator')
-    parser.add_argument('--mode', dest='mode', type=str, required=True)
+    parser.add_argument('--mode', dest='mode', type=str, required=True,
+                        choices=('user', 'kernel', 'uapi'))
     parser.add_argument('--spec', dest='spec', type=str, required=True)
     parser.add_argument('--header', dest='header', action='store_true', default=None)
     parser.add_argument('--source', dest='header', action='store_false')
@@ -2672,13 +2770,6 @@ def main():
         os.sys.exit(1)
         return
 
-    supported_models = ['unified']
-    if args.mode in ['user', 'kernel']:
-        supported_models += ['directional']
-    if parsed.msg_id_model not in supported_models:
-        print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation')
-        os.sys.exit(1)
-
     cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out))
 
     _, spec_kernel = find_kernel_root(args.spec)
@@ -2706,7 +2797,10 @@ def main():
         cw.p('#define ' + hdr_prot)
         cw.nl()
 
-    hdr_file=os.path.basename(args.out_file[:-2]) + ".h"
+    if args.out_file:
+        hdr_file = os.path.basename(args.out_file[:-2]) + ".h"
+    else:
+        hdr_file = "generated_header_file.h"
 
     if args.mode == 'kernel':
         cw.p('#include <net/netlink.h>')
@@ -2728,12 +2822,17 @@ def main():
         else:
             cw.p(f'#include "{hdr_file}"')
             cw.p('#include "ynl.h"')
-        headers = [parsed.uapi_header]
+        headers = []
     for definition in parsed['definitions']:
         if 'header' in definition:
             headers.append(definition['header'])
+    if args.mode == 'user':
+        headers.append(parsed.uapi_header)
+    seen_header = []
     for one in headers:
-        cw.p(f"#include <{one}>")
+        if one not in seen_header:
+            cw.p(f"#include <{one}>")
+            seen_header.append(one)
     cw.nl()
 
     if args.mode == "user":
diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 6c56d0d726b4..6c56d0d726b4 100755
--- a/tools/net/ynl/ynl-gen-rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh
index a37304dcc88e..81b4ecd89100 100755
--- a/tools/net/ynl/ynl-regen.sh
+++ b/tools/net/ynl/ynl-regen.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
-TOOL=$(dirname $(realpath $0))/ynl-gen-c.py
+TOOL=$(dirname $(realpath $0))/pyynl/ynl_gen_c.py
 
 force=
 search=
diff --git a/tools/objtool/Documentation/objtool.txt b/tools/objtool/Documentation/objtool.txt
index 7c3ee959b63c..28ac57b9e102 100644
--- a/tools/objtool/Documentation/objtool.txt
+++ b/tools/objtool/Documentation/objtool.txt
@@ -28,6 +28,15 @@ Objtool has the following features:
   sites, enabling the kernel to patch them inline, to prevent "thunk
   funneling" for both security and performance reasons
 
+- Return thunk validation -- validates return thunks are used for
+  certain CPU mitigations including Retbleed and SRSO
+
+- Return thunk annotation -- annotates all return thunk sites so kernel
+  can patch them inline, depending on enabled mitigations
+
+- Return thunk training valiation -- validate that all entry paths
+  untrain a "safe return" before the first return (or call)
+
 - Non-instrumentation validation -- validates non-instrumentable
   ("noinstr") code rules, preventing instrumentation in low-level C
   entry code
@@ -53,6 +62,9 @@ Objtool has the following features:
 - Function entry annotation -- annotates function entries, enabling
   kernel function tracing
 
+- Function preamble (prefix) annotation and/or symbol generation -- used
+  for FineIBT and call depth tracking
+
 - Other toolchain hacks which will go unmentioned at this time...
 
 Each feature can be enabled individually or in combination using the
@@ -197,19 +209,17 @@ To achieve the validation, objtool enforces the following rules:
 
 1. Each callable function must be annotated as such with the ELF
    function type.  In asm code, this is typically done using the
-   ENTRY/ENDPROC macros.  If objtool finds a return instruction
+   SYM_FUNC_{START,END} macros.  If objtool finds a return instruction
    outside of a function, it flags an error since that usually indicates
    callable code which should be annotated accordingly.
 
    This rule is needed so that objtool can properly identify each
    callable function in order to analyze its stack metadata.
 
-2. Conversely, each section of code which is *not* callable should *not*
-   be annotated as an ELF function.  The ENDPROC macro shouldn't be used
-   in this case.
-
-   This rule is needed so that objtool can ignore non-callable code.
-   Such code doesn't have to follow any of the other rules.
+2. Conversely, each section of code which is *not* callable, or is
+   otherwise doing funny things with the stack or registers, should
+   *not* be annotated as an ELF function.  Rather, SYM_CODE_{START,END}
+   should be used along with unwind hints.
 
 3. Each callable function which calls another function must have the
    correct frame pointer logic, if required by CONFIG_FRAME_POINTER or
@@ -221,7 +231,7 @@ To achieve the validation, objtool enforces the following rules:
    function B, the _caller_ of function A will be skipped on the stack
    trace.
 
-4. Dynamic jumps and jumps to undefined symbols are only allowed if:
+4. Indirect jumps and jumps to undefined symbols are only allowed if:
 
    a) the jump is part of a switch statement; or
 
@@ -304,29 +314,29 @@ the objtool maintainers.
    001e    2823e:  80 ce 02                or     $0x2,%dh
    ...
 
+
 2. file.o: warning: objtool: .text+0x53: unreachable instruction
 
    Objtool couldn't find a code path to reach the instruction.
 
    If the error is for an asm file, and the instruction is inside (or
    reachable from) a callable function, the function should be annotated
-   with the ENTRY/ENDPROC macros (ENDPROC is the important one).
-   Otherwise, the code should probably be annotated with the unwind hint
-   macros in asm/unwind_hints.h so objtool and the unwinder can know the
-   stack state associated with the code.
+   with the SYM_FUNC_START and SYM_FUNC_END macros.
 
-   If you're 100% sure the code won't affect stack traces, or if you're
-   a just a bad person, you can tell objtool to ignore it.  See the
-   "Adding exceptions" section below.
+   Otherwise, SYM_CODE_START can be used.  In that case the code needs
+   to be annotated with unwind hint macros.
 
-   If it's not actually in a callable function (e.g. kernel entry code),
-   change ENDPROC to END.
+   If you're sure the code won't affect the reliability of runtime stack
+   traces and want objtool to ignore it, see "Adding exceptions" below.
 
-3. file.o: warning: objtool: foo+0x48c: bar() is missing a __noreturn annotation
 
-   The call from foo() to bar() doesn't return, but bar() is missing the
-   __noreturn annotation.  NOTE: In addition to annotating the function
-   with __noreturn, please also add it to tools/objtool/noreturns.h.
+3. file.o: warning: objtool: foo+0x48c: bar() missing __noreturn in .c/.h or NORETURN() in noreturns.h
+
+   The call from foo() to bar() doesn't return, but bar() is incorrectly
+   annotated.  A noreturn function must be marked __noreturn in both its
+   declaration and its definition, and must have a NORETURN() annotation
+   in tools/objtool/noreturns.h.
+
 
 4. file.o: warning: objtool: func(): can't find starting instruction
    or
@@ -341,23 +351,21 @@ the objtool maintainers.
 
    This is a kernel entry/exit instruction like sysenter or iret.  Such
    instructions aren't allowed in a callable function, and are most
-   likely part of the kernel entry code.  They should usually not have
-   the callable function annotation (ENDPROC) and should always be
-   annotated with the unwind hint macros in asm/unwind_hints.h.
+   likely part of the kernel entry code.  Such code should probably be
+   placed in a SYM_FUNC_CODE block with unwind hints.
 
 
 6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
 
-   This is a dynamic jump or a jump to an undefined symbol.  Objtool
-   assumed it's a sibling call and detected that the frame pointer
-   wasn't first restored to its original state.
+   This is a branch to an UNDEF symbol.  Objtool assumed it's a
+   sibling call and detected that the stack wasn't first restored to its
+   original state.
 
-   If it's not really a sibling call, you may need to move the
-   destination code to the local file.
+   If it's not really a sibling call, you may need to use unwind hints
+   and/or move the destination code to the local file.
 
    If the instruction is not actually in a callable function (e.g.
-   kernel entry code), change ENDPROC to END and annotate manually with
-   the unwind hint macros in asm/unwind_hints.h.
+   kernel entry code), use SYM_CODE_{START,END} and unwind hints.
 
 
 7. file: warning: objtool: func()+0x5c: stack state mismatch
@@ -373,8 +381,8 @@ the objtool maintainers.
 
    Another possibility is that the code has some asm or inline asm which
    does some unusual things to the stack or the frame pointer.  In such
-   cases it's probably appropriate to use the unwind hint macros in
-   asm/unwind_hints.h.
+   cases it's probably appropriate to use SYM_FUNC_CODE with unwind
+   hints.
 
 
 8. file.o: warning: objtool: funcA() falls through to next function funcB()
@@ -384,17 +392,16 @@ the objtool maintainers.
    can fall through into the next function.  There could be different
    reasons for this:
 
-   1) funcA()'s last instruction is a call to a "noreturn" function like
+   a) funcA()'s last instruction is a call to a "noreturn" function like
       panic().  In this case the noreturn function needs to be added to
       objtool's hard-coded global_noreturns array.  Feel free to bug the
       objtool maintainer, or you can submit a patch.
 
-   2) funcA() uses the unreachable() annotation in a section of code
+   b) funcA() uses the unreachable() annotation in a section of code
       that is actually reachable.
 
-   3) If funcA() calls an inline function, the object code for funcA()
-      might be corrupt due to a gcc bug.  For more details, see:
-      https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
+   c) Some undefined behavior like divide by zero.
+
 
 9. file.o: warning: objtool: funcA() call to funcB() with UACCESS enabled
 
@@ -432,24 +439,26 @@ the objtool maintainers.
     This limitation can be overcome by massaging the alternatives with
     NOPs to shift the stack changes around so they no longer conflict.
 
+
 11. file.o: warning: unannotated intra-function call
 
-   This warning means that a direct call is done to a destination which
-   is not at the beginning of a function. If this is a legit call, you
-   can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
-   directive right before the call.
+    This warning means that a direct call is done to a destination which
+    is not at the beginning of a function. If this is a legit call, you
+    can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
+    directive right before the call.
+
 
 12. file.o: warning: func(): not an indirect call target
 
-   This means that objtool is running with --ibt and a function expected
-   to be an indirect call target is not. In particular, this happens for
-   init_module() or cleanup_module() if a module relies on these special
-   names and does not use module_init() / module_exit() macros to create
-   them.
+    This means that objtool is running with --ibt and a function
+    expected to be an indirect call target is not. In particular, this
+    happens for init_module() or cleanup_module() if a module relies on
+    these special names and does not use module_init() / module_exit()
+    macros to create them.
 
 
 If the error doesn't seem to make sense, it could be a bug in objtool.
-Feel free to ask the objtool maintainer for help.
+Feel free to ask objtool maintainers for help.
 
 
 Adding exceptions
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index f56e27727534..8c20361dd100 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -37,7 +37,7 @@ OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBE
 OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
 
 # Allow old libelf to be used:
-elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - | grep elf_getshdr)
+elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - 2>/dev/null | grep elf_getshdr)
 OBJTOOL_CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
 
 # Always want host compilation.
@@ -46,12 +46,6 @@ HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)"
 AWK = awk
 MKDIR = mkdir
 
-ifeq ($(V),1)
-  Q =
-else
-  Q = @
-endif
-
 BUILD_ORC := n
 
 ifeq ($(SRCARCH),x86)
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index 69b66994f2a1..02e490555966 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -5,10 +5,7 @@
 #include <asm/inst.h>
 #include <asm/orc_types.h>
 #include <linux/objtool_types.h>
-
-#ifndef EM_LOONGARCH
-#define EM_LOONGARCH	258
-#endif
+#include <arch/elf.h>
 
 int arch_ftrace_match(char *name)
 {
@@ -363,3 +360,26 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
 	state->cfa.base = CFI_SP;
 	state->cfa.offset = 0;
 }
+
+unsigned int arch_reloc_size(struct reloc *reloc)
+{
+	switch (reloc_type(reloc)) {
+	case R_LARCH_32:
+	case R_LARCH_32_PCREL:
+		return 4;
+	default:
+		return 8;
+	}
+}
+
+unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table)
+{
+	switch (reloc_type(reloc)) {
+	case R_LARCH_32_PCREL:
+	case R_LARCH_64_PCREL:
+		return reloc->sym->offset + reloc_addend(reloc) -
+		       (reloc_offset(reloc) - reloc_offset(table));
+	default:
+		return reloc->sym->offset + reloc_addend(reloc);
+	}
+}
diff --git a/tools/objtool/arch/loongarch/include/arch/elf.h b/tools/objtool/arch/loongarch/include/arch/elf.h
index 9623d663220e..ec79062c9554 100644
--- a/tools/objtool/arch/loongarch/include/arch/elf.h
+++ b/tools/objtool/arch/loongarch/include/arch/elf.h
@@ -18,6 +18,13 @@
 #ifndef R_LARCH_32_PCREL
 #define R_LARCH_32_PCREL	99
 #endif
+#ifndef R_LARCH_64_PCREL
+#define R_LARCH_64_PCREL	109
+#endif
+
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH		258
+#endif
 
 #define R_NONE			R_LARCH_NONE
 #define R_ABS32			R_LARCH_32
diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c
index 9bba1e9318e0..e39f86d97002 100644
--- a/tools/objtool/arch/loongarch/special.c
+++ b/tools/objtool/arch/loongarch/special.c
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
+#include <string.h>
 #include <objtool/special.h>
+#include <objtool/warn.h>
 
 bool arch_support_alt_relocation(struct special_alt *special_alt,
 				 struct instruction *insn,
@@ -8,8 +10,164 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
 	return false;
 }
 
-struct reloc *arch_find_switch_table(struct objtool_file *file,
-				     struct instruction *insn)
+struct table_info {
+	struct list_head jump_info;
+	unsigned long insn_offset;
+	unsigned long rodata_offset;
+};
+
+static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
+						    struct instruction *insn,
+						    unsigned long *table_size)
+{
+	struct section *rsec;
+	struct reloc *reloc;
+	struct list_head table_list;
+	struct table_info *orig_table;
+	struct table_info *next_table;
+	unsigned long tmp_insn_offset;
+	unsigned long tmp_rodata_offset;
+
+	rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate");
+	if (!rsec)
+		return;
+
+	INIT_LIST_HEAD(&table_list);
+
+	for_each_reloc(rsec, reloc) {
+		orig_table = malloc(sizeof(struct table_info));
+		if (!orig_table) {
+			WARN("malloc failed");
+			return;
+		}
+
+		orig_table->insn_offset = reloc->sym->offset + reloc_addend(reloc);
+		reloc++;
+		orig_table->rodata_offset = reloc->sym->offset + reloc_addend(reloc);
+
+		list_add_tail(&orig_table->jump_info, &table_list);
+
+		if (reloc_idx(reloc) + 1 == sec_num_entries(rsec))
+			break;
+	}
+
+	list_for_each_entry(orig_table, &table_list, jump_info) {
+		next_table = list_next_entry(orig_table, jump_info);
+		list_for_each_entry_from(next_table, &table_list, jump_info) {
+			if (next_table->rodata_offset < orig_table->rodata_offset) {
+				tmp_insn_offset = next_table->insn_offset;
+				tmp_rodata_offset = next_table->rodata_offset;
+				next_table->insn_offset = orig_table->insn_offset;
+				next_table->rodata_offset = orig_table->rodata_offset;
+				orig_table->insn_offset = tmp_insn_offset;
+				orig_table->rodata_offset = tmp_rodata_offset;
+			}
+		}
+	}
+
+	list_for_each_entry(orig_table, &table_list, jump_info) {
+		if (insn->offset == orig_table->insn_offset) {
+			next_table = list_next_entry(orig_table, jump_info);
+			if (&next_table->jump_info == &table_list) {
+				*table_size = 0;
+				return;
+			}
+
+			while (next_table->rodata_offset == orig_table->rodata_offset) {
+				next_table = list_next_entry(next_table, jump_info);
+				if (&next_table->jump_info == &table_list) {
+					*table_size = 0;
+					return;
+				}
+			}
+
+			*table_size = next_table->rodata_offset - orig_table->rodata_offset;
+		}
+	}
+}
+
+static struct reloc *find_reloc_by_table_annotate(struct objtool_file *file,
+						  struct instruction *insn,
+						  unsigned long *table_size)
+{
+	struct section *rsec;
+	struct reloc *reloc;
+	unsigned long offset;
+
+	rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate");
+	if (!rsec)
+		return NULL;
+
+	for_each_reloc(rsec, reloc) {
+		if (reloc->sym->sec->rodata)
+			continue;
+
+		if (strcmp(insn->sec->name, reloc->sym->sec->name))
+			continue;
+
+		offset = reloc->sym->offset + reloc_addend(reloc);
+		if (insn->offset == offset) {
+			get_rodata_table_size_by_table_annotate(file, insn, table_size);
+			reloc++;
+			return reloc;
+		}
+	}
+
+	return NULL;
+}
+
+static struct reloc *find_reloc_of_rodata_c_jump_table(struct section *sec,
+						       unsigned long offset,
+						       unsigned long *table_size)
 {
+	struct section *rsec;
+	struct reloc *reloc;
+
+	rsec = sec->rsec;
+	if (!rsec)
+		return NULL;
+
+	for_each_reloc(rsec, reloc) {
+		if (reloc_offset(reloc) > offset)
+			break;
+
+		if (!strcmp(reloc->sym->sec->name, C_JUMP_TABLE_SECTION)) {
+			*table_size = 0;
+			return reloc;
+		}
+	}
+
 	return NULL;
 }
+
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+				     struct instruction *insn,
+				     unsigned long *table_size)
+{
+	struct reloc *annotate_reloc;
+	struct reloc *rodata_reloc;
+	struct section *table_sec;
+	unsigned long table_offset;
+
+	annotate_reloc = find_reloc_by_table_annotate(file, insn, table_size);
+	if (!annotate_reloc) {
+		annotate_reloc = find_reloc_of_rodata_c_jump_table(
+				 insn->sec, insn->offset, table_size);
+		if (!annotate_reloc)
+			return NULL;
+	}
+
+	table_sec = annotate_reloc->sym->sec;
+	table_offset = annotate_reloc->sym->offset + reloc_addend(annotate_reloc);
+
+	/*
+	 * Each table entry has a rela associated with it.  The rela
+	 * should reference text in the same function as the original
+	 * instruction.
+	 */
+	rodata_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+	if (!rodata_reloc)
+		return NULL;
+
+	return rodata_reloc;
+}
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index 53b55690f320..7c0bf2429067 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -106,3 +106,17 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
 	state->regs[CFI_RA].base = CFI_CFA;
 	state->regs[CFI_RA].offset = 0;
 }
+
+unsigned int arch_reloc_size(struct reloc *reloc)
+{
+	switch (reloc_type(reloc)) {
+	case R_PPC_REL32:
+	case R_PPC_ADDR32:
+	case R_PPC_UADDR32:
+	case R_PPC_PLT32:
+	case R_PPC_PLTREL32:
+		return 4;
+	default:
+		return 8;
+	}
+}
diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c
index d33868147196..51610689abf7 100644
--- a/tools/objtool/arch/powerpc/special.c
+++ b/tools/objtool/arch/powerpc/special.c
@@ -13,7 +13,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
 }
 
 struct reloc *arch_find_switch_table(struct objtool_file *file,
-				    struct instruction *insn)
+				     struct instruction *insn,
+				     unsigned long *table_size)
 {
 	exit(-1);
 }
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index fe1362c34564..7567c893f45e 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -850,5 +850,19 @@ bool arch_is_rethunk(struct symbol *sym)
 bool arch_is_embedded_insn(struct symbol *sym)
 {
 	return !strcmp(sym->name, "retbleed_return_thunk") ||
+	       !strcmp(sym->name, "srso_alias_safe_ret") ||
 	       !strcmp(sym->name, "srso_safe_ret");
 }
+
+unsigned int arch_reloc_size(struct reloc *reloc)
+{
+	switch (reloc_type(reloc)) {
+	case R_X86_64_32:
+	case R_X86_64_32S:
+	case R_X86_64_PC32:
+	case R_X86_64_PLT32:
+		return 4;
+	default:
+		return 8;
+	}
+}
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 4ea0f9815fda..9c1c9df09aaa 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -109,7 +109,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
  *    NOTE: MITIGATION_RETPOLINE made it harder still to decode dynamic jumps.
  */
 struct reloc *arch_find_switch_table(struct objtool_file *file,
-				    struct instruction *insn)
+				     struct instruction *insn,
+				     unsigned long *table_size)
 {
 	struct reloc  *text_reloc, *rodata_reloc;
 	struct section *table_sec;
@@ -158,5 +159,6 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
 	if (reloc_type(text_reloc) == R_X86_64_PC32)
 		file->ignore_unreachables = true;
 
+	*table_size = 0;
 	return rodata_reloc;
 }
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 387d56a7f5fb..5f761f420b8c 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -6,6 +6,10 @@
 #include <subcmd/parse-options.h>
 #include <string.h>
 #include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/sendfile.h>
 #include <objtool/builtin.h>
 #include <objtool/objtool.h>
 
@@ -14,6 +18,8 @@
 		"error: objtool: " format "\n",		\
 		##__VA_ARGS__)
 
+const char *objname;
+
 struct opts opts;
 
 static const char * const check_usage[] = {
@@ -71,7 +77,7 @@ static const struct option check_options[] = {
 	OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
 	OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
 	OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
-	OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
+	OPT_BOOLEAN(0,   "orc", &opts.orc, "generate ORC metadata"),
 	OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
 	OPT_BOOLEAN(0,   "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
 	OPT_BOOLEAN(0,   "unret", &opts.unret, "validate entry unret placement"),
@@ -84,16 +90,17 @@ static const struct option check_options[] = {
 	OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
 
 	OPT_GROUP("Options:"),
-	OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
-	OPT_BOOLEAN(0, "backup", &opts.backup, "create .orig files before modification"),
-	OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
-	OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
-	OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
-	OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
-	OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
-	OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
-	OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
+	OPT_BOOLEAN(0,   "backtrace", &opts.backtrace, "unwind on error"),
+	OPT_BOOLEAN(0,   "dry-run", &opts.dryrun, "don't write modifications"),
+	OPT_BOOLEAN(0,   "link", &opts.link, "object is a linked object"),
+	OPT_BOOLEAN(0,   "module", &opts.module, "object is part of a kernel module"),
+	OPT_BOOLEAN(0,   "mnop", &opts.mnop, "nop out mcount call sites"),
+	OPT_BOOLEAN(0,   "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
+	OPT_STRING('o',  "output", &opts.output, "file", "output file name"),
+	OPT_BOOLEAN(0,   "sec-address", &opts.sec_address, "print section addresses in warnings"),
+	OPT_BOOLEAN(0,   "stats", &opts.stats, "print statistics"),
 	OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"),
+	OPT_BOOLEAN(0,   "Werror", &opts.werror, "return error on warnings"),
 
 	OPT_END(),
 };
@@ -131,6 +138,26 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[])
 
 static bool opts_valid(void)
 {
+	if (opts.mnop && !opts.mcount) {
+		ERROR("--mnop requires --mcount");
+		return false;
+	}
+
+	if (opts.noinstr && !opts.link) {
+		ERROR("--noinstr requires --link");
+		return false;
+	}
+
+	if (opts.ibt && !opts.link) {
+		ERROR("--ibt requires --link");
+		return false;
+	}
+
+	if (opts.unret && !opts.link) {
+		ERROR("--unret requires --link");
+		return false;
+	}
+
 	if (opts.hack_jump_label	||
 	    opts.hack_noinstr		||
 	    opts.ibt			||
@@ -151,11 +178,6 @@ static bool opts_valid(void)
 		return true;
 	}
 
-	if (opts.unret && !opts.rethunk) {
-		ERROR("--unret requires --rethunk");
-		return false;
-	}
-
 	if (opts.dump_orc)
 		return true;
 
@@ -163,76 +185,156 @@ static bool opts_valid(void)
 	return false;
 }
 
-static bool mnop_opts_valid(void)
+static int copy_file(const char *src, const char *dst)
 {
-	if (opts.mnop && !opts.mcount) {
-		ERROR("--mnop requires --mcount");
-		return false;
+	size_t to_copy, copied;
+	int dst_fd, src_fd;
+	struct stat stat;
+	off_t offset = 0;
+
+	src_fd = open(src, O_RDONLY);
+	if (src_fd == -1) {
+		ERROR("can't open '%s' for reading", src);
+		return 1;
 	}
 
-	return true;
-}
-
-static bool link_opts_valid(struct objtool_file *file)
-{
-	if (opts.link)
-		return true;
+	dst_fd = open(dst, O_WRONLY | O_CREAT | O_TRUNC, 0400);
+	if (dst_fd == -1) {
+		ERROR("can't open '%s' for writing", dst);
+		return 1;
+	}
 
-	if (has_multiple_files(file->elf)) {
-		ERROR("Linked object detected, forcing --link");
-		opts.link = true;
-		return true;
+	if (fstat(src_fd, &stat) == -1) {
+		perror("fstat");
+		return 1;
 	}
 
-	if (opts.noinstr) {
-		ERROR("--noinstr requires --link");
-		return false;
+	if (fchmod(dst_fd, stat.st_mode) == -1) {
+		perror("fchmod");
+		return 1;
 	}
 
-	if (opts.ibt) {
-		ERROR("--ibt requires --link");
-		return false;
+	for (to_copy = stat.st_size; to_copy > 0; to_copy -= copied) {
+		copied = sendfile(dst_fd, src_fd, &offset, to_copy);
+		if (copied == -1) {
+			perror("sendfile");
+			return 1;
+		}
 	}
 
-	if (opts.unret) {
-		ERROR("--unret requires --link");
-		return false;
+	close(dst_fd);
+	close(src_fd);
+	return 0;
+}
+
+static char **save_argv(int argc, const char **argv)
+{
+	char **orig_argv;
+
+	orig_argv = calloc(argc, sizeof(char *));
+	if (!orig_argv) {
+		perror("calloc");
+		return NULL;
 	}
 
-	return true;
+	for (int i = 0; i < argc; i++) {
+		orig_argv[i] = strdup(argv[i]);
+		if (!orig_argv[i]) {
+			perror("strdup");
+			return NULL;
+		}
+	};
+
+	return orig_argv;
 }
 
+#define ORIG_SUFFIX ".orig"
+
 int objtool_run(int argc, const char **argv)
 {
-	const char *objname;
 	struct objtool_file *file;
-	int ret;
+	char *backup = NULL;
+	char **orig_argv;
+	int ret = 0;
 
-	argc = cmd_parse_options(argc, argv, check_usage);
-	objname = argv[0];
+	orig_argv = save_argv(argc, argv);
+	if (!orig_argv)
+		return 1;
+
+	cmd_parse_options(argc, argv, check_usage);
 
 	if (!opts_valid())
 		return 1;
 
+	objname = argv[0];
+
 	if (opts.dump_orc)
 		return orc_dump(objname);
 
+	if (!opts.dryrun && opts.output) {
+		/* copy original .o file to output file */
+		if (copy_file(objname, opts.output))
+			return 1;
+
+		/* from here on, work directly on the output file */
+		objname = opts.output;
+	}
+
 	file = objtool_open_read(objname);
 	if (!file)
-		return 1;
+		goto err;
 
-	if (!mnop_opts_valid())
-		return 1;
-
-	if (!link_opts_valid(file))
-		return 1;
+	if (!opts.link && has_multiple_files(file->elf)) {
+		ERROR("Linked object requires --link");
+		goto err;
+	}
 
 	ret = check(file);
 	if (ret)
-		return ret;
+		goto err;
 
-	if (file->elf->changed)
-		return elf_write(file->elf);
+	if (!opts.dryrun && file->elf->changed && elf_write(file->elf))
+		goto err;
 
 	return 0;
+
+err:
+	if (opts.dryrun)
+		goto err_msg;
+
+	if (opts.output) {
+		unlink(opts.output);
+		goto err_msg;
+	}
+
+	/*
+	 * Make a backup before kbuild deletes the file so the error
+	 * can be recreated without recompiling or relinking.
+	 */
+	backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1);
+	if (!backup) {
+		perror("malloc");
+		return 1;
+	}
+
+	strcpy(backup, objname);
+	strcat(backup, ORIG_SUFFIX);
+	if (copy_file(objname, backup))
+		return 1;
+
+err_msg:
+	fprintf(stderr, "%s", orig_argv[0]);
+
+	for (int i = 1; i < argc; i++) {
+		char *arg = orig_argv[i];
+
+		if (backup && !strcmp(arg, objname))
+			fprintf(stderr, " %s -o %s", backup, objname);
+		else
+			fprintf(stderr, " %s", arg);
+	}
+
+	fprintf(stderr, "\n");
+
+	return 1;
 }
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 76060da755b5..ca3435acc326 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -150,6 +150,15 @@ static inline struct reloc *insn_jump_table(struct instruction *insn)
 	return NULL;
 }
 
+static inline unsigned long insn_jump_table_size(struct instruction *insn)
+{
+	if (insn->type == INSN_JUMP_DYNAMIC ||
+	    insn->type == INSN_CALL_DYNAMIC)
+		return insn->_jump_table_size;
+
+	return 0;
+}
+
 static bool is_jump_table_jump(struct instruction *insn)
 {
 	struct alt_group *alt_group = insn->alt_group;
@@ -218,6 +227,7 @@ static bool is_rust_noreturn(const struct symbol *func)
 	       str_ends_with(func->name, "_4core9panicking18panic_bounds_check")			||
 	       str_ends_with(func->name, "_4core9panicking19assert_failed_inner")			||
 	       str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference")	||
+	       strstr(func->name, "_4core9panicking13assert_failed")					||
 	       strstr(func->name, "_4core9panicking11panic_const24panic_const_")			||
 	       (strstr(func->name, "_4core5slice5index24slice_") &&
 		str_ends_with(func->name, "_fail"));
@@ -614,108 +624,6 @@ static int init_pv_ops(struct objtool_file *file)
 	return 0;
 }
 
-static struct instruction *find_last_insn(struct objtool_file *file,
-					  struct section *sec)
-{
-	struct instruction *insn = NULL;
-	unsigned int offset;
-	unsigned int end = (sec->sh.sh_size > 10) ? sec->sh.sh_size - 10 : 0;
-
-	for (offset = sec->sh.sh_size - 1; offset >= end && !insn; offset--)
-		insn = find_insn(file, sec, offset);
-
-	return insn;
-}
-
-/*
- * Mark "ud2" instructions and manually annotated dead ends.
- */
-static int add_dead_ends(struct objtool_file *file)
-{
-	struct section *rsec;
-	struct reloc *reloc;
-	struct instruction *insn;
-	uint64_t offset;
-
-	/*
-	 * Check for manually annotated dead ends.
-	 */
-	rsec = find_section_by_name(file->elf, ".rela.discard.unreachable");
-	if (!rsec)
-		goto reachable;
-
-	for_each_reloc(rsec, reloc) {
-		if (reloc->sym->type == STT_SECTION) {
-			offset = reloc_addend(reloc);
-		} else if (reloc->sym->local_label) {
-			offset = reloc->sym->offset;
-		} else {
-			WARN("unexpected relocation symbol type in %s", rsec->name);
-			return -1;
-		}
-
-		insn = find_insn(file, reloc->sym->sec, offset);
-		if (insn)
-			insn = prev_insn_same_sec(file, insn);
-		else if (offset == reloc->sym->sec->sh.sh_size) {
-			insn = find_last_insn(file, reloc->sym->sec);
-			if (!insn) {
-				WARN("can't find unreachable insn at %s+0x%" PRIx64,
-				     reloc->sym->sec->name, offset);
-				return -1;
-			}
-		} else {
-			WARN("can't find unreachable insn at %s+0x%" PRIx64,
-			     reloc->sym->sec->name, offset);
-			return -1;
-		}
-
-		insn->dead_end = true;
-	}
-
-reachable:
-	/*
-	 * These manually annotated reachable checks are needed for GCC 4.4,
-	 * where the Linux unreachable() macro isn't supported.  In that case
-	 * GCC doesn't know the "ud2" is fatal, so it generates code as if it's
-	 * not a dead end.
-	 */
-	rsec = find_section_by_name(file->elf, ".rela.discard.reachable");
-	if (!rsec)
-		return 0;
-
-	for_each_reloc(rsec, reloc) {
-		if (reloc->sym->type == STT_SECTION) {
-			offset = reloc_addend(reloc);
-		} else if (reloc->sym->local_label) {
-			offset = reloc->sym->offset;
-		} else {
-			WARN("unexpected relocation symbol type in %s", rsec->name);
-			return -1;
-		}
-
-		insn = find_insn(file, reloc->sym->sec, offset);
-		if (insn)
-			insn = prev_insn_same_sec(file, insn);
-		else if (offset == reloc->sym->sec->sh.sh_size) {
-			insn = find_last_insn(file, reloc->sym->sec);
-			if (!insn) {
-				WARN("can't find reachable insn at %s+0x%" PRIx64,
-				     reloc->sym->sec->name, offset);
-				return -1;
-			}
-		} else {
-			WARN("can't find reachable insn at %s+0x%" PRIx64,
-			     reloc->sym->sec->name, offset);
-			return -1;
-		}
-
-		insn->dead_end = false;
-	}
-
-	return 0;
-}
-
 static int create_static_call_sections(struct objtool_file *file)
 {
 	struct static_call_site *site;
@@ -1310,40 +1218,6 @@ static void add_uaccess_safe(struct objtool_file *file)
 }
 
 /*
- * FIXME: For now, just ignore any alternatives which add retpolines.  This is
- * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
- * But it at least allows objtool to understand the control flow *around* the
- * retpoline.
- */
-static int add_ignore_alternatives(struct objtool_file *file)
-{
-	struct section *rsec;
-	struct reloc *reloc;
-	struct instruction *insn;
-
-	rsec = find_section_by_name(file->elf, ".rela.discard.ignore_alts");
-	if (!rsec)
-		return 0;
-
-	for_each_reloc(rsec, reloc) {
-		if (reloc->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in %s", rsec->name);
-			return -1;
-		}
-
-		insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
-		if (!insn) {
-			WARN("bad .discard.ignore_alts entry");
-			return -1;
-		}
-
-		insn->ignore_alts = true;
-	}
-
-	return 0;
-}
-
-/*
  * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol
  * will be added to the .retpoline_sites section.
  */
@@ -1410,15 +1284,6 @@ static void annotate_call_site(struct objtool_file *file,
 	if (!sym)
 		sym = reloc->sym;
 
-	/*
-	 * Alternative replacement code is just template code which is
-	 * sometimes copied to the original instruction. For now, don't
-	 * annotate it. (In the future we might consider annotating the
-	 * original instruction if/when it ever makes sense to do so.)
-	 */
-	if (!strcmp(insn->sec->name, ".altinstr_replacement"))
-		return;
-
 	if (sym->static_call_tramp) {
 		list_add_tail(&insn->call_node, &file->static_call_list);
 		return;
@@ -1476,7 +1341,8 @@ static void annotate_call_site(struct objtool_file *file,
 		return;
 	}
 
-	if (insn->type == INSN_CALL && !insn->sec->init)
+	if (insn->type == INSN_CALL && !insn->sec->init &&
+	    !insn->_call_dest->embedded_insn)
 		list_add_tail(&insn->call_node, &file->call_list);
 
 	if (!sibling && dead_end_function(file, sym))
@@ -2070,15 +1936,22 @@ out:
 	return ret;
 }
 
+__weak unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table)
+{
+	return reloc->sym->offset + reloc_addend(reloc);
+}
+
 static int add_jump_table(struct objtool_file *file, struct instruction *insn,
 			  struct reloc *next_table)
 {
+	unsigned long table_size = insn_jump_table_size(insn);
 	struct symbol *pfunc = insn_func(insn)->pfunc;
 	struct reloc *table = insn_jump_table(insn);
 	struct instruction *dest_insn;
 	unsigned int prev_offset = 0;
 	struct reloc *reloc = table;
 	struct alternative *alt;
+	unsigned long sym_offset;
 
 	/*
 	 * Each @reloc is a switch table relocation which points to the target
@@ -2087,19 +1960,30 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
 	for_each_reloc_from(table->sec, reloc) {
 
 		/* Check for the end of the table: */
+		if (table_size && reloc_offset(reloc) - reloc_offset(table) >= table_size)
+			break;
 		if (reloc != table && reloc == next_table)
 			break;
 
 		/* Make sure the table entries are consecutive: */
-		if (prev_offset && reloc_offset(reloc) != prev_offset + 8)
+		if (prev_offset && reloc_offset(reloc) != prev_offset + arch_reloc_size(reloc))
 			break;
 
+		sym_offset = arch_jump_table_sym_offset(reloc, table);
+
 		/* Detect function pointers from contiguous objects: */
-		if (reloc->sym->sec == pfunc->sec &&
-		    reloc_addend(reloc) == pfunc->offset)
+		if (reloc->sym->sec == pfunc->sec && sym_offset == pfunc->offset)
 			break;
 
-		dest_insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
+		/*
+		 * Clang sometimes leaves dangling unused jump table entries
+		 * which point to the end of the function.  Ignore them.
+		 */
+		if (reloc->sym->sec == pfunc->sec &&
+		    sym_offset == pfunc->offset + pfunc->len)
+			goto next;
+
+		dest_insn = find_insn(file, reloc->sym->sec, sym_offset);
 		if (!dest_insn)
 			break;
 
@@ -2116,6 +2000,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
 		alt->insn = dest_insn;
 		alt->next = insn->alts;
 		insn->alts = alt;
+next:
 		prev_offset = reloc_offset(reloc);
 	}
 
@@ -2131,12 +2016,13 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
  * find_jump_table() - Given a dynamic jump, find the switch jump table
  * associated with it.
  */
-static struct reloc *find_jump_table(struct objtool_file *file,
-				      struct symbol *func,
-				      struct instruction *insn)
+static void find_jump_table(struct objtool_file *file, struct symbol *func,
+			    struct instruction *insn)
 {
 	struct reloc *table_reloc;
 	struct instruction *dest_insn, *orig_insn = insn;
+	unsigned long table_size;
+	unsigned long sym_offset;
 
 	/*
 	 * Backward search using the @first_jump_src links, these help avoid
@@ -2157,17 +2043,20 @@ static struct reloc *find_jump_table(struct objtool_file *file,
 		     insn->jump_dest->offset > orig_insn->offset))
 		    break;
 
-		table_reloc = arch_find_switch_table(file, insn);
+		table_reloc = arch_find_switch_table(file, insn, &table_size);
 		if (!table_reloc)
 			continue;
-		dest_insn = find_insn(file, table_reloc->sym->sec, reloc_addend(table_reloc));
+
+		sym_offset = table_reloc->sym->offset + reloc_addend(table_reloc);
+
+		dest_insn = find_insn(file, table_reloc->sym->sec, sym_offset);
 		if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func)
 			continue;
 
-		return table_reloc;
+		orig_insn->_jump_table = table_reloc;
+		orig_insn->_jump_table_size = table_size;
+		break;
 	}
-
-	return NULL;
 }
 
 /*
@@ -2178,7 +2067,6 @@ static void mark_func_jump_tables(struct objtool_file *file,
 				    struct symbol *func)
 {
 	struct instruction *insn, *last = NULL;
-	struct reloc *reloc;
 
 	func_for_each_insn(file, func, insn) {
 		if (!last)
@@ -2201,9 +2089,7 @@ static void mark_func_jump_tables(struct objtool_file *file,
 		if (insn->type != INSN_JUMP_DYNAMIC)
 			continue;
 
-		reloc = find_jump_table(file, func, insn);
-		if (reloc)
-			insn->_jump_table = reloc;
+		find_jump_table(file, func, insn);
 	}
 }
 
@@ -2373,185 +2259,147 @@ static int read_unwind_hints(struct objtool_file *file)
 	return 0;
 }
 
-static int read_noendbr_hints(struct objtool_file *file)
+static int read_annotate(struct objtool_file *file,
+			 int (*func)(struct objtool_file *file, int type, struct instruction *insn))
 {
+	struct section *sec;
 	struct instruction *insn;
-	struct section *rsec;
 	struct reloc *reloc;
+	uint64_t offset;
+	int type, ret;
 
-	rsec = find_section_by_name(file->elf, ".rela.discard.noendbr");
-	if (!rsec)
+	sec = find_section_by_name(file->elf, ".discard.annotate_insn");
+	if (!sec)
 		return 0;
 
-	for_each_reloc(rsec, reloc) {
-		insn = find_insn(file, reloc->sym->sec,
-				 reloc->sym->offset + reloc_addend(reloc));
-		if (!insn) {
-			WARN("bad .discard.noendbr entry");
-			return -1;
-		}
+	if (!sec->rsec)
+		return 0;
 
-		insn->noendbr = 1;
+	if (sec->sh.sh_entsize != 8) {
+		static bool warned = false;
+		if (!warned && opts.verbose) {
+			WARN("%s: dodgy linker, sh_entsize != 8", sec->name);
+			warned = true;
+		}
+		sec->sh.sh_entsize = 8;
 	}
 
-	return 0;
-}
+	for_each_reloc(sec->rsec, reloc) {
+		type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4);
 
-static int read_retpoline_hints(struct objtool_file *file)
-{
-	struct section *rsec;
-	struct instruction *insn;
-	struct reloc *reloc;
-
-	rsec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
-	if (!rsec)
-		return 0;
-
-	for_each_reloc(rsec, reloc) {
-		if (reloc->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in %s", rsec->name);
-			return -1;
-		}
+		offset = reloc->sym->offset + reloc_addend(reloc);
+		insn = find_insn(file, reloc->sym->sec, offset);
 
-		insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
 		if (!insn) {
-			WARN("bad .discard.retpoline_safe entry");
-			return -1;
-		}
-
-		if (insn->type != INSN_JUMP_DYNAMIC &&
-		    insn->type != INSN_CALL_DYNAMIC &&
-		    insn->type != INSN_RETURN &&
-		    insn->type != INSN_NOP) {
-			WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop");
+			WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type);
 			return -1;
 		}
 
-		insn->retpoline_safe = true;
+		ret = func(file, type, insn);
+		if (ret < 0)
+			return ret;
 	}
 
 	return 0;
 }
 
-static int read_instr_hints(struct objtool_file *file)
+static int __annotate_early(struct objtool_file *file, int type, struct instruction *insn)
 {
-	struct section *rsec;
-	struct instruction *insn;
-	struct reloc *reloc;
+	switch (type) {
+	case ANNOTYPE_IGNORE_ALTS:
+		insn->ignore_alts = true;
+		break;
 
-	rsec = find_section_by_name(file->elf, ".rela.discard.instr_end");
-	if (!rsec)
-		return 0;
+	/*
+	 * Must be before read_unwind_hints() since that needs insn->noendbr.
+	 */
+	case ANNOTYPE_NOENDBR:
+		insn->noendbr = 1;
+		break;
 
-	for_each_reloc(rsec, reloc) {
-		if (reloc->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in %s", rsec->name);
-			return -1;
-		}
+	default:
+		break;
+	}
 
-		insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
-		if (!insn) {
-			WARN("bad .discard.instr_end entry");
-			return -1;
-		}
+	return 0;
+}
 
-		insn->instr--;
-	}
+static int __annotate_ifc(struct objtool_file *file, int type, struct instruction *insn)
+{
+	unsigned long dest_off;
 
-	rsec = find_section_by_name(file->elf, ".rela.discard.instr_begin");
-	if (!rsec)
+	if (type != ANNOTYPE_INTRA_FUNCTION_CALL)
 		return 0;
 
-	for_each_reloc(rsec, reloc) {
-		if (reloc->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in %s", rsec->name);
-			return -1;
-		}
+	if (insn->type != INSN_CALL) {
+		WARN_INSN(insn, "intra_function_call not a direct call");
+		return -1;
+	}
 
-		insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
-		if (!insn) {
-			WARN("bad .discard.instr_begin entry");
-			return -1;
-		}
+	/*
+	 * Treat intra-function CALLs as JMPs, but with a stack_op.
+	 * See add_call_destinations(), which strips stack_ops from
+	 * normal CALLs.
+	 */
+	insn->type = INSN_JUMP_UNCONDITIONAL;
 
-		insn->instr++;
+	dest_off = arch_jump_destination(insn);
+	insn->jump_dest = find_insn(file, insn->sec, dest_off);
+	if (!insn->jump_dest) {
+		WARN_INSN(insn, "can't find call dest at %s+0x%lx",
+			  insn->sec->name, dest_off);
+		return -1;
 	}
 
 	return 0;
 }
 
-static int read_validate_unret_hints(struct objtool_file *file)
+static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn)
 {
-	struct section *rsec;
-	struct instruction *insn;
-	struct reloc *reloc;
-
-	rsec = find_section_by_name(file->elf, ".rela.discard.validate_unret");
-	if (!rsec)
-		return 0;
-
-	for_each_reloc(rsec, reloc) {
-		if (reloc->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in %s", rsec->name);
-			return -1;
-		}
+	switch (type) {
+	case ANNOTYPE_NOENDBR:
+		/* early */
+		break;
 
-		insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
-		if (!insn) {
-			WARN("bad .discard.instr_end entry");
+	case ANNOTYPE_RETPOLINE_SAFE:
+		if (insn->type != INSN_JUMP_DYNAMIC &&
+		    insn->type != INSN_CALL_DYNAMIC &&
+		    insn->type != INSN_RETURN &&
+		    insn->type != INSN_NOP) {
+			WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop");
 			return -1;
 		}
-		insn->unret = 1;
-	}
-
-	return 0;
-}
-
 
-static int read_intra_function_calls(struct objtool_file *file)
-{
-	struct instruction *insn;
-	struct section *rsec;
-	struct reloc *reloc;
+		insn->retpoline_safe = true;
+		break;
 
-	rsec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls");
-	if (!rsec)
-		return 0;
+	case ANNOTYPE_INSTR_BEGIN:
+		insn->instr++;
+		break;
 
-	for_each_reloc(rsec, reloc) {
-		unsigned long dest_off;
+	case ANNOTYPE_INSTR_END:
+		insn->instr--;
+		break;
 
-		if (reloc->sym->type != STT_SECTION) {
-			WARN("unexpected relocation symbol type in %s",
-			     rsec->name);
-			return -1;
-		}
+	case ANNOTYPE_UNRET_BEGIN:
+		insn->unret = 1;
+		break;
 
-		insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
-		if (!insn) {
-			WARN("bad .discard.intra_function_call entry");
-			return -1;
-		}
+	case ANNOTYPE_IGNORE_ALTS:
+		/* early */
+		break;
 
-		if (insn->type != INSN_CALL) {
-			WARN_INSN(insn, "intra_function_call not a direct call");
-			return -1;
-		}
+	case ANNOTYPE_INTRA_FUNCTION_CALL:
+		/* ifc */
+		break;
 
-		/*
-		 * Treat intra-function CALLs as JMPs, but with a stack_op.
-		 * See add_call_destinations(), which strips stack_ops from
-		 * normal CALLs.
-		 */
-		insn->type = INSN_JUMP_UNCONDITIONAL;
+	case ANNOTYPE_REACHABLE:
+		insn->dead_end = false;
+		break;
 
-		dest_off = arch_jump_destination(insn);
-		insn->jump_dest = find_insn(file, insn->sec, dest_off);
-		if (!insn->jump_dest) {
-			WARN_INSN(insn, "can't find call dest at %s+0x%lx",
-				  insn->sec->name, dest_off);
-			return -1;
-		}
+	default:
+		WARN_INSN(insn, "Unknown annotation type: %d", type);
+		break;
 	}
 
 	return 0;
@@ -2627,13 +2475,14 @@ static void mark_rodata(struct objtool_file *file)
 	 *
 	 * - .rodata: can contain GCC switch tables
 	 * - .rodata.<func>: same, if -fdata-sections is being used
-	 * - .rodata..c_jump_table: contains C annotated jump tables
+	 * - .data.rel.ro.c_jump_table: contains C annotated jump tables
 	 *
 	 * .rodata.str1.* sections are ignored; they don't contain jump tables.
 	 */
 	for_each_sec(file, sec) {
-		if (!strncmp(sec->name, ".rodata", 7) &&
-		    !strstr(sec->name, ".str1.")) {
+		if ((!strncmp(sec->name, ".rodata", 7) &&
+		     !strstr(sec->name, ".str1.")) ||
+		    !strncmp(sec->name, ".data.rel.ro", 12)) {
 			sec->rodata = true;
 			found = true;
 		}
@@ -2666,14 +2515,7 @@ static int decode_sections(struct objtool_file *file)
 	add_ignores(file);
 	add_uaccess_safe(file);
 
-	ret = add_ignore_alternatives(file);
-	if (ret)
-		return ret;
-
-	/*
-	 * Must be before read_unwind_hints() since that needs insn->noendbr.
-	 */
-	ret = read_noendbr_hints(file);
+	ret = read_annotate(file, __annotate_early);
 	if (ret)
 		return ret;
 
@@ -2695,7 +2537,7 @@ static int decode_sections(struct objtool_file *file)
 	 * Must be before add_call_destination(); it changes INSN_CALL to
 	 * INSN_JUMP.
 	 */
-	ret = read_intra_function_calls(file);
+	ret = read_annotate(file, __annotate_ifc);
 	if (ret)
 		return ret;
 
@@ -2703,14 +2545,6 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
-	/*
-	 * Must be after add_call_destinations() such that it can override
-	 * dead_end_function() marks.
-	 */
-	ret = add_dead_ends(file);
-	if (ret)
-		return ret;
-
 	ret = add_jump_table_alts(file);
 	if (ret)
 		return ret;
@@ -2719,15 +2553,11 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
-	ret = read_retpoline_hints(file);
-	if (ret)
-		return ret;
-
-	ret = read_instr_hints(file);
-	if (ret)
-		return ret;
-
-	ret = read_validate_unret_hints(file);
+	/*
+	 * Must be after add_call_destinations() such that it can override
+	 * dead_end_function() marks.
+	 */
+	ret = read_annotate(file, __annotate_late);
 	if (ret)
 		return ret;
 
@@ -4622,35 +4452,6 @@ static int validate_sls(struct objtool_file *file)
 	return warnings;
 }
 
-static bool ignore_noreturn_call(struct instruction *insn)
-{
-	struct symbol *call_dest = insn_call_dest(insn);
-
-	/*
-	 * FIXME: hack, we need a real noreturn solution
-	 *
-	 * Problem is, exc_double_fault() may or may not return, depending on
-	 * whether CONFIG_X86_ESPFIX64 is set.  But objtool has no visibility
-	 * to the kernel config.
-	 *
-	 * Other potential ways to fix it:
-	 *
-	 *   - have compiler communicate __noreturn functions somehow
-	 *   - remove CONFIG_X86_ESPFIX64
-	 *   - read the .config file
-	 *   - add a cmdline option
-	 *   - create a generic objtool annotation format (vs a bunch of custom
-	 *     formats) and annotate it
-	 */
-	if (!strcmp(call_dest->name, "exc_double_fault")) {
-		/* prevent further unreachable warnings for the caller */
-		insn->sym->warned = 1;
-		return true;
-	}
-
-	return false;
-}
-
 static int validate_reachable_instructions(struct objtool_file *file)
 {
 	struct instruction *insn, *prev_insn;
@@ -4667,8 +4468,8 @@ static int validate_reachable_instructions(struct objtool_file *file)
 		prev_insn = prev_insn_same_sec(file, insn);
 		if (prev_insn && prev_insn->dead_end) {
 			call_dest = insn_call_dest(prev_insn);
-			if (call_dest && !ignore_noreturn_call(prev_insn)) {
-				WARN_INSN(insn, "%s() is missing a __noreturn annotation",
+			if (call_dest) {
+				WARN_INSN(insn, "%s() missing __noreturn in .c/.h or NORETURN() in noreturns.h",
 					  call_dest->name);
 				warnings++;
 				continue;
@@ -4738,7 +4539,7 @@ static int disas_warned_funcs(struct objtool_file *file)
 	char *funcs = NULL, *tmp;
 
 	for_each_sym(file, sym) {
-		if (sym->warned) {
+		if (sym->warnings) {
 			if (!funcs) {
 				funcs = malloc(strlen(sym->name) + 1);
 				strcpy(funcs, sym->name);
@@ -4796,8 +4597,10 @@ int check(struct objtool_file *file)
 	init_cfi_state(&force_undefined_cfi);
 	force_undefined_cfi.force_undefined = true;
 
-	if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3)))
+	if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) {
+		ret = -1;
 		goto out;
+	}
 
 	cfi_hash_add(&init_cfi);
 	cfi_hash_add(&func_cfi);
@@ -4814,7 +4617,7 @@ int check(struct objtool_file *file)
 	if (opts.retpoline) {
 		ret = validate_retpoline(file);
 		if (ret < 0)
-			return ret;
+			goto out;
 		warnings += ret;
 	}
 
@@ -4850,7 +4653,7 @@ int check(struct objtool_file *file)
 		 */
 		ret = validate_unrets(file);
 		if (ret < 0)
-			return ret;
+			goto out;
 		warnings += ret;
 	}
 
@@ -4913,7 +4716,7 @@ int check(struct objtool_file *file)
 	if (opts.prefix) {
 		ret = add_prefix_symbols(file);
 		if (ret < 0)
-			return ret;
+			goto out;
 		warnings += ret;
 	}
 
@@ -4945,9 +4748,18 @@ int check(struct objtool_file *file)
 
 out:
 	/*
-	 *  For now, don't fail the kernel build on fatal warnings.  These
-	 *  errors are still fairly common due to the growing matrix of
-	 *  supported toolchains and their recent pace of change.
+	 * CONFIG_OBJTOOL_WERROR upgrades all warnings (and errors) to actual
+	 * errors.
+	 *
+	 * Note that even "fatal" type errors don't actually return an error
+	 * without CONFIG_OBJTOOL_WERROR.  That probably needs improved at some
+	 * point.
 	 */
+	if (opts.werror && (ret || warnings)) {
+		if (warnings)
+			WARN("%d warning(s) upgraded to errors", warnings);
+		return 1;
+	}
+
 	return 0;
 }
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 6f64d611faea..be4f4b62730c 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -1302,9 +1302,6 @@ int elf_write(struct elf *elf)
 	struct section *sec;
 	Elf_Scn *s;
 
-	if (opts.dryrun)
-		return 0;
-
 	/* Update changed relocation sections and section headers: */
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->truncate)
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index d63b46a19f39..089a1acc48a8 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -97,4 +97,7 @@ int arch_rewrite_retpolines(struct objtool_file *file);
 
 bool arch_pc_relative_reloc(struct reloc *reloc);
 
+unsigned int arch_reloc_size(struct reloc *reloc);
+unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
+
 #endif /* _ARCH_H */
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index fcca6662c8b4..0fafd0f7a209 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -29,15 +29,16 @@ struct opts {
 
 	/* options: */
 	bool backtrace;
-	bool backup;
 	bool dryrun;
 	bool link;
 	bool mnop;
 	bool module;
 	bool no_unreachable;
+	const char *output;
 	bool sec_address;
 	bool stats;
 	bool verbose;
+	bool werror;
 };
 
 extern struct opts opts;
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index daa46f1f0965..e1cd13cd28a3 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -71,7 +71,10 @@ struct instruction {
 	struct instruction *first_jump_src;
 	union {
 		struct symbol *_call_dest;
-		struct reloc *_jump_table;
+		struct {
+			struct reloc *_jump_table;
+			unsigned long _jump_table_size;
+		};
 	};
 	struct alternative *alts;
 	struct symbol *sym;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index d7e815c2fd15..223ac1c24b90 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -65,10 +65,10 @@ struct symbol {
 	u8 return_thunk      : 1;
 	u8 fentry            : 1;
 	u8 profiling_func    : 1;
-	u8 warned	     : 1;
 	u8 embedded_insn     : 1;
 	u8 local_label       : 1;
 	u8 frame_pointer     : 1;
+	u8 warnings	     : 2;
 	struct list_head pv_target;
 	struct reloc *relocs;
 };
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index 86d4af9c5aa9..e049679bb17b 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -10,7 +10,7 @@
 #include <objtool/check.h>
 #include <objtool/elf.h>
 
-#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
+#define C_JUMP_TABLE_SECTION ".data.rel.ro.c_jump_table"
 
 struct special_alt {
 	struct list_head list;
@@ -38,5 +38,6 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
 				 struct instruction *insn,
 				 struct reloc *reloc);
 struct reloc *arch_find_switch_table(struct objtool_file *file,
-				    struct instruction *insn);
+				     struct instruction *insn,
+				     unsigned long *table_size);
 #endif /* _SPECIAL_H */
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index ac04d3fe4dd9..e72b9d630551 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -43,8 +43,10 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 
 #define WARN(format, ...)				\
 	fprintf(stderr,					\
-		"%s: warning: objtool: " format "\n",	\
-		objname, ##__VA_ARGS__)
+		"%s: %s: objtool: " format "\n",	\
+		objname,				\
+		opts.werror ? "error" : "warning",	\
+		##__VA_ARGS__)
 
 #define WARN_FUNC(format, sec, offset, ...)		\
 ({							\
@@ -53,14 +55,22 @@ static inline char *offstr(struct section *sec, unsigned long offset)
 	free(_str);					\
 })
 
+#define WARN_LIMIT 2
+
 #define WARN_INSN(insn, format, ...)					\
 ({									\
 	struct instruction *_insn = (insn);				\
-	if (!_insn->sym || !_insn->sym->warned)				\
+	BUILD_BUG_ON(WARN_LIMIT > 2);					\
+	if (!_insn->sym || _insn->sym->warnings < WARN_LIMIT) {		\
 		WARN_FUNC(format, _insn->sec, _insn->offset,		\
 			  ##__VA_ARGS__);				\
-	if (_insn->sym)							\
-		_insn->sym->warned = 1;					\
+		if (_insn->sym)						\
+			_insn->sym->warnings++;				\
+	} else if (_insn->sym && _insn->sym->warnings == WARN_LIMIT) {	\
+		WARN_FUNC("skipping duplicate warning(s)",		\
+			  _insn->sec, _insn->offset);			\
+		_insn->sym->warnings++;					\
+	}								\
 })
 
 #define BT_INSN(insn, format, ...)				\
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index b2174894f9f7..eacfe3b0a8d1 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -16,10 +16,11 @@ NORETURN(__tdx_hypercall_failed)
 NORETURN(__ubsan_handle_builtin_unreachable)
 NORETURN(__x64_sys_exit)
 NORETURN(__x64_sys_exit_group)
+NORETURN(acpi_processor_ffh_play_dead)
 NORETURN(arch_cpu_idle_dead)
 NORETURN(bch2_trans_in_restart_error)
 NORETURN(bch2_trans_restart_error)
-NORETURN(bch2_trans_unlocked_error)
+NORETURN(bch2_trans_unlocked_or_in_restart_error)
 NORETURN(cpu_bringup_and_idle)
 NORETURN(cpu_startup_entry)
 NORETURN(do_exit)
@@ -34,6 +35,7 @@ NORETURN(kunit_try_catch_throw)
 NORETURN(machine_real_restart)
 NORETURN(make_task_dead)
 NORETURN(mpt_halt_firmware)
+NORETURN(mwait_play_dead)
 NORETURN(nmi_panic_self_stop)
 NORETURN(panic)
 NORETURN(panic_smp_self_stop)
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index f40febdd6e36..1c73fb62fd57 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -18,87 +18,19 @@
 
 bool help;
 
-const char *objname;
 static struct objtool_file file;
 
-static bool objtool_create_backup(const char *_objname)
+struct objtool_file *objtool_open_read(const char *filename)
 {
-	int len = strlen(_objname);
-	char *buf, *base, *name = malloc(len+6);
-	int s, d, l, t;
-
-	if (!name) {
-		perror("failed backup name malloc");
-		return false;
-	}
-
-	strcpy(name, _objname);
-	strcpy(name + len, ".orig");
-
-	d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644);
-	if (d < 0) {
-		perror("failed to create backup file");
-		return false;
-	}
-
-	s = open(_objname, O_RDONLY);
-	if (s < 0) {
-		perror("failed to open orig file");
-		return false;
-	}
-
-	buf = malloc(4096);
-	if (!buf) {
-		perror("failed backup data malloc");
-		return false;
-	}
-
-	while ((l = read(s, buf, 4096)) > 0) {
-		base = buf;
-		do {
-			t = write(d, base, l);
-			if (t < 0) {
-				perror("failed backup write");
-				return false;
-			}
-			base += t;
-			l -= t;
-		} while (l);
-	}
-
-	if (l < 0) {
-		perror("failed backup read");
-		return false;
-	}
-
-	free(name);
-	free(buf);
-	close(d);
-	close(s);
-
-	return true;
-}
-
-struct objtool_file *objtool_open_read(const char *_objname)
-{
-	if (objname) {
-		if (strcmp(objname, _objname)) {
-			WARN("won't handle more than one file at a time");
-			return NULL;
-		}
-		return &file;
+	if (file.elf) {
+		WARN("won't handle more than one file at a time");
+		return NULL;
 	}
-	objname = _objname;
 
-	file.elf = elf_open_read(objname, O_RDWR);
+	file.elf = elf_open_read(filename, O_RDWR);
 	if (!file.elf)
 		return NULL;
 
-	if (opts.backup && !objtool_create_backup(objname)) {
-		WARN("can't create backup file");
-		return NULL;
-	}
-
 	hash_init(file.insn_hash);
 	INIT_LIST_HEAD(&file.retpoline_call_list);
 	INIT_LIST_HEAD(&file.return_thunk_list);
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index a62247efb64f..05ef0e297837 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -10,7 +10,7 @@
 #include <objtool/warn.h>
 #include <objtool/endianness.h>
 
-int orc_dump(const char *_objname)
+int orc_dump(const char *filename)
 {
 	int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0;
 	struct orc_entry *orc = NULL;
@@ -26,12 +26,9 @@ int orc_dump(const char *_objname)
 	Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL;
 	struct elf dummy_elf = {};
 
-
-	objname = _objname;
-
 	elf_version(EV_CURRENT);
 
-	fd = open(objname, O_RDONLY);
+	fd = open(filename, O_RDONLY);
 	if (fd == -1) {
 		perror("open");
 		return -1;
diff --git a/tools/pci/Build b/tools/pci/Build
deleted file mode 100644
index c375aea21790..000000000000
--- a/tools/pci/Build
+++ /dev/null
@@ -1 +0,0 @@
-pcitest-y += pcitest.o
diff --git a/tools/pci/Makefile b/tools/pci/Makefile
deleted file mode 100644
index 62d41f1a1e2c..000000000000
--- a/tools/pci/Makefile
+++ /dev/null
@@ -1,58 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-include ../scripts/Makefile.include
-
-bindir ?= /usr/bin
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-endif
-
-# Do not use make's built-in rules
-# (this improves performance and avoids hard-to-debug behaviour);
-MAKEFLAGS += -r
-
-CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
-
-ALL_TARGETS := pcitest
-ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
-
-SCRIPTS := pcitest.sh
-
-all: $(ALL_PROGRAMS)
-
-export srctree OUTPUT CC LD CFLAGS
-include $(srctree)/tools/build/Makefile.include
-
-#
-# We need the following to be outside of kernel tree
-#
-$(OUTPUT)include/linux/: ../../include/uapi/linux/
-	mkdir -p $(OUTPUT)include/linux/ 2>&1 || true
-	ln -sf $(CURDIR)/../../include/uapi/linux/pcitest.h $@
-
-prepare: $(OUTPUT)include/linux/
-
-PCITEST_IN := $(OUTPUT)pcitest-in.o
-$(PCITEST_IN): prepare FORCE
-	$(Q)$(MAKE) $(build)=pcitest
-$(OUTPUT)pcitest: $(PCITEST_IN)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
-
-clean:
-	rm -f $(ALL_PROGRAMS)
-	rm -rf $(OUTPUT)include/
-	find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
-
-install: $(ALL_PROGRAMS)
-	install -d -m 755 $(DESTDIR)$(bindir);		\
-	for program in $(ALL_PROGRAMS); do		\
-		install $$program $(DESTDIR)$(bindir);	\
-	done;						\
-	for script in $(SCRIPTS); do			\
-		install $$script $(DESTDIR)$(bindir);	\
-	done
-
-FORCE:
-
-.PHONY: all install clean FORCE prepare
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
deleted file mode 100644
index 7b530d838d40..000000000000
--- a/tools/pci/pcitest.c
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/**
- * Userspace PCI Endpoint Test Module
- *
- * Copyright (C) 2017 Texas Instruments
- * Author: Kishon Vijay Abraham I <kishon@ti.com>
- */
-
-#include <errno.h>
-#include <fcntl.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-
-#include <linux/pcitest.h>
-
-static char *result[] = { "NOT OKAY", "OKAY" };
-static char *irq[] = { "LEGACY", "MSI", "MSI-X" };
-
-struct pci_test {
-	char		*device;
-	char		barnum;
-	bool		legacyirq;
-	unsigned int	msinum;
-	unsigned int	msixnum;
-	int		irqtype;
-	bool		set_irqtype;
-	bool		get_irqtype;
-	bool		clear_irq;
-	bool		read;
-	bool		write;
-	bool		copy;
-	unsigned long	size;
-	bool		use_dma;
-};
-
-static int run_test(struct pci_test *test)
-{
-	struct pci_endpoint_test_xfer_param param = {};
-	int ret = -EINVAL;
-	int fd;
-
-	fd = open(test->device, O_RDWR);
-	if (fd < 0) {
-		perror("can't open PCI Endpoint Test device");
-		return -ENODEV;
-	}
-
-	if (test->barnum >= 0 && test->barnum <= 5) {
-		ret = ioctl(fd, PCITEST_BAR, test->barnum);
-		fprintf(stdout, "BAR%d:\t\t", test->barnum);
-		if (ret < 0)
-			fprintf(stdout, "TEST FAILED\n");
-		else
-			fprintf(stdout, "%s\n", result[ret]);
-	}
-
-	if (test->set_irqtype) {
-		ret = ioctl(fd, PCITEST_SET_IRQTYPE, test->irqtype);
-		fprintf(stdout, "SET IRQ TYPE TO %s:\t\t", irq[test->irqtype]);
-		if (ret < 0)
-			fprintf(stdout, "FAILED\n");
-		else
-			fprintf(stdout, "%s\n", result[ret]);
-	}
-
-	if (test->get_irqtype) {
-		ret = ioctl(fd, PCITEST_GET_IRQTYPE);
-		fprintf(stdout, "GET IRQ TYPE:\t\t");
-		if (ret < 0)
-			fprintf(stdout, "FAILED\n");
-		else
-			fprintf(stdout, "%s\n", irq[ret]);
-	}
-
-	if (test->clear_irq) {
-		ret = ioctl(fd, PCITEST_CLEAR_IRQ);
-		fprintf(stdout, "CLEAR IRQ:\t\t");
-		if (ret < 0)
-			fprintf(stdout, "FAILED\n");
-		else
-			fprintf(stdout, "%s\n", result[ret]);
-	}
-
-	if (test->legacyirq) {
-		ret = ioctl(fd, PCITEST_LEGACY_IRQ, 0);
-		fprintf(stdout, "LEGACY IRQ:\t");
-		if (ret < 0)
-			fprintf(stdout, "TEST FAILED\n");
-		else
-			fprintf(stdout, "%s\n", result[ret]);
-	}
-
-	if (test->msinum > 0 && test->msinum <= 32) {
-		ret = ioctl(fd, PCITEST_MSI, test->msinum);
-		fprintf(stdout, "MSI%u:\t\t", test->msinum);
-		if (ret < 0)
-			fprintf(stdout, "TEST FAILED\n");
-		else
-			fprintf(stdout, "%s\n", result[ret]);
-	}
-
-	if (test->msixnum > 0 && test->msixnum <= 2048) {
-		ret = ioctl(fd, PCITEST_MSIX, test->msixnum);
-		fprintf(stdout, "MSI-X%u:\t\t", test->msixnum);
-		if (ret < 0)
-			fprintf(stdout, "TEST FAILED\n");
-		else
-			fprintf(stdout, "%s\n", result[ret]);
-	}
-
-	if (test->write) {
-		param.size = test->size;
-		if (test->use_dma)
-			param.flags = PCITEST_FLAGS_USE_DMA;
-		ret = ioctl(fd, PCITEST_WRITE, &param);
-		fprintf(stdout, "WRITE (%7lu bytes):\t\t", test->size);
-		if (ret < 0)
-			fprintf(stdout, "TEST FAILED\n");
-		else
-			fprintf(stdout, "%s\n", result[ret]);
-	}
-
-	if (test->read) {
-		param.size = test->size;
-		if (test->use_dma)
-			param.flags = PCITEST_FLAGS_USE_DMA;
-		ret = ioctl(fd, PCITEST_READ, &param);
-		fprintf(stdout, "READ (%7lu bytes):\t\t", test->size);
-		if (ret < 0)
-			fprintf(stdout, "TEST FAILED\n");
-		else
-			fprintf(stdout, "%s\n", result[ret]);
-	}
-
-	if (test->copy) {
-		param.size = test->size;
-		if (test->use_dma)
-			param.flags = PCITEST_FLAGS_USE_DMA;
-		ret = ioctl(fd, PCITEST_COPY, &param);
-		fprintf(stdout, "COPY (%7lu bytes):\t\t", test->size);
-		if (ret < 0)
-			fprintf(stdout, "TEST FAILED\n");
-		else
-			fprintf(stdout, "%s\n", result[ret]);
-	}
-
-	fflush(stdout);
-	close(fd);
-	return (ret < 0) ? ret : 1 - ret; /* return 0 if test succeeded */
-}
-
-int main(int argc, char **argv)
-{
-	int c;
-	struct pci_test *test;
-
-	test = calloc(1, sizeof(*test));
-	if (!test) {
-		perror("Fail to allocate memory for pci_test\n");
-		return -ENOMEM;
-	}
-
-	/* since '0' is a valid BAR number, initialize it to -1 */
-	test->barnum = -1;
-
-	/* set default size as 100KB */
-	test->size = 0x19000;
-
-	/* set default endpoint device */
-	test->device = "/dev/pci-endpoint-test.0";
-
-	while ((c = getopt(argc, argv, "D:b:m:x:i:deIlhrwcs:")) != EOF)
-	switch (c) {
-	case 'D':
-		test->device = optarg;
-		continue;
-	case 'b':
-		test->barnum = atoi(optarg);
-		if (test->barnum < 0 || test->barnum > 5)
-			goto usage;
-		continue;
-	case 'l':
-		test->legacyirq = true;
-		continue;
-	case 'm':
-		test->msinum = atoi(optarg);
-		if (test->msinum < 1 || test->msinum > 32)
-			goto usage;
-		continue;
-	case 'x':
-		test->msixnum = atoi(optarg);
-		if (test->msixnum < 1 || test->msixnum > 2048)
-			goto usage;
-		continue;
-	case 'i':
-		test->irqtype = atoi(optarg);
-		if (test->irqtype < 0 || test->irqtype > 2)
-			goto usage;
-		test->set_irqtype = true;
-		continue;
-	case 'I':
-		test->get_irqtype = true;
-		continue;
-	case 'r':
-		test->read = true;
-		continue;
-	case 'w':
-		test->write = true;
-		continue;
-	case 'c':
-		test->copy = true;
-		continue;
-	case 'e':
-		test->clear_irq = true;
-		continue;
-	case 's':
-		test->size = strtoul(optarg, NULL, 0);
-		continue;
-	case 'd':
-		test->use_dma = true;
-		continue;
-	case 'h':
-	default:
-usage:
-		fprintf(stderr,
-			"usage: %s [options]\n"
-			"Options:\n"
-			"\t-D <dev>		PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n"
-			"\t-b <bar num>		BAR test (bar number between 0..5)\n"
-			"\t-m <msi num>		MSI test (msi number between 1..32)\n"
-			"\t-x <msix num>	\tMSI-X test (msix number between 1..2048)\n"
-			"\t-i <irq type>	\tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n"
-			"\t-e			Clear IRQ\n"
-			"\t-I			Get current IRQ type configured\n"
-			"\t-d			Use DMA\n"
-			"\t-l			Legacy IRQ test\n"
-			"\t-r			Read buffer test\n"
-			"\t-w			Write buffer test\n"
-			"\t-c			Copy buffer test\n"
-			"\t-s <size>		Size of buffer {default: 100KB}\n"
-			"\t-h			Print this help message\n",
-			argv[0]);
-		return -EINVAL;
-	}
-
-	return run_test(test);
-}
diff --git a/tools/pci/pcitest.sh b/tools/pci/pcitest.sh
deleted file mode 100644
index 75ed48ff2990..000000000000
--- a/tools/pci/pcitest.sh
+++ /dev/null
@@ -1,72 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-echo "BAR tests"
-echo
-
-bar=0
-
-while [ $bar -lt 6 ]
-do
-	pcitest -b $bar
-	bar=`expr $bar + 1`
-done
-echo
-
-echo "Interrupt tests"
-echo
-
-pcitest -i 0
-pcitest -l
-
-pcitest -i 1
-msi=1
-
-while [ $msi -lt 33 ]
-do
-        pcitest -m $msi
-        msi=`expr $msi + 1`
-done
-echo
-
-pcitest -i 2
-msix=1
-
-while [ $msix -lt 2049 ]
-do
-        pcitest -x $msix
-        msix=`expr $msix + 1`
-done
-echo
-
-echo "Read Tests"
-echo
-
-pcitest -i 1
-
-pcitest -r -s 1
-pcitest -r -s 1024
-pcitest -r -s 1025
-pcitest -r -s 1024000
-pcitest -r -s 1024001
-echo
-
-echo "Write Tests"
-echo
-
-pcitest -w -s 1
-pcitest -w -s 1024
-pcitest -w -s 1025
-pcitest -w -s 1024000
-pcitest -w -s 1024001
-echo
-
-echo "Copy Tests"
-echo
-
-pcitest -c -s 1
-pcitest -c -s 1024
-pcitest -c -s 1025
-pcitest -c -s 1024000
-pcitest -c -s 1024001
-echo
diff --git a/tools/perf/Documentation/perf-check.txt b/tools/perf/Documentation/perf-check.txt
index 31741499e786..a764a4629220 100644
--- a/tools/perf/Documentation/perf-check.txt
+++ b/tools/perf/Documentation/perf-check.txt
@@ -51,7 +51,6 @@ feature::
                 dwarf_getlocations      /  HAVE_LIBDW_SUPPORT
                 dwarf-unwind            /  HAVE_DWARF_UNWIND_SUPPORT
                 auxtrace                /  HAVE_AUXTRACE_SUPPORT
-                libaudit                /  HAVE_LIBAUDIT_SUPPORT
                 libbfd                  /  HAVE_LIBBFD_SUPPORT
                 libcapstone             /  HAVE_LIBCAPSTONE_SUPPORT
                 libcrypto               /  HAVE_LIBCRYPTO_SUPPORT
@@ -67,7 +66,6 @@ feature::
                 libunwind               /  HAVE_LIBUNWIND_SUPPORT
                 lzma                    /  HAVE_LZMA_SUPPORT
                 numa_num_possible_cpus  /  HAVE_LIBNUMA_SUPPORT
-                syscall_table           /  HAVE_SYSCALL_TABLE_SUPPORT
                 zlib                    /  HAVE_ZLIB_SUPPORT
                 zstd                    /  HAVE_ZSTD_SUPPORT
 
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 1f668d4724e3..36ebebc875ea 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -40,7 +40,7 @@ The '$HOME/.perfconfig' file is used to store a per-user configuration.
 The file '$(sysconfdir)/perfconfig' can be used to
 store a system-wide default configuration.
 
-One an disable reading config files by setting the PERF_CONFIG environment
+One can disable reading config files by setting the PERF_CONFIG environment
 variable to /dev/null, or provide an alternate config file by setting that
 variable.
 
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index eaec8253be68..b77f58c4d2fd 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -148,6 +148,17 @@ OPTIONS for 'perf ftrace latency'
 --use-nsec::
 	Use nano-second instead of micro-second as a base unit of the histogram.
 
+--bucket-range=::
+	Bucket range in ms or ns (according to -n/--use-nsec), default is log2() mode.
+
+--min-latency=::
+	Minimum latency for the start of the first bucket, in ms or ns (according to
+	-n/--use-nsec).
+
+--max-latency=::
+	Maximum latency for the start of the last bucket, in ms or ns (according to
+	-n/--use-nsec). The setting is ignored if the value results in more than
+	22 buckets.
 
 OPTIONS for 'perf ftrace profile'
 ---------------------------------
@@ -190,6 +201,14 @@ OPTIONS for 'perf ftrace profile'
 	Sort the result by the given field.  Available values are:
 	total, avg, max, count, name.  Default is 'total'.
 
+--graph-opts::
+	List of options allowed to set:
+
+	  - nosleep-time - Measure on-CPU time only for function_graph tracer.
+	  - noirqs       - Ignore functions that happen inside interrupt.
+	  - thresh=<n>   - Setup trace duration threshold in microseconds.
+	  - depth=<n>    - Set max depth for function graph tracer to follow.
+
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 59ab1ff9d75f..cc0f37f0fa5a 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -151,7 +151,7 @@ displayed as follows:
 There are two ways that instructions-per-cycle (IPC) can be calculated depending
 on the recording.
 
-If the 'cyc' config term (see config terms section below) was used, then IPC
+If the 'cyc' config term (see <<_config_terms,config terms>> section below) was used, then IPC
 and cycle events are calculated using the cycle count from CYC packets, otherwise
 MTC packets are used - refer to the 'mtc' config term.  When MTC is used, however,
 the values are less accurate because the timing is less accurate.
@@ -239,7 +239,7 @@ which is the same as
 
 	-e intel_pt/tsc=1,noretcomp=0/
 
-Note there are now new config terms - see section 'config terms' further below.
+Note there are other config terms - see section <<_config_terms,config terms>> further below.
 
 The config terms are listed in /sys/devices/intel_pt/format.  They are bit
 fields within the config member of the struct perf_event_attr which is
@@ -311,218 +311,271 @@ perf_event_attr is displayed if the -vv option is used e.g.
 config terms
 ~~~~~~~~~~~~
 
-The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
-Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
-Some of the features are reflect in new config terms.  All the config terms are
-described below.
-
-tsc		Always supported.  Produces TSC timestamp packets to provide
-		timing information.  In some cases it is possible to decode
-		without timing information, for example a per-thread context
-		that does not overlap executable memory maps.
-
-		The default config selects tsc (i.e. tsc=1).
-
-noretcomp	Always supported.  Disables "return compression" so a TIP packet
-		is produced when a function returns.  Causes more packets to be
-		produced but might make decoding more reliable.
-
-		The default config does not select noretcomp (i.e. noretcomp=0).
-
-psb_period	Allows the frequency of PSB packets to be specified.
-
-		The PSB packet is a synchronization packet that provides a
-		starting point for decoding or recovery from errors.
-
-		Support for psb_period is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
-
-		which contains "1" if the feature is supported and "0"
-		otherwise.
-
-		Valid values are given by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/psb_periods
-
-		which contains a hexadecimal value, the bits of which represent
-		valid values e.g. bit 2 set means value 2 is valid.
-
-		The psb_period value is converted to the approximate number of
-		trace bytes between PSB packets as:
-
-			2 ^ (value + 11)
-
-		e.g. value 3 means 16KiB bytes between PSBs
-
-		If an invalid value is entered, the error message
-		will give a list of valid values e.g.
-
-			$ perf record -e intel_pt/psb_period=15/u uname
-			Invalid psb_period for intel_pt. Valid values are: 0-5
-
-		If MTC packets are selected, the default config selects a value
-		of 3 (i.e. psb_period=3) or the nearest lower value that is
-		supported (0 is always supported).  Otherwise the default is 0.
-
-		If decoding is expected to be reliable and the buffer is large
-		then a large PSB period can be used.
-
-		Because a TSC packet is produced with PSB, the PSB period can
-		also affect the granularity to timing information in the absence
-		of MTC or CYC.
-
-mtc		Produces MTC timing packets.
-
-		MTC packets provide finer grain timestamp information than TSC
-		packets.  MTC packets record time using the hardware crystal
-		clock (CTC) which is related to TSC packets using a TMA packet.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/mtc
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
-		The frequency of MTC packets can also be specified - see
-		mtc_period below.
-
-mtc_period	Specifies how frequently MTC packets are produced - see mtc
-		above for how to determine if MTC packets are supported.
-
-		Valid values are given by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/mtc_periods
-
-		which contains a hexadecimal value, the bits of which represent
-		valid values e.g. bit 2 set means value 2 is valid.
-
-		The mtc_period value is converted to the MTC frequency as:
-
-			CTC-frequency / (2 ^ value)
-
-		e.g. value 3 means one eighth of CTC-frequency
-
-		Where CTC is the hardware crystal clock, the frequency of which
-		can be related to TSC via values provided in cpuid leaf 0x15.
-
-		If an invalid value is entered, the error message
-		will give a list of valid values e.g.
-
-			$ perf record -e intel_pt/mtc_period=15/u uname
-			Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
-
-		The default value is 3 or the nearest lower value
-		that is supported (0 is always supported).
-
-cyc		Produces CYC timing packets.
-
-		CYC packets provide even finer grain timestamp information than
-		MTC and TSC packets.  A CYC packet contains the number of CPU
-		cycles since the last CYC packet. Unlike MTC and TSC packets,
-		CYC packets are only sent when another packet is also sent.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
-		The number of CYC packets produced can be reduced by specifying
-		a threshold - see cyc_thresh below.
-
-cyc_thresh	Specifies how frequently CYC packets are produced - see cyc
-		above for how to determine if CYC packets are supported.
-
-		Valid cyc_thresh values are given by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
-
-		which contains a hexadecimal value, the bits of which represent
-		valid values e.g. bit 2 set means value 2 is valid.
-
-		The cyc_thresh value represents the minimum number of CPU cycles
-		that must have passed before a CYC packet can be sent.  The
-		number of CPU cycles is:
-
-			2 ^ (value - 1)
-
-		e.g. value 4 means 8 CPU cycles must pass before a CYC packet
-		can be sent.  Note a CYC packet is still only sent when another
-		packet is sent, not at, e.g. every 8 CPU cycles.
-
-		If an invalid value is entered, the error message
-		will give a list of valid values e.g.
-
-			$ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
-			Invalid cyc_thresh for intel_pt. Valid values are: 0-12
-
-		CYC packets are not requested by default.
-
-pt		Specifies pass-through which enables the 'branch' config term.
-
-		The default config selects 'pt' if it is available, so a user will
-		never need to specify this term.
-
-branch		Enable branch tracing.  Branch tracing is enabled by default so to
-		disable branch tracing use 'branch=0'.
-
-		The default config selects 'branch' if it is available.
-
-ptw		Enable PTWRITE packets which are produced when a ptwrite instruction
-		is executed.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/ptwrite
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
-		As an alternative, refer to "Emulated PTWRITE" further below.
-
-fup_on_ptw	Enable a FUP packet to follow the PTWRITE packet.  The FUP packet
-		provides the address of the ptwrite instruction.  In the absence of
-		fup_on_ptw, the decoder will use the address of the previous branch
-		if branch tracing is enabled, otherwise the address will be zero.
-		Note that fup_on_ptw will work even when branch tracing is disabled.
-
-pwr_evt		Enable power events.  The power events provide information about
-		changes to the CPU C-state.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
-event		Enable Event Trace.  The events provide information about asynchronous
-		events.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/event_trace
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
-notnt		Disable TNT packets.  Without TNT packets, it is not possible to walk
-		executable code to reconstruct control flow, however FUP, TIP, TIP.PGE
-		and TIP.PGD packets still indicate asynchronous control flow, and (if
-		return compression is disabled - see noretcomp) return statements.
-		The advantage of eliminating TNT packets is reducing the size of the
-		trace and corresponding tracing overhead.
-
-		Support for this feature is indicated by:
-
-			/sys/bus/event_source/devices/intel_pt/caps/tnt_disable
-
-		which contains "1" if the feature is supported and
-		"0" otherwise.
-
+Config terms are parameters specified with the -e intel_pt// event option,
+for example:
+
+	-e intel_pt/cyc/
+
+which selects cycle accurate mode. Each config term can have a value which
+defaults to 1, so the above is the same as:
+
+	-e intel_pt/cyc=1/
+
+Some terms are set by default, so must be set to 0 to turn them off. For
+example, to turn off branch tracing:
+
+	-e intel_pt/branch=0/
+
+Multiple config terms are separated by commas, for example:
+
+	-e intel_pt/cyc,mtc_period=9/
+
+There are also common config terms, see linkperf:perf-record[1] documentation.
+
+Intel PT config terms are described below.
+
+*tsc*::
+Always supported.  Produces TSC timestamp packets to provide
+timing information.  In some cases it is possible to decode
+without timing information, for example a per-thread context
+that does not overlap executable memory maps.
++
+The default config selects tsc (i.e. tsc=1).
+
+*noretcomp*::
+Always supported.  Disables "return compression" so a TIP packet
+is produced when a function returns.  Causes more packets to be
+produced but might make decoding more reliable.
++
+The default config does not select noretcomp (i.e. noretcomp=0).
+
+*psb_period*::
+Allows the frequency of PSB packets to be specified.
++
+The PSB packet is a synchronization packet that provides a
+starting point for decoding or recovery from errors.
++
+Support for psb_period is indicated by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
++
+which contains "1" if the feature is supported and "0"
+otherwise.
++
+Valid values are given by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/psb_periods
++
+which contains a hexadecimal value, the bits of which represent
+valid values e.g. bit 2 set means value 2 is valid.
++
+The psb_period value is converted to the approximate number of
+trace bytes between PSB packets as:
++
+	2 ^ (value + 11)
++
+e.g. value 3 means 16KiB bytes between PSBs
++
+If an invalid value is entered, the error message
+will give a list of valid values e.g.
++
+	$ perf record -e intel_pt/psb_period=15/u uname
+	Invalid psb_period for intel_pt. Valid values are: 0-5
++
+If MTC packets are selected, the default config selects a value
+of 3 (i.e. psb_period=3) or the nearest lower value that is
+supported (0 is always supported).  Otherwise the default is 0.
++
+If decoding is expected to be reliable and the buffer is large
+then a large PSB period can be used.
++
+Because a TSC packet is produced with PSB, the PSB period can
+also affect the granularity to timing information in the absence
+of MTC or CYC.
+
+*mtc*::
+Produces MTC timing packets.
++
+MTC packets provide finer grain timestamp information than TSC
+packets.  MTC packets record time using the hardware crystal
+clock (CTC) which is related to TSC packets using a TMA packet.
++
+Support for this feature is indicated by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/mtc
++
+which contains "1" if the feature is supported and
+"0" otherwise.
++
+The frequency of MTC packets can also be specified - see
+mtc_period below.
+
+*mtc_period*::
+Specifies how frequently MTC packets are produced - see mtc
+above for how to determine if MTC packets are supported.
++
+Valid values are given by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/mtc_periods
++
+which contains a hexadecimal value, the bits of which represent
+valid values e.g. bit 2 set means value 2 is valid.
++
+The mtc_period value is converted to the MTC frequency as:
+
+	CTC-frequency / (2 ^ value)
++
+e.g. value 3 means one eighth of CTC-frequency
++
+Where CTC is the hardware crystal clock, the frequency of which
+can be related to TSC via values provided in cpuid leaf 0x15.
++
+If an invalid value is entered, the error message
+will give a list of valid values e.g.
++
+	$ perf record -e intel_pt/mtc_period=15/u uname
+	Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
++
+The default value is 3 or the nearest lower value
+that is supported (0 is always supported).
+
+*cyc*::
+Produces CYC timing packets.
++
+CYC packets provide even finer grain timestamp information than
+MTC and TSC packets.  A CYC packet contains the number of CPU
+cycles since the last CYC packet. Unlike MTC and TSC packets,
+CYC packets are only sent when another packet is also sent.
++
+Support for this feature is indicated by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/psb_cyc
++
+which contains "1" if the feature is supported and
+"0" otherwise.
++
+The number of CYC packets produced can be reduced by specifying
+a threshold - see cyc_thresh below.
+
+*cyc_thresh*::
+Specifies how frequently CYC packets are produced - see cyc
+above for how to determine if CYC packets are supported.
++
+Valid cyc_thresh values are given by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
++
+which contains a hexadecimal value, the bits of which represent
+valid values e.g. bit 2 set means value 2 is valid.
++
+The cyc_thresh value represents the minimum number of CPU cycles
+that must have passed before a CYC packet can be sent.  The
+number of CPU cycles is:
++
+	2 ^ (value - 1)
++
+e.g. value 4 means 8 CPU cycles must pass before a CYC packet
+can be sent.  Note a CYC packet is still only sent when another
+packet is sent, not at, e.g. every 8 CPU cycles.
++
+If an invalid value is entered, the error message
+will give a list of valid values e.g.
++
+	$ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
+	Invalid cyc_thresh for intel_pt. Valid values are: 0-12
++
+CYC packets are not requested by default.
+
+*pt*::
+Specifies pass-through which enables the 'branch' config term.
++
+The default config selects 'pt' if it is available, so a user will
+never need to specify this term.
+
+*branch*::
+Enable branch tracing.  Branch tracing is enabled by default so to
+disable branch tracing use 'branch=0'.
++
+The default config selects 'branch' if it is available.
+
+*ptw*::
+Enable PTWRITE packets which are produced when a ptwrite instruction
+is executed.
++
+Support for this feature is indicated by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/ptwrite
++
+which contains "1" if the feature is supported and
+"0" otherwise.
++
+As an alternative, refer to "Emulated PTWRITE" further below.
+
+*fup_on_ptw*::
+Enable a FUP packet to follow the PTWRITE packet.  The FUP packet
+provides the address of the ptwrite instruction.  In the absence of
+fup_on_ptw, the decoder will use the address of the previous branch
+if branch tracing is enabled, otherwise the address will be zero.
+Note that fup_on_ptw will work even when branch tracing is disabled.
+
+*pwr_evt*::
+Enable power events.  The power events provide information about
+changes to the CPU C-state.
++
+Support for this feature is indicated by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/power_event_trace
++
+which contains "1" if the feature is supported and
+"0" otherwise.
+
+*event*::
+Enable Event Trace.  The events provide information about asynchronous
+events.
++
+Support for this feature is indicated by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/event_trace
++
+which contains "1" if the feature is supported and
+"0" otherwise.
+
+*notnt*::
+Disable TNT packets.  Without TNT packets, it is not possible to walk
+executable code to reconstruct control flow, however FUP, TIP, TIP.PGE
+and TIP.PGD packets still indicate asynchronous control flow, and (if
+return compression is disabled - see noretcomp) return statements.
+The advantage of eliminating TNT packets is reducing the size of the
+trace and corresponding tracing overhead.
++
+Support for this feature is indicated by:
++
+	/sys/bus/event_source/devices/intel_pt/caps/tnt_disable
++
+which contains "1" if the feature is supported and
+"0" otherwise.
+
+*aux-action=start-paused*::
+Start tracing paused, refer to the section <<_pause_or_resume_tracing,Pause or Resume Tracing>>
+
+
+config terms on other events
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some Intel PT features work with other events, features such as AUX area sampling
+and PEBS-via-PT.  In those cases, the other events can have config terms below:
+
+*aux-sample-size*::
+		Used to set the AUX area sample size, refer to the section
+		<<_aux_area_sampling_option,AUX area sampling option>>
+
+*aux-output*::
+		Used to select PEBS-via-PT, refer to the
+		section <<_pebs_via_intel_pt,PEBS via Intel PT>>
+
+*aux-action*::
+		Used to pause or resume tracing, refer to the section
+		<<_pause_or_resume_tracing,Pause or Resume Tracing>>
 
 AUX area sampling option
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -596,7 +649,8 @@ The default snapshot size is the auxtrace mmap size.  If neither auxtrace mmap s
 nor snapshot size is specified, then the default is 4MiB for privileged users
 (or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
 If an unprivileged user does not specify mmap pages, the mmap pages will be
-reduced as described in the 'new auxtrace mmap size option' section below.
+reduced as described in the <<_new_auxtrace_mmap_size_option,new auxtrace mmap size option>>
+section below.
 
 The snapshot size is displayed if the option -vv is used e.g.
 
@@ -952,11 +1006,11 @@ transaction start, commit or abort.
 
 Note that "instructions", "cycles", "branches" and "transactions" events
 depend on code flow packets which can be disabled by using the config term
-"branch=0".  Refer to the config terms section above.
+"branch=0".  Refer to the <<_config_terms,config terms>> section above.
 
 "ptwrite" events record the payload of the ptwrite instruction and whether
 "fup_on_ptw" was used.  "ptwrite" events depend on PTWRITE packets which are
-recorded only if the "ptw" config term was used.  Refer to the config terms
+recorded only if the "ptw" config term was used.  Refer to the <<_config_terms,config terms>>
 section above.  perf script "synth" field displays "ptwrite" information like
 this: "ip: 0 payload: 0x123456789abcdef0"  where "ip" is 1 if "fup_on_ptw" was
 used.
@@ -964,7 +1018,7 @@ used.
 "Power" events correspond to power event packets and CBR (core-to-bus ratio)
 packets.  While CBR packets are always recorded when tracing is enabled, power
 event packets are recorded only if the "pwr_evt" config term was used.  Refer to
-the config terms section above.  The power events record information about
+the <<_config_terms,config terms>> section above.  The power events record information about
 C-state changes, whereas CBR is indicative of CPU frequency.  perf script
 "event,synth" fields display information like this:
 
@@ -1120,7 +1174,7 @@ What *will* be decoded with the (single) q option:
 	- asynchronous branches such as interrupts
 	- indirect branches
 	- function return target address *if* the noretcomp config term (refer
-	config terms section) was used
+	<<_config_terms,config terms>> section) was used
 	- start of (control-flow) tracing
 	- end of (control-flow) tracing, if it is not out of context
 	- power events, ptwrite, transaction start and abort
@@ -1133,7 +1187,7 @@ Repeating the q option (double-q i.e. qq) results in even faster decoding and ev
 less detail.  The decoder decodes only extended PSB (PSB+) packets, getting the
 instruction pointer if there is a FUP packet within PSB+ (i.e. between PSB and
 PSBEND).  Note PSB packets occur regularly in the trace based on the psb_period
-config term (refer config terms section).  There will be a FUP packet if the
+config term (refer <<_config_terms,config terms>> section).  There will be a FUP packet if the
 PSB+ occurs while control flow is being traced.
 
 What will *not* be decoded with the qq option:
@@ -1867,6 +1921,108 @@ For pipe mode, the order of events and timestamps can presumably
 be messed up.
 
 
+Pause or Resume Tracing
+-----------------------
+
+With newer Kernels, it is possible to use other selected events to pause
+or resume Intel PT tracing.  This is configured by using the "aux-action"
+config term:
+
+"aux-action=pause" is used with events that are to pause Intel PT tracing.
+
+"aux-action=resume" is used with events that are to resume Intel PT tracing.
+
+"aux-action=start-paused" is used with the Intel PT event to start in a
+paused state.
+
+For example, to trace only the uname system call (sys_newuname) when running the
+command line utility uname:
+
+ $ perf record --kcore -e intel_pt/aux-action=start-paused/k,syscalls:sys_enter_newuname/aux-action=resume/,syscalls:sys_exit_newuname/aux-action=pause/ uname
+ Linux
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.043 MB perf.data ]
+ $ perf script --call-trace
+ uname   30805 [000] 24001.058782799: name: 0x7ffc9c1865b0
+ uname   30805 [000] 24001.058784424:  psb offs: 0
+ uname   30805 [000] 24001.058784424:  cbr: 39 freq: 3904 MHz (139%)
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])        debug_smp_processor_id
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])        __x64_sys_newuname
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])            down_read
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                __cond_resched
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_add
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                    in_lock_functions
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_sub
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])            up_read
+ uname   30805 [000] 24001.058784629: ([kernel.kallsyms])                preempt_count_add
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                    in_lock_functions
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                preempt_count_sub
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])            _copy_to_user
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])        syscall_exit_to_user_mode
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])            syscall_exit_work
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                perf_syscall_exit
+ uname   30805 [000] 24001.058784838: ([kernel.kallsyms])                    debug_smp_processor_id
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                    perf_trace_buf_alloc
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_swevent_get_recursion_context
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            debug_smp_processor_id
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        debug_smp_processor_id
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                    perf_tp_event
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_trace_buf_update
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            tracing_gen_ctx_irq_test
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                        perf_swevent_event
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            __perf_event_account_interrupt
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                __this_cpu_preempt_check
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                            perf_event_output_forward
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                perf_event_aux_pause
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                    ring_buffer_get
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                        __rcu_read_lock
+ uname   30805 [000] 24001.058785046: ([kernel.kallsyms])                                        __rcu_read_unlock
+ uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                    pt_event_stop
+ uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        debug_smp_processor_id
+ uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        debug_smp_processor_id
+ uname   30805 [000] 24001.058785254: ([kernel.kallsyms])                                        native_write_msr
+ uname   30805 [000] 24001.058785463: ([kernel.kallsyms])                                        native_write_msr
+ uname   30805 [000] 24001.058785639: 0x0
+
+The example above uses tracepoints, but any kind of sampled event can be used.
+
+For example:
+
+ Tracing between arch_cpu_idle_enter() and arch_cpu_idle_exit() using breakpoint events:
+
+ $ sudo cat /proc/kallsyms | sort | grep ' arch_cpu_idle_enter\| arch_cpu_idle_exit'
+ ffffffffb605bf60 T arch_cpu_idle_enter
+ ffffffffb614d8a0 W arch_cpu_idle_exit
+ $ sudo perf record --kcore -a -e intel_pt/aux-action=start-paused/k -e mem:0xffffffffb605bf60:x/aux-action=resume/ -e mem:0xffffffffb614d8a0:x/aux-action=pause/ -- sleep 1
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 1.387 MB perf.data ]
+
+ Tracing __alloc_pages() using kprobes:
+
+ $ sudo perf probe --add '__alloc_pages order'
+ Added new event:  probe:__alloc_pages  (on __alloc_pages with order)
+ $ sudo perf probe --add __alloc_pages%return
+ Added new event:  probe:__alloc_pages__return (on __alloc_pages%return)
+ $ sudo perf record --kcore -aR -e intel_pt/aux-action=start-paused/k -e probe:__alloc_pages/aux-action=resume/ -e probe:__alloc_pages__return/aux-action=pause/ -- sleep 1
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 1.490 MB perf.data ]
+
+ Tracing starting at main() using a uprobe event:
+
+ $ sudo perf probe -x /usr/bin/uname main
+ Added new event:  probe_uname:main     (on main in /usr/bin/uname)
+ $ sudo perf record -e intel_pt/-aux-action=start-paused/u -e probe_uname:main/aux-action=resume/ -- uname
+ Linux
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.031 MB perf.data ]
+
+ Tracing occasionally using cycles events with different periods:
+
+ $ perf record --kcore -a -m,64M -e intel_pt/aux-action=start-paused/k -e cycles/aux-action=pause,period=1000000/Pk -e cycles/aux-action=resume,period=10500000/Pk -- firefox
+ [ perf record: Woken up 19 times to write data ]
+ [ perf record: Captured and wrote 16.561 MB perf.data ]
+
+
 EXAMPLE
 -------
 
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 57a940399de0..d3793054f7d3 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -187,8 +187,8 @@ CONTENTION OPTIONS
 	Show lock contention only for given lock types (comma separated list).
 	Available values are:
 	  semaphore, spinlock, rwlock, rwlock:R, rwlock:W, rwsem, rwsem:R, rwsem:W,
-	  rtmutex, rwlock-rt, rwlock-rt:R, rwlock-rt:W, pcpu-sem, pcpu-sem:R, pcpu-sem:W,
-	  mutex
+	  rtmutex, rwlock-rt, rwlock-rt:R, rwlock-rt:W, percpu-rwmem, pcpu-sem,
+	  pcpu-sem:R, pcpu-sem:W, mutex
 
 	Note that RW-variant of locks have :R and :W suffix.  Names without the
 	suffix are shortcuts for the both variants.  Ex) rwsem = rwsem:R + rwsem:W.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 242223240a08..80686d590de2 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -68,6 +68,10 @@ OPTIONS
 		    like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'.
 	  - 'aux-output': Generate AUX records instead of events. This requires
 			  that an AUX area event is also provided.
+	  - 'aux-action': "pause" or "resume" to pause or resume an AUX
+			  area event (the group leader) when this event occurs.
+			  "start-paused" on an AUX area event itself, will
+			  start in a paused state.
 	  - 'aux-sample-size': Set sample size for AUX area sampling. If the
 	  '--aux-sample' option has been used, set aux-sample-size=0 to disable
 	  AUX area sampling for the event.
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index efcdec528a8f..32da0d1fa86a 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -28,18 +28,22 @@ OPTIONS
 	Tests to skip (comma separated numeric list).
 
 -v::
+-vv::
+-vvv::
 --verbose::
-	Be more verbose.
+	With a single '-v', verbose level 1, only failing test output
+	is displayed. With '-vv' and higher all test output is shown.
 
 -S::
 --sequential::
-	Run tests one after the other, this is the default mode.
-
--p:: 
---parallel::
-	Run tests in parallel, speeds up the whole process but is not safe with
-	the current infrastructure, where some tests that compete for some resources,
-	for instance, 'perf probe' tests that add/remove probes or clean all probes, etc.
+	Run all tests one after the other. By default "exclusive"
+	tests are run sequentially, but other tests are run in
+	parallel to speed execution.
+
+-r::
+--runs-per-test::
+	Run each test the given number of times, by default once. This
+	option can be useful to determine if a test is flaky.
 
 -F::
 --dont-fork::
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 6e0cc50bbc13..fb3d2af33844 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -241,6 +241,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 	printing using the existing 'perf trace' syscall arg beautifiers to map integer
 	arguments to strings (pid to comm, syscall id to syscall name, etc).
 
+--force-btf::
+	Use btf_dump to pretty print syscall argument data, instead of using hand-crafted pretty
+	printers. This option is intended for testing BTF integration in perf trace. btf_dump-based
+	pretty-printing serves as a fallback to hand-crafted pretty printers, as the latter can
+	better pretty-print integer flags and struct pointers.
 
 PAGEFAULTS
 ----------
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index dc42de1785ce..364b55b00b48 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,5 +1,8 @@
+COPYING
+LICENSES/preferred/GPL-2.0
 arch/arm64/tools/gen-sysreg.awk
 arch/arm64/tools/sysreg
+arch/*/include/uapi/asm/bpf_perf_event.h
 tools/perf
 tools/arch
 tools/scripts
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 2916d59c88cd..a148ca9efca9 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -28,63 +28,58 @@ include $(srctree)/tools/scripts/Makefile.arch
 
 $(call detected_var,SRCARCH)
 
-ifneq ($(NO_SYSCALL_TABLE),1)
-  NO_SYSCALL_TABLE := 1
-
-  ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 powerpc arm64 s390 mips loongarch riscv))
-    NO_SYSCALL_TABLE := 0
-  endif
-
-  ifneq ($(NO_SYSCALL_TABLE),1)
-    CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
-  endif
-endif
+CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated
 
 # Additional ARCH settings for ppc
 ifeq ($(SRCARCH),powerpc)
-  CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
-  LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
+  ifndef NO_LIBUNWIND
+    LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
+  endif
 endif
 
 # Additional ARCH settings for x86
 ifeq ($(SRCARCH),x86)
   $(call detected,CONFIG_X86)
-  CFLAGS += -I$(OUTPUT)arch/x86/include/generated
   ifeq (${IS_64_BIT}, 1)
     CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT
     ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
-    LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
+    ifndef NO_LIBUNWIND
+      LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
+    endif
     $(call detected,CONFIG_X86_64)
   else
-    LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
+    ifndef NO_LIBUNWIND
+      LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
+    endif
   endif
 endif
 
 ifeq ($(SRCARCH),arm)
-  LIBUNWIND_LIBS = -lunwind -lunwind-arm
+  ifndef NO_LIBUNWIND
+    LIBUNWIND_LIBS = -lunwind -lunwind-arm
+  endif
 endif
 
 ifeq ($(SRCARCH),arm64)
-  CFLAGS += -I$(OUTPUT)arch/arm64/include/generated
-  LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
+  ifndef NO_LIBUNWIND
+    LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
+  endif
 endif
 
 ifeq ($(SRCARCH),loongarch)
-  CFLAGS += -I$(OUTPUT)arch/loongarch/include/generated
-  LIBUNWIND_LIBS = -lunwind -lunwind-loongarch64
+  ifndef NO_LIBUNWIND
+    LIBUNWIND_LIBS = -lunwind -lunwind-loongarch64
+  endif
 endif
 
 ifeq ($(ARCH),s390)
-  CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated
+  CFLAGS += -fPIC
 endif
 
 ifeq ($(ARCH),mips)
-  CFLAGS += -I$(OUTPUT)arch/mips/include/generated
-  LIBUNWIND_LIBS = -lunwind -lunwind-mips
-endif
-
-ifeq ($(ARCH),riscv)
-  CFLAGS += -I$(OUTPUT)arch/riscv/include/generated
+  ifndef NO_LIBUNWIND
+    LIBUNWIND_LIBS = -lunwind -lunwind-mips
+  endif
 endif
 
 # So far there's only x86 and arm libdw unwind support merged in perf.
@@ -121,16 +116,18 @@ ifdef LIBUNWIND_DIR
   $(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch)))
 endif
 
-# Set per-feature check compilation flags
-FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
-FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
-FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
-FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
-
-FEATURE_CHECK_LDFLAGS-libunwind-arm += -lunwind -lunwind-arm
-FEATURE_CHECK_LDFLAGS-libunwind-aarch64 += -lunwind -lunwind-aarch64
-FEATURE_CHECK_LDFLAGS-libunwind-x86 += -lunwind -llzma -lunwind-x86
-FEATURE_CHECK_LDFLAGS-libunwind-x86_64 += -lunwind -llzma -lunwind-x86_64
+ifndef NO_LIBUNWIND
+  # Set per-feature check compilation flags
+  FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
+  FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
+  FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
+  FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
+  
+  FEATURE_CHECK_LDFLAGS-libunwind-arm += -lunwind -lunwind-arm
+  FEATURE_CHECK_LDFLAGS-libunwind-aarch64 += -lunwind -lunwind-aarch64
+  FEATURE_CHECK_LDFLAGS-libunwind-x86 += -lunwind -llzma -lunwind-x86
+  FEATURE_CHECK_LDFLAGS-libunwind-x86_64 += -lunwind -llzma -lunwind-x86_64
+endif
 
 FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
 
@@ -155,7 +152,7 @@ ifdef LIBDW_DIR
 endif
 DWARFLIBS := -ldw
 ifeq ($(findstring -static,${LDFLAGS}),-static)
-  DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd
+  DWARFLIBS += -lelf -lz -llzma -lbz2
 
   LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0
   LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION)))
@@ -550,6 +547,12 @@ ifndef NO_LIBELF
     CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT
   endif
 
+  ifeq ($(feature-libelf-zstd), 1)
+    ifdef NO_LIBZSTD
+      $(error Error: libzstd is required by libelf, please do not set NO_LIBZSTD)
+    endif
+  endif
+
   ifndef NO_LIBDEBUGINFOD
     $(call feature_check,libdebuginfod)
     ifeq ($(feature-libdebuginfod), 1)
@@ -734,26 +737,25 @@ ifeq ($(dwarf-post-unwind),1)
   $(call detected,CONFIG_DWARF_UNWIND)
 endif
 
-ifndef NO_LOCAL_LIBUNWIND
-  ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
-    $(call feature_check,libunwind-debug-frame)
-    ifneq ($(feature-libunwind-debug-frame), 1)
-      $(warning No debug_frame support found in libunwind)
+ifndef NO_LIBUNWIND
+  ifndef NO_LOCAL_LIBUNWIND
+    ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
+      $(call feature_check,libunwind-debug-frame)
+      ifneq ($(feature-libunwind-debug-frame), 1)
+        $(warning No debug_frame support found in libunwind)
+        CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
+      endif
+    else
+      # non-ARM has no dwarf_find_debug_frame() function:
       CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
     endif
-  else
-    # non-ARM has no dwarf_find_debug_frame() function:
-    CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
+    EXTLIBS += $(LIBUNWIND_LIBS)
+    LDFLAGS += $(LIBUNWIND_LIBS)
+  endif
+  ifeq ($(findstring -static,${LDFLAGS}),-static)
+    # gcc -static links libgcc_eh which contans piece of libunwind
+    LIBUNWIND_LDFLAGS += -Wl,--allow-multiple-definition
   endif
-  EXTLIBS += $(LIBUNWIND_LIBS)
-  LDFLAGS += $(LIBUNWIND_LIBS)
-endif
-ifeq ($(findstring -static,${LDFLAGS}),-static)
-  # gcc -static links libgcc_eh which contans piece of libunwind
-  LIBUNWIND_LDFLAGS += -Wl,--allow-multiple-definition
-endif
-
-ifndef NO_LIBUNWIND
   CFLAGS  += -DHAVE_LIBUNWIND_SUPPORT
   CFLAGS  += $(LIBUNWIND_CFLAGS)
   LDFLAGS += $(LIBUNWIND_LDFLAGS)
@@ -761,21 +763,7 @@ ifndef NO_LIBUNWIND
 endif
 
 ifneq ($(NO_LIBTRACEEVENT),1)
-  ifeq ($(NO_SYSCALL_TABLE),0)
-    $(call detected,CONFIG_TRACE)
-  else
-    ifndef NO_LIBAUDIT
-      $(call feature_check,libaudit)
-      ifneq ($(feature-libaudit), 1)
-        $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev)
-        NO_LIBAUDIT := 1
-      else
-        CFLAGS += -DHAVE_LIBAUDIT_SUPPORT
-        EXTLIBS += -laudit
-        $(call detected,CONFIG_TRACE)
-      endif
-    endif
-  endif
+  $(call detected,CONFIG_TRACE)
 endif
 
 ifndef NO_LIBCRYPTO
@@ -1172,7 +1160,6 @@ endif
 
 # libtraceevent is a recommended dependency picked up from the system.
 ifneq ($(NO_LIBTRACEEVENT),1)
-  $(call feature_check,libtraceevent)
   ifeq ($(feature-libtraceevent), 1)
     CFLAGS += -DHAVE_LIBTRACEEVENT $(shell $(PKG_CONFIG) --cflags libtraceevent)
     LDFLAGS += $(shell $(PKG_CONFIG) --libs-only-L libtraceevent)
@@ -1188,7 +1175,6 @@ ifneq ($(NO_LIBTRACEEVENT),1)
     $(error ERROR: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel and/or set LIBTRACEEVENT_DIR or build with NO_LIBTRACEEVENT=1)
   endif
 
-  $(call feature_check,libtracefs)
   ifeq ($(feature-libtracefs), 1)
     CFLAGS +=  $(shell $(PKG_CONFIG) --cflags libtracefs)
     LDFLAGS += $(shell $(PKG_CONFIG) --libs-only-L libtracefs)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index d74241a15131..05c083bb1122 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -59,8 +59,6 @@ include ../scripts/utilities.mak
 #
 # Define NO_LIBNUMA if you do not want numa perf benchmark
 #
-# Define NO_LIBAUDIT if you do not want libaudit support
-#
 # Define NO_LIBBIONIC if you do not want bionic support
 #
 # Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support
@@ -119,10 +117,6 @@ include ../scripts/utilities.mak
 #
 # Define LIBBPF_DYNAMIC to enable libbpf dynamic linking.
 #
-# Define NO_SYSCALL_TABLE=1 to disable the use of syscall id to/from name tables
-# generated from the kernel .tbl or unistd.h files and use, if available, libaudit
-# for doing the conversions to/from strings/id.
-#
 # Define NO_LIBPFM4 to disable libpfm4 events extension.
 #
 # Define NO_LIBDEBUGINFOD if you do not want support debuginfod
@@ -167,12 +161,6 @@ export VPATH
 SOURCE := $(shell ln -sf $(srctree)/tools/perf $(OUTPUT)/source)
 endif
 
-ifeq ($(V),1)
-  Q =
-else
-  Q = @
-endif
-
 # Do not use make's built-in rules
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
@@ -310,6 +298,7 @@ ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump)
 FEATURE_TESTS := all
 endif
 endif
+include $(srctree)/tools/perf/scripts/Makefile.syscalls
 include Makefile.config
 endif
 
@@ -487,6 +476,9 @@ endif
 EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS))
 LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
 
+PERFLIBS_PY := $(call filter-out,$(LIBPERF_BENCH) $(LIBPERF_TEST),$(PERFLIBS))
+LIBS_PY = -Wl,--whole-archive $(PERFLIBS_PY) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
+
 export INSTALL SHELL_PATH
 
 ### Build rules
@@ -735,9 +727,9 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
 # Create python binding output directory if not already present
 $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python')
 
-$(OUTPUT)python/perf$(PYTHON_EXTENSION_SUFFIX): util/python.c util/setup.py $(PERFLIBS)
+$(OUTPUT)python/perf$(PYTHON_EXTENSION_SUFFIX): util/python.c util/setup.py $(PERFLIBS_PY)
 	$(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
-        CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBS)' \
+        CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBS_PY)' \
 	  $(PYTHON_WORD) util/setup.py \
 	  --quiet build_ext; \
 	cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/
@@ -1094,11 +1086,6 @@ endif
 		$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 	$(call QUIET_INSTALL, perf-iostat) \
 		$(INSTALL) $(OUTPUT)perf-iostat -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-ifndef NO_LIBAUDIT
-	$(call QUIET_INSTALL, strace/groups) \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \
-		$(INSTALL) trace/strace/groups/* -m 644 -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
-endif
 ifndef NO_LIBPERL
 	$(call QUIET_INSTALL, perl-scripts) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
diff --git a/tools/perf/arch/alpha/entry/syscalls/Kbuild b/tools/perf/arch/alpha/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..9a41e3572c3a
--- /dev/null
+++ b/tools/perf/arch/alpha/entry/syscalls/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/alpha/entry/syscalls/Makefile.syscalls b/tools/perf/arch/alpha/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..690168aac34d
--- /dev/null
+++ b/tools/perf/arch/alpha/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_64  +=
+
+syscalltbl = $(srctree)/tools/perf/arch/alpha/entry/syscalls/syscall.tbl
diff --git a/tools/perf/arch/alpha/entry/syscalls/syscall.tbl b/tools/perf/arch/alpha/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..74720667fe09
--- /dev/null
+++ b/tools/perf/arch/alpha/entry/syscalls/syscall.tbl
@@ -0,0 +1,504 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for alpha
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The <abi> is always "common" for this file
+#
+0	common	osf_syscall			alpha_syscall_zero
+1	common	exit				sys_exit
+2	common	fork				alpha_fork
+3	common	read				sys_read
+4	common	write				sys_write
+5	common	osf_old_open			sys_ni_syscall
+6	common	close				sys_close
+7	common	osf_wait4			sys_osf_wait4
+8	common	osf_old_creat			sys_ni_syscall
+9	common	link				sys_link
+10	common	unlink				sys_unlink
+11	common	osf_execve			sys_ni_syscall
+12	common	chdir				sys_chdir
+13	common	fchdir				sys_fchdir
+14	common	mknod				sys_mknod
+15	common	chmod				sys_chmod
+16	common	chown				sys_chown
+17	common	brk				sys_osf_brk
+18	common	osf_getfsstat			sys_ni_syscall
+19	common	lseek				sys_lseek
+20	common	getxpid				sys_getxpid
+21	common	osf_mount			sys_osf_mount
+22	common	umount2				sys_umount
+23	common	setuid				sys_setuid
+24	common	getxuid				sys_getxuid
+25	common	exec_with_loader		sys_ni_syscall
+26	common	ptrace				sys_ptrace
+27	common	osf_nrecvmsg			sys_ni_syscall
+28	common	osf_nsendmsg			sys_ni_syscall
+29	common	osf_nrecvfrom			sys_ni_syscall
+30	common	osf_naccept			sys_ni_syscall
+31	common	osf_ngetpeername		sys_ni_syscall
+32	common	osf_ngetsockname		sys_ni_syscall
+33	common	access				sys_access
+34	common	osf_chflags			sys_ni_syscall
+35	common	osf_fchflags			sys_ni_syscall
+36	common	sync				sys_sync
+37	common	kill				sys_kill
+38	common	osf_old_stat			sys_ni_syscall
+39	common	setpgid				sys_setpgid
+40	common	osf_old_lstat			sys_ni_syscall
+41	common	dup				sys_dup
+42	common	pipe				sys_alpha_pipe
+43	common	osf_set_program_attributes	sys_osf_set_program_attributes
+44	common	osf_profil			sys_ni_syscall
+45	common	open				sys_open
+46	common	osf_old_sigaction		sys_ni_syscall
+47	common	getxgid				sys_getxgid
+48	common	osf_sigprocmask			sys_osf_sigprocmask
+49	common	osf_getlogin			sys_ni_syscall
+50	common	osf_setlogin			sys_ni_syscall
+51	common	acct				sys_acct
+52	common	sigpending			sys_sigpending
+54	common	ioctl				sys_ioctl
+55	common	osf_reboot			sys_ni_syscall
+56	common	osf_revoke			sys_ni_syscall
+57	common	symlink				sys_symlink
+58	common	readlink			sys_readlink
+59	common	execve				sys_execve
+60	common	umask				sys_umask
+61	common	chroot				sys_chroot
+62	common	osf_old_fstat			sys_ni_syscall
+63	common	getpgrp				sys_getpgrp
+64	common	getpagesize			sys_getpagesize
+65	common	osf_mremap			sys_ni_syscall
+66	common	vfork				alpha_vfork
+67	common	stat				sys_newstat
+68	common	lstat				sys_newlstat
+69	common	osf_sbrk			sys_ni_syscall
+70	common	osf_sstk			sys_ni_syscall
+71	common	mmap				sys_osf_mmap
+72	common	osf_old_vadvise			sys_ni_syscall
+73	common	munmap				sys_munmap
+74	common	mprotect			sys_mprotect
+75	common	madvise				sys_madvise
+76	common	vhangup				sys_vhangup
+77	common	osf_kmodcall			sys_ni_syscall
+78	common	osf_mincore			sys_ni_syscall
+79	common	getgroups			sys_getgroups
+80	common	setgroups			sys_setgroups
+81	common	osf_old_getpgrp			sys_ni_syscall
+82	common	setpgrp				sys_setpgid
+83	common	osf_setitimer			compat_sys_setitimer
+84	common	osf_old_wait			sys_ni_syscall
+85	common	osf_table			sys_ni_syscall
+86	common	osf_getitimer			compat_sys_getitimer
+87	common	gethostname			sys_gethostname
+88	common	sethostname			sys_sethostname
+89	common	getdtablesize			sys_getdtablesize
+90	common	dup2				sys_dup2
+91	common	fstat				sys_newfstat
+92	common	fcntl				sys_fcntl
+93	common	osf_select			sys_osf_select
+94	common	poll				sys_poll
+95	common	fsync				sys_fsync
+96	common	setpriority			sys_setpriority
+97	common	socket				sys_socket
+98	common	connect				sys_connect
+99	common	accept				sys_accept
+100	common	getpriority			sys_osf_getpriority
+101	common	send				sys_send
+102	common	recv				sys_recv
+103	common	sigreturn			sys_sigreturn
+104	common	bind				sys_bind
+105	common	setsockopt			sys_setsockopt
+106	common	listen				sys_listen
+107	common	osf_plock			sys_ni_syscall
+108	common	osf_old_sigvec			sys_ni_syscall
+109	common	osf_old_sigblock		sys_ni_syscall
+110	common	osf_old_sigsetmask		sys_ni_syscall
+111	common	sigsuspend			sys_sigsuspend
+112	common	osf_sigstack			sys_osf_sigstack
+113	common	recvmsg				sys_recvmsg
+114	common	sendmsg				sys_sendmsg
+115	common	osf_old_vtrace			sys_ni_syscall
+116	common	osf_gettimeofday		sys_osf_gettimeofday
+117	common	osf_getrusage			sys_osf_getrusage
+118	common	getsockopt			sys_getsockopt
+120	common	readv				sys_readv
+121	common	writev				sys_writev
+122	common	osf_settimeofday		sys_osf_settimeofday
+123	common	fchown				sys_fchown
+124	common	fchmod				sys_fchmod
+125	common	recvfrom			sys_recvfrom
+126	common	setreuid			sys_setreuid
+127	common	setregid			sys_setregid
+128	common	rename				sys_rename
+129	common	truncate			sys_truncate
+130	common	ftruncate			sys_ftruncate
+131	common	flock				sys_flock
+132	common	setgid				sys_setgid
+133	common	sendto				sys_sendto
+134	common	shutdown			sys_shutdown
+135	common	socketpair			sys_socketpair
+136	common	mkdir				sys_mkdir
+137	common	rmdir				sys_rmdir
+138	common	osf_utimes			sys_osf_utimes
+139	common	osf_old_sigreturn		sys_ni_syscall
+140	common	osf_adjtime			sys_ni_syscall
+141	common	getpeername			sys_getpeername
+142	common	osf_gethostid			sys_ni_syscall
+143	common	osf_sethostid			sys_ni_syscall
+144	common	getrlimit			sys_getrlimit
+145	common	setrlimit			sys_setrlimit
+146	common	osf_old_killpg			sys_ni_syscall
+147	common	setsid				sys_setsid
+148	common	quotactl			sys_quotactl
+149	common	osf_oldquota			sys_ni_syscall
+150	common	getsockname			sys_getsockname
+153	common	osf_pid_block			sys_ni_syscall
+154	common	osf_pid_unblock			sys_ni_syscall
+156	common	sigaction			sys_osf_sigaction
+157	common	osf_sigwaitprim			sys_ni_syscall
+158	common	osf_nfssvc			sys_ni_syscall
+159	common	osf_getdirentries		sys_osf_getdirentries
+160	common	osf_statfs			sys_osf_statfs
+161	common	osf_fstatfs			sys_osf_fstatfs
+163	common	osf_asynch_daemon		sys_ni_syscall
+164	common	osf_getfh			sys_ni_syscall
+165	common	osf_getdomainname		sys_osf_getdomainname
+166	common	setdomainname			sys_setdomainname
+169	common	osf_exportfs			sys_ni_syscall
+181	common	osf_alt_plock			sys_ni_syscall
+184	common	osf_getmnt			sys_ni_syscall
+187	common	osf_alt_sigpending		sys_ni_syscall
+188	common	osf_alt_setsid			sys_ni_syscall
+199	common	osf_swapon			sys_swapon
+200	common	msgctl				sys_old_msgctl
+201	common	msgget				sys_msgget
+202	common	msgrcv				sys_msgrcv
+203	common	msgsnd				sys_msgsnd
+204	common	semctl				sys_old_semctl
+205	common	semget				sys_semget
+206	common	semop				sys_semop
+207	common	osf_utsname			sys_osf_utsname
+208	common	lchown				sys_lchown
+209	common	shmat				sys_shmat
+210	common	shmctl				sys_old_shmctl
+211	common	shmdt				sys_shmdt
+212	common	shmget				sys_shmget
+213	common	osf_mvalid			sys_ni_syscall
+214	common	osf_getaddressconf		sys_ni_syscall
+215	common	osf_msleep			sys_ni_syscall
+216	common	osf_mwakeup			sys_ni_syscall
+217	common	msync				sys_msync
+218	common	osf_signal			sys_ni_syscall
+219	common	osf_utc_gettime			sys_ni_syscall
+220	common	osf_utc_adjtime			sys_ni_syscall
+222	common	osf_security			sys_ni_syscall
+223	common	osf_kloadcall			sys_ni_syscall
+224	common	osf_stat			sys_osf_stat
+225	common	osf_lstat			sys_osf_lstat
+226	common	osf_fstat			sys_osf_fstat
+227	common	osf_statfs64			sys_osf_statfs64
+228	common	osf_fstatfs64			sys_osf_fstatfs64
+233	common	getpgid				sys_getpgid
+234	common	getsid				sys_getsid
+235	common	sigaltstack			sys_sigaltstack
+236	common	osf_waitid			sys_ni_syscall
+237	common	osf_priocntlset			sys_ni_syscall
+238	common	osf_sigsendset			sys_ni_syscall
+239	common	osf_set_speculative		sys_ni_syscall
+240	common	osf_msfs_syscall		sys_ni_syscall
+241	common	osf_sysinfo			sys_osf_sysinfo
+242	common	osf_uadmin			sys_ni_syscall
+243	common	osf_fuser			sys_ni_syscall
+244	common	osf_proplist_syscall		sys_osf_proplist_syscall
+245	common	osf_ntp_adjtime			sys_ni_syscall
+246	common	osf_ntp_gettime			sys_ni_syscall
+247	common	osf_pathconf			sys_ni_syscall
+248	common	osf_fpathconf			sys_ni_syscall
+250	common	osf_uswitch			sys_ni_syscall
+251	common	osf_usleep_thread		sys_osf_usleep_thread
+252	common	osf_audcntl			sys_ni_syscall
+253	common	osf_audgen			sys_ni_syscall
+254	common	sysfs				sys_sysfs
+255	common	osf_subsys_info			sys_ni_syscall
+256	common	osf_getsysinfo			sys_osf_getsysinfo
+257	common	osf_setsysinfo			sys_osf_setsysinfo
+258	common	osf_afs_syscall			sys_ni_syscall
+259	common	osf_swapctl			sys_ni_syscall
+260	common	osf_memcntl			sys_ni_syscall
+261	common	osf_fdatasync			sys_ni_syscall
+300	common	bdflush				sys_ni_syscall
+301	common	sethae				sys_sethae
+302	common	mount				sys_mount
+303	common	old_adjtimex			sys_old_adjtimex
+304	common	swapoff				sys_swapoff
+305	common	getdents			sys_getdents
+306	common	create_module			sys_ni_syscall
+307	common	init_module			sys_init_module
+308	common	delete_module			sys_delete_module
+309	common	get_kernel_syms			sys_ni_syscall
+310	common	syslog				sys_syslog
+311	common	reboot				sys_reboot
+312	common	clone				alpha_clone
+313	common	uselib				sys_uselib
+314	common	mlock				sys_mlock
+315	common	munlock				sys_munlock
+316	common	mlockall			sys_mlockall
+317	common	munlockall			sys_munlockall
+318	common	sysinfo				sys_sysinfo
+319	common	_sysctl				sys_ni_syscall
+# 320 was sys_idle
+321	common	oldumount			sys_oldumount
+322	common	swapon				sys_swapon
+323	common	times				sys_times
+324	common	personality			sys_personality
+325	common	setfsuid			sys_setfsuid
+326	common	setfsgid			sys_setfsgid
+327	common	ustat				sys_ustat
+328	common	statfs				sys_statfs
+329	common	fstatfs				sys_fstatfs
+330	common	sched_setparam			sys_sched_setparam
+331	common	sched_getparam			sys_sched_getparam
+332	common	sched_setscheduler		sys_sched_setscheduler
+333	common	sched_getscheduler		sys_sched_getscheduler
+334	common	sched_yield			sys_sched_yield
+335	common	sched_get_priority_max		sys_sched_get_priority_max
+336	common	sched_get_priority_min		sys_sched_get_priority_min
+337	common	sched_rr_get_interval		sys_sched_rr_get_interval
+338	common	afs_syscall			sys_ni_syscall
+339	common	uname				sys_newuname
+340	common	nanosleep			sys_nanosleep
+341	common	mremap				sys_mremap
+342	common	nfsservctl			sys_ni_syscall
+343	common	setresuid			sys_setresuid
+344	common	getresuid			sys_getresuid
+345	common	pciconfig_read			sys_pciconfig_read
+346	common	pciconfig_write			sys_pciconfig_write
+347	common	query_module			sys_ni_syscall
+348	common	prctl				sys_prctl
+349	common	pread64				sys_pread64
+350	common	pwrite64			sys_pwrite64
+351	common	rt_sigreturn			sys_rt_sigreturn
+352	common	rt_sigaction			sys_rt_sigaction
+353	common	rt_sigprocmask			sys_rt_sigprocmask
+354	common	rt_sigpending			sys_rt_sigpending
+355	common	rt_sigtimedwait			sys_rt_sigtimedwait
+356	common	rt_sigqueueinfo			sys_rt_sigqueueinfo
+357	common	rt_sigsuspend			sys_rt_sigsuspend
+358	common	select				sys_select
+359	common	gettimeofday			sys_gettimeofday
+360	common	settimeofday			sys_settimeofday
+361	common	getitimer			sys_getitimer
+362	common	setitimer			sys_setitimer
+363	common	utimes				sys_utimes
+364	common	getrusage			sys_getrusage
+365	common	wait4				sys_wait4
+366	common	adjtimex			sys_adjtimex
+367	common	getcwd				sys_getcwd
+368	common	capget				sys_capget
+369	common	capset				sys_capset
+370	common	sendfile			sys_sendfile64
+371	common	setresgid			sys_setresgid
+372	common	getresgid			sys_getresgid
+373	common	dipc				sys_ni_syscall
+374	common	pivot_root			sys_pivot_root
+375	common	mincore				sys_mincore
+376	common	pciconfig_iobase		sys_pciconfig_iobase
+377	common	getdents64			sys_getdents64
+378	common	gettid				sys_gettid
+379	common	readahead			sys_readahead
+# 380 is unused
+381	common	tkill				sys_tkill
+382	common	setxattr			sys_setxattr
+383	common	lsetxattr			sys_lsetxattr
+384	common	fsetxattr			sys_fsetxattr
+385	common	getxattr			sys_getxattr
+386	common	lgetxattr			sys_lgetxattr
+387	common	fgetxattr			sys_fgetxattr
+388	common	listxattr			sys_listxattr
+389	common	llistxattr			sys_llistxattr
+390	common	flistxattr			sys_flistxattr
+391	common	removexattr			sys_removexattr
+392	common	lremovexattr			sys_lremovexattr
+393	common	fremovexattr			sys_fremovexattr
+394	common	futex				sys_futex
+395	common	sched_setaffinity		sys_sched_setaffinity
+396	common	sched_getaffinity		sys_sched_getaffinity
+397	common	tuxcall				sys_ni_syscall
+398	common	io_setup			sys_io_setup
+399	common	io_destroy			sys_io_destroy
+400	common	io_getevents			sys_io_getevents
+401	common	io_submit			sys_io_submit
+402	common	io_cancel			sys_io_cancel
+405	common	exit_group			sys_exit_group
+406	common	lookup_dcookie			sys_ni_syscall
+407	common	epoll_create			sys_epoll_create
+408	common	epoll_ctl			sys_epoll_ctl
+409	common	epoll_wait			sys_epoll_wait
+410	common	remap_file_pages		sys_remap_file_pages
+411	common	set_tid_address			sys_set_tid_address
+412	common	restart_syscall			sys_restart_syscall
+413	common	fadvise64			sys_fadvise64
+414	common	timer_create			sys_timer_create
+415	common	timer_settime			sys_timer_settime
+416	common	timer_gettime			sys_timer_gettime
+417	common	timer_getoverrun		sys_timer_getoverrun
+418	common	timer_delete			sys_timer_delete
+419	common	clock_settime			sys_clock_settime
+420	common	clock_gettime			sys_clock_gettime
+421	common	clock_getres			sys_clock_getres
+422	common	clock_nanosleep			sys_clock_nanosleep
+423	common	semtimedop			sys_semtimedop
+424	common	tgkill				sys_tgkill
+425	common	stat64				sys_stat64
+426	common	lstat64				sys_lstat64
+427	common	fstat64				sys_fstat64
+428	common	vserver				sys_ni_syscall
+429	common	mbind				sys_ni_syscall
+430	common	get_mempolicy			sys_ni_syscall
+431	common	set_mempolicy			sys_ni_syscall
+432	common	mq_open				sys_mq_open
+433	common	mq_unlink			sys_mq_unlink
+434	common	mq_timedsend			sys_mq_timedsend
+435	common	mq_timedreceive			sys_mq_timedreceive
+436	common	mq_notify			sys_mq_notify
+437	common	mq_getsetattr			sys_mq_getsetattr
+438	common	waitid				sys_waitid
+439	common	add_key				sys_add_key
+440	common	request_key			sys_request_key
+441	common	keyctl				sys_keyctl
+442	common	ioprio_set			sys_ioprio_set
+443	common	ioprio_get			sys_ioprio_get
+444	common	inotify_init			sys_inotify_init
+445	common	inotify_add_watch		sys_inotify_add_watch
+446	common	inotify_rm_watch		sys_inotify_rm_watch
+447	common	fdatasync			sys_fdatasync
+448	common	kexec_load			sys_kexec_load
+449	common	migrate_pages			sys_migrate_pages
+450	common	openat				sys_openat
+451	common	mkdirat				sys_mkdirat
+452	common	mknodat				sys_mknodat
+453	common	fchownat			sys_fchownat
+454	common	futimesat			sys_futimesat
+455	common	fstatat64			sys_fstatat64
+456	common	unlinkat			sys_unlinkat
+457	common	renameat			sys_renameat
+458	common	linkat				sys_linkat
+459	common	symlinkat			sys_symlinkat
+460	common	readlinkat			sys_readlinkat
+461	common	fchmodat			sys_fchmodat
+462	common	faccessat			sys_faccessat
+463	common	pselect6			sys_pselect6
+464	common	ppoll				sys_ppoll
+465	common	unshare				sys_unshare
+466	common	set_robust_list			sys_set_robust_list
+467	common	get_robust_list			sys_get_robust_list
+468	common	splice				sys_splice
+469	common	sync_file_range			sys_sync_file_range
+470	common	tee				sys_tee
+471	common	vmsplice			sys_vmsplice
+472	common	move_pages			sys_move_pages
+473	common	getcpu				sys_getcpu
+474	common	epoll_pwait			sys_epoll_pwait
+475	common	utimensat			sys_utimensat
+476	common	signalfd			sys_signalfd
+477	common	timerfd				sys_ni_syscall
+478	common	eventfd				sys_eventfd
+479	common	recvmmsg			sys_recvmmsg
+480	common	fallocate			sys_fallocate
+481	common	timerfd_create			sys_timerfd_create
+482	common	timerfd_settime			sys_timerfd_settime
+483	common	timerfd_gettime			sys_timerfd_gettime
+484	common	signalfd4			sys_signalfd4
+485	common	eventfd2			sys_eventfd2
+486	common	epoll_create1			sys_epoll_create1
+487	common	dup3				sys_dup3
+488	common	pipe2				sys_pipe2
+489	common	inotify_init1			sys_inotify_init1
+490	common	preadv				sys_preadv
+491	common	pwritev				sys_pwritev
+492	common	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo
+493	common	perf_event_open			sys_perf_event_open
+494	common	fanotify_init			sys_fanotify_init
+495	common	fanotify_mark			sys_fanotify_mark
+496	common	prlimit64			sys_prlimit64
+497	common	name_to_handle_at		sys_name_to_handle_at
+498	common	open_by_handle_at		sys_open_by_handle_at
+499	common	clock_adjtime			sys_clock_adjtime
+500	common	syncfs				sys_syncfs
+501	common	setns				sys_setns
+502	common	accept4				sys_accept4
+503	common	sendmmsg			sys_sendmmsg
+504	common	process_vm_readv		sys_process_vm_readv
+505	common	process_vm_writev		sys_process_vm_writev
+506	common	kcmp				sys_kcmp
+507	common	finit_module			sys_finit_module
+508	common	sched_setattr			sys_sched_setattr
+509	common	sched_getattr			sys_sched_getattr
+510	common	renameat2			sys_renameat2
+511	common	getrandom			sys_getrandom
+512	common	memfd_create			sys_memfd_create
+513	common	execveat			sys_execveat
+514	common	seccomp				sys_seccomp
+515	common	bpf				sys_bpf
+516	common	userfaultfd			sys_userfaultfd
+517	common	membarrier			sys_membarrier
+518	common	mlock2				sys_mlock2
+519	common	copy_file_range			sys_copy_file_range
+520	common	preadv2				sys_preadv2
+521	common	pwritev2			sys_pwritev2
+522	common	statx				sys_statx
+523	common	io_pgetevents			sys_io_pgetevents
+524	common	pkey_mprotect			sys_pkey_mprotect
+525	common	pkey_alloc			sys_pkey_alloc
+526	common	pkey_free			sys_pkey_free
+527	common	rseq				sys_rseq
+528	common	statfs64			sys_statfs64
+529	common	fstatfs64			sys_fstatfs64
+530	common	getegid				sys_getegid
+531	common	geteuid				sys_geteuid
+532	common	getppid				sys_getppid
+# all other architectures have common numbers for new syscall, alpha
+# is the exception.
+534	common	pidfd_send_signal		sys_pidfd_send_signal
+535	common	io_uring_setup			sys_io_uring_setup
+536	common	io_uring_enter			sys_io_uring_enter
+537	common	io_uring_register		sys_io_uring_register
+538	common	open_tree			sys_open_tree
+539	common	move_mount			sys_move_mount
+540	common	fsopen				sys_fsopen
+541	common	fsconfig			sys_fsconfig
+542	common	fsmount				sys_fsmount
+543	common	fspick				sys_fspick
+544	common	pidfd_open			sys_pidfd_open
+545	common	clone3				alpha_clone3
+546	common	close_range			sys_close_range
+547	common	openat2				sys_openat2
+548	common	pidfd_getfd			sys_pidfd_getfd
+549	common	faccessat2			sys_faccessat2
+550	common	process_madvise			sys_process_madvise
+551	common	epoll_pwait2			sys_epoll_pwait2
+552	common	mount_setattr			sys_mount_setattr
+553	common	quotactl_fd			sys_quotactl_fd
+554	common	landlock_create_ruleset		sys_landlock_create_ruleset
+555	common	landlock_add_rule		sys_landlock_add_rule
+556	common	landlock_restrict_self		sys_landlock_restrict_self
+# 557 reserved for memfd_secret
+558	common	process_mrelease		sys_process_mrelease
+559	common  futex_waitv                     sys_futex_waitv
+560	common	set_mempolicy_home_node		sys_ni_syscall
+561	common	cachestat			sys_cachestat
+562	common	fchmodat2			sys_fchmodat2
+563	common	map_shadow_stack		sys_map_shadow_stack
+564	common	futex_wake			sys_futex_wake
+565	common	futex_wait			sys_futex_wait
+566	common	futex_requeue			sys_futex_requeue
+567	common	statmount			sys_statmount
+568	common	listmount			sys_listmount
+569	common	lsm_get_self_attr		sys_lsm_get_self_attr
+570	common	lsm_set_self_attr		sys_lsm_set_self_attr
+571	common	lsm_list_modules		sys_lsm_list_modules
+572	common  mseal				sys_mseal
diff --git a/tools/perf/arch/alpha/include/syscall_table.h b/tools/perf/arch/alpha/include/syscall_table.h
new file mode 100644
index 000000000000..b53e31c15805
--- /dev/null
+++ b/tools/perf/arch/alpha/include/syscall_table.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/syscalls_64.h>
diff --git a/tools/perf/arch/arc/entry/syscalls/Kbuild b/tools/perf/arch/arc/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..11707c481a24
--- /dev/null
+++ b/tools/perf/arch/arc/entry/syscalls/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
diff --git a/tools/perf/arch/arc/entry/syscalls/Makefile.syscalls b/tools/perf/arch/arc/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..391d30ab7a83
--- /dev/null
+++ b/tools/perf/arch/arc/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 += arc time32 renameat stat64 rlimit
diff --git a/tools/perf/arch/arc/include/syscall_table.h b/tools/perf/arch/arc/include/syscall_table.h
new file mode 100644
index 000000000000..4c942821662d
--- /dev/null
+++ b/tools/perf/arch/arc/include/syscall_table.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/syscalls_32.h>
diff --git a/tools/perf/arch/arm/entry/syscalls/Kbuild b/tools/perf/arch/arm/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..9d777540f089
--- /dev/null
+++ b/tools/perf/arch/arm/entry/syscalls/Kbuild
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 += oabi
+syscalltbl = $(srctree)/tools/perf/arch/arm/entry/syscalls/syscall.tbl
diff --git a/tools/perf/arch/arm/entry/syscalls/Makefile.syscalls b/tools/perf/arch/arm/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..11707c481a24
--- /dev/null
+++ b/tools/perf/arch/arm/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..49eeb2ad8dbd
--- /dev/null
+++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
@@ -0,0 +1,483 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# Linux system call numbers and entry vectors
+#
+# The format is:
+# <num>	<abi>	<name>			[<entry point>			[<oabi compat entry point>]]
+#
+# Where abi is:
+#  common - for system calls shared between oabi and eabi (may have compat)
+#  oabi   - for oabi-only system calls (may have compat)
+#  eabi   - for eabi-only system calls
+#
+# For each syscall number, "common" is mutually exclusive with oabi and eabi
+#
+0	common	restart_syscall		sys_restart_syscall
+1	common	exit			sys_exit
+2	common	fork			sys_fork
+3	common	read			sys_read
+4	common	write			sys_write
+5	common	open			sys_open
+6	common	close			sys_close
+# 7 was sys_waitpid
+8	common	creat			sys_creat
+9	common	link			sys_link
+10	common	unlink			sys_unlink
+11	common	execve			sys_execve
+12	common	chdir			sys_chdir
+13	oabi	time			sys_time32
+14	common	mknod			sys_mknod
+15	common	chmod			sys_chmod
+16	common	lchown			sys_lchown16
+# 17 was sys_break
+# 18 was sys_stat
+19	common	lseek			sys_lseek
+20	common	getpid			sys_getpid
+21	common	mount			sys_mount
+22	oabi	umount			sys_oldumount
+23	common	setuid			sys_setuid16
+24	common	getuid			sys_getuid16
+25	oabi	stime			sys_stime32
+26	common	ptrace			sys_ptrace
+27	oabi	alarm			sys_alarm
+# 28 was sys_fstat
+29	common	pause			sys_pause
+30	oabi	utime			sys_utime32
+# 31 was sys_stty
+# 32 was sys_gtty
+33	common	access			sys_access
+34	common	nice			sys_nice
+# 35 was sys_ftime
+36	common	sync			sys_sync
+37	common	kill			sys_kill
+38	common	rename			sys_rename
+39	common	mkdir			sys_mkdir
+40	common	rmdir			sys_rmdir
+41	common	dup			sys_dup
+42	common	pipe			sys_pipe
+43	common	times			sys_times
+# 44 was sys_prof
+45	common	brk			sys_brk
+46	common	setgid			sys_setgid16
+47	common	getgid			sys_getgid16
+# 48 was sys_signal
+49	common	geteuid			sys_geteuid16
+50	common	getegid			sys_getegid16
+51	common	acct			sys_acct
+52	common	umount2			sys_umount
+# 53 was sys_lock
+54	common	ioctl			sys_ioctl
+55	common	fcntl			sys_fcntl
+# 56 was sys_mpx
+57	common	setpgid			sys_setpgid
+# 58 was sys_ulimit
+# 59 was sys_olduname
+60	common	umask			sys_umask
+61	common	chroot			sys_chroot
+62	common	ustat			sys_ustat
+63	common	dup2			sys_dup2
+64	common	getppid			sys_getppid
+65	common	getpgrp			sys_getpgrp
+66	common	setsid			sys_setsid
+67	common	sigaction		sys_sigaction
+# 68 was sys_sgetmask
+# 69 was sys_ssetmask
+70	common	setreuid		sys_setreuid16
+71	common	setregid		sys_setregid16
+72	common	sigsuspend		sys_sigsuspend
+73	common	sigpending		sys_sigpending
+74	common	sethostname		sys_sethostname
+75	common	setrlimit		sys_setrlimit
+# Back compat 2GB limited rlimit
+76	oabi	getrlimit		sys_old_getrlimit
+77	common	getrusage		sys_getrusage
+78	common	gettimeofday		sys_gettimeofday
+79	common	settimeofday		sys_settimeofday
+80	common	getgroups		sys_getgroups16
+81	common	setgroups		sys_setgroups16
+82	oabi	select			sys_old_select
+83	common	symlink			sys_symlink
+# 84 was sys_lstat
+85	common	readlink		sys_readlink
+86	common	uselib			sys_uselib
+87	common	swapon			sys_swapon
+88	common	reboot			sys_reboot
+89	oabi	readdir			sys_old_readdir
+90	oabi	mmap			sys_old_mmap
+91	common	munmap			sys_munmap
+92	common	truncate		sys_truncate
+93	common	ftruncate		sys_ftruncate
+94	common	fchmod			sys_fchmod
+95	common	fchown			sys_fchown16
+96	common	getpriority		sys_getpriority
+97	common	setpriority		sys_setpriority
+# 98 was sys_profil
+99	common	statfs			sys_statfs
+100	common	fstatfs			sys_fstatfs
+# 101 was sys_ioperm
+102	oabi	socketcall		sys_socketcall		sys_oabi_socketcall
+103	common	syslog			sys_syslog
+104	common	setitimer		sys_setitimer
+105	common	getitimer		sys_getitimer
+106	common	stat			sys_newstat
+107	common	lstat			sys_newlstat
+108	common	fstat			sys_newfstat
+# 109 was sys_uname
+# 110 was sys_iopl
+111	common	vhangup			sys_vhangup
+# 112 was sys_idle
+# syscall to call a syscall!
+113	oabi	syscall			sys_syscall
+114	common	wait4			sys_wait4
+115	common	swapoff			sys_swapoff
+116	common	sysinfo			sys_sysinfo
+117	oabi	ipc			sys_ipc			sys_oabi_ipc
+118	common	fsync			sys_fsync
+119	common	sigreturn		sys_sigreturn_wrapper
+120	common	clone			sys_clone
+121	common	setdomainname		sys_setdomainname
+122	common	uname			sys_newuname
+# 123 was sys_modify_ldt
+124	common	adjtimex		sys_adjtimex_time32
+125	common	mprotect		sys_mprotect
+126	common	sigprocmask		sys_sigprocmask
+# 127 was sys_create_module
+128	common	init_module		sys_init_module
+129	common	delete_module		sys_delete_module
+# 130 was sys_get_kernel_syms
+131	common	quotactl		sys_quotactl
+132	common	getpgid			sys_getpgid
+133	common	fchdir			sys_fchdir
+134	common	bdflush			sys_ni_syscall
+135	common	sysfs			sys_sysfs
+136	common	personality		sys_personality
+# 137 was sys_afs_syscall
+138	common	setfsuid		sys_setfsuid16
+139	common	setfsgid		sys_setfsgid16
+140	common	_llseek			sys_llseek
+141	common	getdents		sys_getdents
+142	common	_newselect		sys_select
+143	common	flock			sys_flock
+144	common	msync			sys_msync
+145	common	readv			sys_readv
+146	common	writev			sys_writev
+147	common	getsid			sys_getsid
+148	common	fdatasync		sys_fdatasync
+149	common	_sysctl			sys_ni_syscall
+150	common	mlock			sys_mlock
+151	common	munlock			sys_munlock
+152	common	mlockall		sys_mlockall
+153	common	munlockall		sys_munlockall
+154	common	sched_setparam		sys_sched_setparam
+155	common	sched_getparam		sys_sched_getparam
+156	common	sched_setscheduler	sys_sched_setscheduler
+157	common	sched_getscheduler	sys_sched_getscheduler
+158	common	sched_yield		sys_sched_yield
+159	common	sched_get_priority_max	sys_sched_get_priority_max
+160	common	sched_get_priority_min	sys_sched_get_priority_min
+161	common	sched_rr_get_interval	sys_sched_rr_get_interval_time32
+162	common	nanosleep		sys_nanosleep_time32
+163	common	mremap			sys_mremap
+164	common	setresuid		sys_setresuid16
+165	common	getresuid		sys_getresuid16
+# 166 was sys_vm86
+# 167 was sys_query_module
+168	common	poll			sys_poll
+169	common	nfsservctl
+170	common	setresgid		sys_setresgid16
+171	common	getresgid		sys_getresgid16
+172	common	prctl			sys_prctl
+173	common	rt_sigreturn		sys_rt_sigreturn_wrapper
+174	common	rt_sigaction		sys_rt_sigaction
+175	common	rt_sigprocmask		sys_rt_sigprocmask
+176	common	rt_sigpending		sys_rt_sigpending
+177	common	rt_sigtimedwait		sys_rt_sigtimedwait_time32
+178	common	rt_sigqueueinfo		sys_rt_sigqueueinfo
+179	common	rt_sigsuspend		sys_rt_sigsuspend
+180	common	pread64			sys_pread64		sys_oabi_pread64
+181	common	pwrite64		sys_pwrite64		sys_oabi_pwrite64
+182	common	chown			sys_chown16
+183	common	getcwd			sys_getcwd
+184	common	capget			sys_capget
+185	common	capset			sys_capset
+186	common	sigaltstack		sys_sigaltstack
+187	common	sendfile		sys_sendfile
+# 188 reserved
+# 189 reserved
+190	common	vfork			sys_vfork
+# SuS compliant getrlimit
+191	common	ugetrlimit		sys_getrlimit
+192	common	mmap2			sys_mmap2
+193	common	truncate64		sys_truncate64		sys_oabi_truncate64
+194	common	ftruncate64		sys_ftruncate64		sys_oabi_ftruncate64
+195	common	stat64			sys_stat64		sys_oabi_stat64
+196	common	lstat64			sys_lstat64		sys_oabi_lstat64
+197	common	fstat64			sys_fstat64		sys_oabi_fstat64
+198	common	lchown32		sys_lchown
+199	common	getuid32		sys_getuid
+200	common	getgid32		sys_getgid
+201	common	geteuid32		sys_geteuid
+202	common	getegid32		sys_getegid
+203	common	setreuid32		sys_setreuid
+204	common	setregid32		sys_setregid
+205	common	getgroups32		sys_getgroups
+206	common	setgroups32		sys_setgroups
+207	common	fchown32		sys_fchown
+208	common	setresuid32		sys_setresuid
+209	common	getresuid32		sys_getresuid
+210	common	setresgid32		sys_setresgid
+211	common	getresgid32		sys_getresgid
+212	common	chown32			sys_chown
+213	common	setuid32		sys_setuid
+214	common	setgid32		sys_setgid
+215	common	setfsuid32		sys_setfsuid
+216	common	setfsgid32		sys_setfsgid
+217	common	getdents64		sys_getdents64
+218	common	pivot_root		sys_pivot_root
+219	common	mincore			sys_mincore
+220	common	madvise			sys_madvise
+221	common	fcntl64			sys_fcntl64		sys_oabi_fcntl64
+# 222 for tux
+# 223 is unused
+224	common	gettid			sys_gettid
+225	common	readahead		sys_readahead		sys_oabi_readahead
+226	common	setxattr		sys_setxattr
+227	common	lsetxattr		sys_lsetxattr
+228	common	fsetxattr		sys_fsetxattr
+229	common	getxattr		sys_getxattr
+230	common	lgetxattr		sys_lgetxattr
+231	common	fgetxattr		sys_fgetxattr
+232	common	listxattr		sys_listxattr
+233	common	llistxattr		sys_llistxattr
+234	common	flistxattr		sys_flistxattr
+235	common	removexattr		sys_removexattr
+236	common	lremovexattr		sys_lremovexattr
+237	common	fremovexattr		sys_fremovexattr
+238	common	tkill			sys_tkill
+239	common	sendfile64		sys_sendfile64
+240	common	futex			sys_futex_time32
+241	common	sched_setaffinity	sys_sched_setaffinity
+242	common	sched_getaffinity	sys_sched_getaffinity
+243	common	io_setup		sys_io_setup
+244	common	io_destroy		sys_io_destroy
+245	common	io_getevents		sys_io_getevents_time32
+246	common	io_submit		sys_io_submit
+247	common	io_cancel		sys_io_cancel
+248	common	exit_group		sys_exit_group
+249	common	lookup_dcookie		sys_ni_syscall
+250	common	epoll_create		sys_epoll_create
+251	common	epoll_ctl		sys_epoll_ctl		sys_oabi_epoll_ctl
+252	common	epoll_wait		sys_epoll_wait
+253	common	remap_file_pages	sys_remap_file_pages
+# 254 for set_thread_area
+# 255 for get_thread_area
+256	common	set_tid_address		sys_set_tid_address
+257	common	timer_create		sys_timer_create
+258	common	timer_settime		sys_timer_settime32
+259	common	timer_gettime		sys_timer_gettime32
+260	common	timer_getoverrun	sys_timer_getoverrun
+261	common	timer_delete		sys_timer_delete
+262	common	clock_settime		sys_clock_settime32
+263	common	clock_gettime		sys_clock_gettime32
+264	common	clock_getres		sys_clock_getres_time32
+265	common	clock_nanosleep		sys_clock_nanosleep_time32
+266	common	statfs64		sys_statfs64_wrapper
+267	common	fstatfs64		sys_fstatfs64_wrapper
+268	common	tgkill			sys_tgkill
+269	common	utimes			sys_utimes_time32
+270	common	arm_fadvise64_64	sys_arm_fadvise64_64
+271	common	pciconfig_iobase	sys_pciconfig_iobase
+272	common	pciconfig_read		sys_pciconfig_read
+273	common	pciconfig_write		sys_pciconfig_write
+274	common	mq_open			sys_mq_open
+275	common	mq_unlink		sys_mq_unlink
+276	common	mq_timedsend		sys_mq_timedsend_time32
+277	common	mq_timedreceive		sys_mq_timedreceive_time32
+278	common	mq_notify		sys_mq_notify
+279	common	mq_getsetattr		sys_mq_getsetattr
+280	common	waitid			sys_waitid
+281	common	socket			sys_socket
+282	common	bind			sys_bind		sys_oabi_bind
+283	common	connect			sys_connect		sys_oabi_connect
+284	common	listen			sys_listen
+285	common	accept			sys_accept
+286	common	getsockname		sys_getsockname
+287	common	getpeername		sys_getpeername
+288	common	socketpair		sys_socketpair
+289	common	send			sys_send
+290	common	sendto			sys_sendto		sys_oabi_sendto
+291	common	recv			sys_recv
+292	common	recvfrom		sys_recvfrom
+293	common	shutdown		sys_shutdown
+294	common	setsockopt		sys_setsockopt
+295	common	getsockopt		sys_getsockopt
+296	common	sendmsg			sys_sendmsg		sys_oabi_sendmsg
+297	common	recvmsg			sys_recvmsg
+298	common	semop			sys_semop		sys_oabi_semop
+299	common	semget			sys_semget
+300	common	semctl			sys_old_semctl
+301	common	msgsnd			sys_msgsnd
+302	common	msgrcv			sys_msgrcv
+303	common	msgget			sys_msgget
+304	common	msgctl			sys_old_msgctl
+305	common	shmat			sys_shmat
+306	common	shmdt			sys_shmdt
+307	common	shmget			sys_shmget
+308	common	shmctl			sys_old_shmctl
+309	common	add_key			sys_add_key
+310	common	request_key		sys_request_key
+311	common	keyctl			sys_keyctl
+312	common	semtimedop		sys_semtimedop_time32	sys_oabi_semtimedop
+313	common	vserver
+314	common	ioprio_set		sys_ioprio_set
+315	common	ioprio_get		sys_ioprio_get
+316	common	inotify_init		sys_inotify_init
+317	common	inotify_add_watch	sys_inotify_add_watch
+318	common	inotify_rm_watch	sys_inotify_rm_watch
+319	common	mbind			sys_mbind
+320	common	get_mempolicy		sys_get_mempolicy
+321	common	set_mempolicy		sys_set_mempolicy
+322	common	openat			sys_openat
+323	common	mkdirat			sys_mkdirat
+324	common	mknodat			sys_mknodat
+325	common	fchownat		sys_fchownat
+326	common	futimesat		sys_futimesat_time32
+327	common	fstatat64		sys_fstatat64		sys_oabi_fstatat64
+328	common	unlinkat		sys_unlinkat
+329	common	renameat		sys_renameat
+330	common	linkat			sys_linkat
+331	common	symlinkat		sys_symlinkat
+332	common	readlinkat		sys_readlinkat
+333	common	fchmodat		sys_fchmodat
+334	common	faccessat		sys_faccessat
+335	common	pselect6		sys_pselect6_time32
+336	common	ppoll			sys_ppoll_time32
+337	common	unshare			sys_unshare
+338	common	set_robust_list		sys_set_robust_list
+339	common	get_robust_list		sys_get_robust_list
+340	common	splice			sys_splice
+341	common	arm_sync_file_range	sys_sync_file_range2
+342	common	tee			sys_tee
+343	common	vmsplice		sys_vmsplice
+344	common	move_pages		sys_move_pages
+345	common	getcpu			sys_getcpu
+346	common	epoll_pwait		sys_epoll_pwait
+347	common	kexec_load		sys_kexec_load
+348	common	utimensat		sys_utimensat_time32
+349	common	signalfd		sys_signalfd
+350	common	timerfd_create		sys_timerfd_create
+351	common	eventfd			sys_eventfd
+352	common	fallocate		sys_fallocate
+353	common	timerfd_settime		sys_timerfd_settime32
+354	common	timerfd_gettime		sys_timerfd_gettime32
+355	common	signalfd4		sys_signalfd4
+356	common	eventfd2		sys_eventfd2
+357	common	epoll_create1		sys_epoll_create1
+358	common	dup3			sys_dup3
+359	common	pipe2			sys_pipe2
+360	common	inotify_init1		sys_inotify_init1
+361	common	preadv			sys_preadv
+362	common	pwritev			sys_pwritev
+363	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo
+364	common	perf_event_open		sys_perf_event_open
+365	common	recvmmsg		sys_recvmmsg_time32
+366	common	accept4			sys_accept4
+367	common	fanotify_init		sys_fanotify_init
+368	common	fanotify_mark		sys_fanotify_mark
+369	common	prlimit64		sys_prlimit64
+370	common	name_to_handle_at	sys_name_to_handle_at
+371	common	open_by_handle_at	sys_open_by_handle_at
+372	common	clock_adjtime		sys_clock_adjtime32
+373	common	syncfs			sys_syncfs
+374	common	sendmmsg		sys_sendmmsg
+375	common	setns			sys_setns
+376	common	process_vm_readv	sys_process_vm_readv
+377	common	process_vm_writev	sys_process_vm_writev
+378	common	kcmp			sys_kcmp
+379	common	finit_module		sys_finit_module
+380	common	sched_setattr		sys_sched_setattr
+381	common	sched_getattr		sys_sched_getattr
+382	common	renameat2		sys_renameat2
+383	common	seccomp			sys_seccomp
+384	common	getrandom		sys_getrandom
+385	common	memfd_create		sys_memfd_create
+386	common	bpf			sys_bpf
+387	common	execveat		sys_execveat
+388	common	userfaultfd		sys_userfaultfd
+389	common	membarrier		sys_membarrier
+390	common	mlock2			sys_mlock2
+391	common	copy_file_range		sys_copy_file_range
+392	common	preadv2			sys_preadv2
+393	common	pwritev2		sys_pwritev2
+394	common	pkey_mprotect		sys_pkey_mprotect
+395	common	pkey_alloc		sys_pkey_alloc
+396	common	pkey_free		sys_pkey_free
+397	common	statx			sys_statx
+398	common	rseq			sys_rseq
+399	common	io_pgetevents		sys_io_pgetevents_time32
+400	common	migrate_pages		sys_migrate_pages
+401	common	kexec_file_load		sys_kexec_file_load
+# 402 is unused
+403	common	clock_gettime64			sys_clock_gettime
+404	common	clock_settime64			sys_clock_settime
+405	common	clock_adjtime64			sys_clock_adjtime
+406	common	clock_getres_time64		sys_clock_getres
+407	common	clock_nanosleep_time64		sys_clock_nanosleep
+408	common	timer_gettime64			sys_timer_gettime
+409	common	timer_settime64			sys_timer_settime
+410	common	timerfd_gettime64		sys_timerfd_gettime
+411	common	timerfd_settime64		sys_timerfd_settime
+412	common	utimensat_time64		sys_utimensat
+413	common	pselect6_time64			sys_pselect6
+414	common	ppoll_time64			sys_ppoll
+416	common	io_pgetevents_time64		sys_io_pgetevents
+417	common	recvmmsg_time64			sys_recvmmsg
+418	common	mq_timedsend_time64		sys_mq_timedsend
+419	common	mq_timedreceive_time64		sys_mq_timedreceive
+420	common	semtimedop_time64		sys_semtimedop
+421	common	rt_sigtimedwait_time64		sys_rt_sigtimedwait
+422	common	futex_time64			sys_futex
+423	common	sched_rr_get_interval_time64	sys_sched_rr_get_interval
+424	common	pidfd_send_signal		sys_pidfd_send_signal
+425	common	io_uring_setup			sys_io_uring_setup
+426	common	io_uring_enter			sys_io_uring_enter
+427	common	io_uring_register		sys_io_uring_register
+428	common	open_tree			sys_open_tree
+429	common	move_mount			sys_move_mount
+430	common	fsopen				sys_fsopen
+431	common	fsconfig			sys_fsconfig
+432	common	fsmount				sys_fsmount
+433	common	fspick				sys_fspick
+434	common	pidfd_open			sys_pidfd_open
+435	common	clone3				sys_clone3
+436	common	close_range			sys_close_range
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
+439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
+442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_fd			sys_quotactl_fd
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
+450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
+453	common	map_shadow_stack		sys_map_shadow_stack
+454	common	futex_wake			sys_futex_wake
+455	common	futex_wait			sys_futex_wait
+456	common	futex_requeue			sys_futex_requeue
+457	common	statmount			sys_statmount
+458	common	listmount			sys_listmount
+459	common	lsm_get_self_attr		sys_lsm_get_self_attr
+460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+461	common	lsm_list_modules		sys_lsm_list_modules
+462	common	mseal				sys_mseal
+463	common	setxattrat			sys_setxattrat
+464	common	getxattrat			sys_getxattrat
+465	common	listxattrat			sys_listxattrat
+466	common	removexattrat			sys_removexattrat
diff --git a/tools/perf/arch/arm/include/syscall_table.h b/tools/perf/arch/arm/include/syscall_table.h
new file mode 100644
index 000000000000..4c942821662d
--- /dev/null
+++ b/tools/perf/arch/arm/include/syscall_table.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/syscalls_32.h>
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
index 91570d5d428e..087e099fb453 100644
--- a/tools/perf/arch/arm64/Makefile
+++ b/tools/perf/arch/arm64/Makefile
@@ -1,25 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 PERF_HAVE_JITDUMP := 1
 HAVE_KVM_STAT_SUPPORT := 1
-
-#
-# Syscall table generation for perf
-#
-
-out    := $(OUTPUT)arch/arm64/include/generated/asm
-header := $(out)/syscalls.c
-incpath := $(srctree)/tools
-sysdef := $(srctree)/tools/arch/arm64/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/
-systbl := $(sysprf)/mksyscalltbl
-
-# Create output directory if not already present
-$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
-
-$(header): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
-
-clean::
-	$(call QUIET_CLEAN, arm64) $(RM) $(header)
-
-archheaders: $(header)
diff --git a/tools/perf/arch/arm64/entry/syscalls/Kbuild b/tools/perf/arch/arm64/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..84c6599b4ea6
--- /dev/null
+++ b/tools/perf/arch/arm64/entry/syscalls/Kbuild
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/arm64/entry/syscalls/Makefile.syscalls b/tools/perf/arch/arm64/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..e7e78c2d1c02
--- /dev/null
+++ b/tools/perf/arch/arm64/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 +=
+syscall_abis_64 += renameat rlimit memfd_secret
+
+syscalltbl = $(srctree)/tools/perf/arch/arm64/entry/syscalls/syscall_%.tbl
diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
deleted file mode 100755
index 27d747c92d44..000000000000
--- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Generate system call table for perf. Derived from
-# powerpc script.
-#
-# Copyright IBM Corp. 2017
-# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
-# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
-# Changed by: Kim Phillips <kim.phillips@arm.com>
-
-gcc=$1
-hostcc=$2
-incpath=$3
-input=$4
-
-if ! test -r $input; then
-	echo "Could not read input file" >&2
-	exit 1
-fi
-
-create_sc_table()
-{
-	local sc nr max_nr
-
-	while read sc nr; do
-		printf "%s\n" "	[$nr] = \"$sc\","
-		max_nr=$nr
-	done
-
-	echo "#define SYSCALLTBL_ARM64_MAX_ID $max_nr"
-}
-
-create_table()
-{
-	echo "#include \"$input\""
-	echo "static const char *const syscalltbl_arm64[] = {"
-	create_sc_table
-	echo "};"
-}
-
-$gcc -E -dM -x c -I $incpath/include/uapi $input \
-	|awk '$2 ~ "__NR" && $3 !~ "__NR3264_" {
-		sub("^#define __NR(3264)?_", "");
-		print | "sort -k2 -n"}' \
-	|create_table
diff --git a/tools/perf/arch/arm64/entry/syscalls/syscall_32.tbl b/tools/perf/arch/arm64/entry/syscalls/syscall_32.tbl
new file mode 100644
index 000000000000..9a37930d4e26
--- /dev/null
+++ b/tools/perf/arch/arm64/entry/syscalls/syscall_32.tbl
@@ -0,0 +1,476 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# AArch32 (compat) system call definitions.
+#
+# Copyright (C) 2001-2005 Russell King
+# Copyright (C) 2012 ARM Ltd.
+#
+# This file corresponds to arch/arm/tools/syscall.tbl
+# for the native EABI syscalls and should be kept in sync
+# Instead of the OABI syscalls, it contains pointers to
+# the compat entry points where they differ from the native
+# syscalls.
+#
+0	common	restart_syscall		sys_restart_syscall
+1	common	exit			sys_exit
+2	common	fork			sys_fork
+3	common	read			sys_read
+4	common	write			sys_write
+5	common	open			sys_open		compat_sys_open
+6	common	close			sys_close
+# 7 was sys_waitpid
+8	common	creat			sys_creat
+9	common	link			sys_link
+10	common	unlink			sys_unlink
+11	common	execve			sys_execve		compat_sys_execve
+12	common	chdir			sys_chdir
+# 13 was sys_time
+14	common	mknod			sys_mknod
+15	common	chmod			sys_chmod
+16	common	lchown			sys_lchown16
+# 17 was sys_break
+# 18 was sys_stat
+19	common	lseek			sys_lseek		compat_sys_lseek
+20	common	getpid			sys_getpid
+21	common	mount			sys_mount
+# 22 was sys_umount
+23	common	setuid			sys_setuid16
+24	common	getuid			sys_getuid16
+# 25 was sys_stime
+26	common	ptrace			sys_ptrace		compat_sys_ptrace
+# 27 was sys_alarm
+# 28 was sys_fstat
+29	common	pause			sys_pause
+# 30 was sys_utime
+# 31 was sys_stty
+# 32 was sys_gtty
+33	common	access			sys_access
+34	common	nice			sys_nice
+# 35 was sys_ftime
+36	common	sync			sys_sync
+37	common	kill			sys_kill
+38	common	rename			sys_rename
+39	common	mkdir			sys_mkdir
+40	common	rmdir			sys_rmdir
+41	common	dup			sys_dup
+42	common	pipe			sys_pipe
+43	common	times			sys_times		compat_sys_times
+# 44 was sys_prof
+45	common	brk			sys_brk
+46	common	setgid			sys_setgid16
+47	common	getgid			sys_getgid16
+# 48 was sys_signal
+49	common	geteuid			sys_geteuid16
+50	common	getegid			sys_getegid16
+51	common	acct			sys_acct
+52	common	umount2			sys_umount
+# 53 was sys_lock
+54	common	ioctl			sys_ioctl		compat_sys_ioctl
+55	common	fcntl			sys_fcntl		compat_sys_fcntl
+# 56 was sys_mpx
+57	common	setpgid			sys_setpgid
+# 58 was sys_ulimit
+# 59 was sys_olduname
+60	common	umask			sys_umask
+61	common	chroot			sys_chroot
+62	common	ustat			sys_ustat		compat_sys_ustat
+63	common	dup2			sys_dup2
+64	common	getppid			sys_getppid
+65	common	getpgrp			sys_getpgrp
+66	common	setsid			sys_setsid
+67	common	sigaction		sys_sigaction		compat_sys_sigaction
+# 68 was sys_sgetmask
+# 69 was sys_ssetmask
+70	common	setreuid		sys_setreuid16
+71	common	setregid		sys_setregid16
+72	common	sigsuspend		sys_sigsuspend
+73	common	sigpending		sys_sigpending		compat_sys_sigpending
+74	common	sethostname		sys_sethostname
+75	common	setrlimit		sys_setrlimit		compat_sys_setrlimit
+# 76 was compat_sys_getrlimit
+77	common	getrusage		sys_getrusage		compat_sys_getrusage
+78	common	gettimeofday		sys_gettimeofday	compat_sys_gettimeofday
+79	common	settimeofday		sys_settimeofday	compat_sys_settimeofday
+80	common	getgroups		sys_getgroups16
+81	common	setgroups		sys_setgroups16
+# 82 was compat_sys_select
+83	common	symlink			sys_symlink
+# 84 was sys_lstat
+85	common	readlink		sys_readlink
+86	common	uselib			sys_uselib
+87	common	swapon			sys_swapon
+88	common	reboot			sys_reboot
+# 89 was sys_readdir
+# 90 was sys_mmap
+91	common	munmap			sys_munmap
+92	common	truncate		sys_truncate		compat_sys_truncate
+93	common	ftruncate		sys_ftruncate		compat_sys_ftruncate
+94	common	fchmod			sys_fchmod
+95	common	fchown			sys_fchown16
+96	common	getpriority		sys_getpriority
+97	common	setpriority		sys_setpriority
+# 98 was sys_profil
+99	common	statfs			sys_statfs		compat_sys_statfs
+100	common	fstatfs			sys_fstatfs		compat_sys_fstatfs
+# 101 was sys_ioperm
+# 102 was sys_socketcall
+103	common	syslog			sys_syslog
+104	common	setitimer		sys_setitimer		compat_sys_setitimer
+105	common	getitimer		sys_getitimer		compat_sys_getitimer
+106	common	stat			sys_newstat		compat_sys_newstat
+107	common	lstat			sys_newlstat		compat_sys_newlstat
+108	common	fstat			sys_newfstat		compat_sys_newfstat
+# 109 was sys_uname
+# 110 was sys_iopl
+111	common	vhangup			sys_vhangup
+# 112 was sys_idle
+# 113 was sys_syscall
+114	common	wait4			sys_wait4		compat_sys_wait4
+115	common	swapoff			sys_swapoff
+116	common	sysinfo			sys_sysinfo		compat_sys_sysinfo
+# 117 was sys_ipc
+118	common	fsync			sys_fsync
+119	common	sigreturn		sys_sigreturn_wrapper	compat_sys_sigreturn
+120	common	clone			sys_clone
+121	common	setdomainname		sys_setdomainname
+122	common	uname			sys_newuname
+# 123 was sys_modify_ldt
+124	common	adjtimex		sys_adjtimex_time32
+125	common	mprotect		sys_mprotect
+126	common	sigprocmask		sys_sigprocmask		compat_sys_sigprocmask
+# 127 was sys_create_module
+128	common	init_module		sys_init_module
+129	common	delete_module		sys_delete_module
+# 130 was sys_get_kernel_syms
+131	common	quotactl		sys_quotactl
+132	common	getpgid			sys_getpgid
+133	common	fchdir			sys_fchdir
+134	common	bdflush			sys_ni_syscall
+135	common	sysfs			sys_sysfs
+136	common	personality		sys_personality
+# 137 was sys_afs_syscall
+138	common	setfsuid		sys_setfsuid16
+139	common	setfsgid		sys_setfsgid16
+140	common	_llseek			sys_llseek
+141	common	getdents		sys_getdents		compat_sys_getdents
+142	common	_newselect		sys_select		compat_sys_select
+143	common	flock			sys_flock
+144	common	msync			sys_msync
+145	common	readv			sys_readv
+146	common	writev			sys_writev
+147	common	getsid			sys_getsid
+148	common	fdatasync		sys_fdatasync
+149	common	_sysctl			sys_ni_syscall
+150	common	mlock			sys_mlock
+151	common	munlock			sys_munlock
+152	common	mlockall		sys_mlockall
+153	common	munlockall		sys_munlockall
+154	common	sched_setparam		sys_sched_setparam
+155	common	sched_getparam		sys_sched_getparam
+156	common	sched_setscheduler	sys_sched_setscheduler
+157	common	sched_getscheduler	sys_sched_getscheduler
+158	common	sched_yield		sys_sched_yield
+159	common	sched_get_priority_max	sys_sched_get_priority_max
+160	common	sched_get_priority_min	sys_sched_get_priority_min
+161	common	sched_rr_get_interval	sys_sched_rr_get_interval_time32
+162	common	nanosleep		sys_nanosleep_time32
+163	common	mremap			sys_mremap
+164	common	setresuid		sys_setresuid16
+165	common	getresuid		sys_getresuid16
+# 166 was sys_vm86
+# 167 was sys_query_module
+168	common	poll			sys_poll
+169	common	nfsservctl		sys_ni_syscall
+170	common	setresgid		sys_setresgid16
+171	common	getresgid		sys_getresgid16
+172	common	prctl			sys_prctl
+173	common	rt_sigreturn		sys_rt_sigreturn_wrapper	compat_sys_rt_sigreturn
+174	common	rt_sigaction		sys_rt_sigaction	compat_sys_rt_sigaction
+175	common	rt_sigprocmask		sys_rt_sigprocmask	compat_sys_rt_sigprocmask
+176	common	rt_sigpending		sys_rt_sigpending	compat_sys_rt_sigpending
+177	common	rt_sigtimedwait		sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
+178	common	rt_sigqueueinfo		sys_rt_sigqueueinfo	compat_sys_rt_sigqueueinfo
+179	common	rt_sigsuspend		sys_rt_sigsuspend	compat_sys_rt_sigsuspend
+180	common	pread64			sys_pread64		compat_sys_aarch32_pread64
+181	common	pwrite64		sys_pwrite64		compat_sys_aarch32_pwrite64
+182	common	chown			sys_chown16
+183	common	getcwd			sys_getcwd
+184	common	capget			sys_capget
+185	common	capset			sys_capset
+186	common	sigaltstack		sys_sigaltstack		compat_sys_sigaltstack
+187	common	sendfile		sys_sendfile		compat_sys_sendfile
+# 188 reserved
+# 189 reserved
+190	common	vfork			sys_vfork
+# SuS compliant getrlimit
+191	common	ugetrlimit		sys_getrlimit		compat_sys_getrlimit
+192	common	mmap2			sys_mmap2		compat_sys_aarch32_mmap2
+193	common	truncate64		sys_truncate64		compat_sys_aarch32_truncate64
+194	common	ftruncate64		sys_ftruncate64		compat_sys_aarch32_ftruncate64
+195	common	stat64			sys_stat64
+196	common	lstat64			sys_lstat64
+197	common	fstat64			sys_fstat64
+198	common	lchown32		sys_lchown
+199	common	getuid32		sys_getuid
+200	common	getgid32		sys_getgid
+201	common	geteuid32		sys_geteuid
+202	common	getegid32		sys_getegid
+203	common	setreuid32		sys_setreuid
+204	common	setregid32		sys_setregid
+205	common	getgroups32		sys_getgroups
+206	common	setgroups32		sys_setgroups
+207	common	fchown32		sys_fchown
+208	common	setresuid32		sys_setresuid
+209	common	getresuid32		sys_getresuid
+210	common	setresgid32		sys_setresgid
+211	common	getresgid32		sys_getresgid
+212	common	chown32			sys_chown
+213	common	setuid32		sys_setuid
+214	common	setgid32		sys_setgid
+215	common	setfsuid32		sys_setfsuid
+216	common	setfsgid32		sys_setfsgid
+217	common	getdents64		sys_getdents64
+218	common	pivot_root		sys_pivot_root
+219	common	mincore			sys_mincore
+220	common	madvise			sys_madvise
+221	common	fcntl64			sys_fcntl64		compat_sys_fcntl64
+# 222 for tux
+# 223 is unused
+224	common	gettid			sys_gettid
+225	common	readahead		sys_readahead		compat_sys_aarch32_readahead
+226	common	setxattr		sys_setxattr
+227	common	lsetxattr		sys_lsetxattr
+228	common	fsetxattr		sys_fsetxattr
+229	common	getxattr		sys_getxattr
+230	common	lgetxattr		sys_lgetxattr
+231	common	fgetxattr		sys_fgetxattr
+232	common	listxattr		sys_listxattr
+233	common	llistxattr		sys_llistxattr
+234	common	flistxattr		sys_flistxattr
+235	common	removexattr		sys_removexattr
+236	common	lremovexattr		sys_lremovexattr
+237	common	fremovexattr		sys_fremovexattr
+238	common	tkill			sys_tkill
+239	common	sendfile64		sys_sendfile64
+240	common	futex			sys_futex_time32
+241	common	sched_setaffinity	sys_sched_setaffinity	compat_sys_sched_setaffinity
+242	common	sched_getaffinity	sys_sched_getaffinity	compat_sys_sched_getaffinity
+243	common	io_setup		sys_io_setup		compat_sys_io_setup
+244	common	io_destroy		sys_io_destroy
+245	common	io_getevents		sys_io_getevents_time32
+246	common	io_submit		sys_io_submit		compat_sys_io_submit
+247	common	io_cancel		sys_io_cancel
+248	common	exit_group		sys_exit_group
+249	common	lookup_dcookie		sys_ni_syscall
+250	common	epoll_create		sys_epoll_create
+251	common	epoll_ctl		sys_epoll_ctl
+252	common	epoll_wait		sys_epoll_wait
+253	common	remap_file_pages	sys_remap_file_pages
+# 254 for set_thread_area
+# 255 for get_thread_area
+256	common	set_tid_address		sys_set_tid_address
+257	common	timer_create		sys_timer_create	compat_sys_timer_create
+258	common	timer_settime		sys_timer_settime32
+259	common	timer_gettime		sys_timer_gettime32
+260	common	timer_getoverrun	sys_timer_getoverrun
+261	common	timer_delete		sys_timer_delete
+262	common	clock_settime		sys_clock_settime32
+263	common	clock_gettime		sys_clock_gettime32
+264	common	clock_getres		sys_clock_getres_time32
+265	common	clock_nanosleep		sys_clock_nanosleep_time32
+266	common	statfs64		sys_statfs64_wrapper	compat_sys_aarch32_statfs64
+267	common	fstatfs64		sys_fstatfs64_wrapper	compat_sys_aarch32_fstatfs64
+268	common	tgkill			sys_tgkill
+269	common	utimes			sys_utimes_time32
+270	common	arm_fadvise64_64	sys_arm_fadvise64_64	compat_sys_aarch32_fadvise64_64
+271	common	pciconfig_iobase	sys_pciconfig_iobase
+272	common	pciconfig_read		sys_pciconfig_read
+273	common	pciconfig_write		sys_pciconfig_write
+274	common	mq_open			sys_mq_open		compat_sys_mq_open
+275	common	mq_unlink		sys_mq_unlink
+276	common	mq_timedsend		sys_mq_timedsend_time32
+277	common	mq_timedreceive		sys_mq_timedreceive_time32
+278	common	mq_notify		sys_mq_notify		compat_sys_mq_notify
+279	common	mq_getsetattr		sys_mq_getsetattr	compat_sys_mq_getsetattr
+280	common	waitid			sys_waitid		compat_sys_waitid
+281	common	socket			sys_socket
+282	common	bind			sys_bind
+283	common	connect			sys_connect
+284	common	listen			sys_listen
+285	common	accept			sys_accept
+286	common	getsockname		sys_getsockname
+287	common	getpeername		sys_getpeername
+288	common	socketpair		sys_socketpair
+289	common	send			sys_send
+290	common	sendto			sys_sendto
+291	common	recv			sys_recv		compat_sys_recv
+292	common	recvfrom		sys_recvfrom		compat_sys_recvfrom
+293	common	shutdown		sys_shutdown
+294	common	setsockopt		sys_setsockopt
+295	common	getsockopt		sys_getsockopt
+296	common	sendmsg			sys_sendmsg		compat_sys_sendmsg
+297	common	recvmsg			sys_recvmsg		compat_sys_recvmsg
+298	common	semop			sys_semop
+299	common	semget			sys_semget
+300	common	semctl			sys_old_semctl		compat_sys_old_semctl
+301	common	msgsnd			sys_msgsnd		compat_sys_msgsnd
+302	common	msgrcv			sys_msgrcv		compat_sys_msgrcv
+303	common	msgget			sys_msgget
+304	common	msgctl			sys_old_msgctl		compat_sys_old_msgctl
+305	common	shmat			sys_shmat		compat_sys_shmat
+306	common	shmdt			sys_shmdt
+307	common	shmget			sys_shmget
+308	common	shmctl			sys_old_shmctl		compat_sys_old_shmctl
+309	common	add_key			sys_add_key
+310	common	request_key		sys_request_key
+311	common	keyctl			sys_keyctl		compat_sys_keyctl
+312	common	semtimedop		sys_semtimedop_time32
+313	common	vserver			sys_ni_syscall
+314	common	ioprio_set		sys_ioprio_set
+315	common	ioprio_get		sys_ioprio_get
+316	common	inotify_init		sys_inotify_init
+317	common	inotify_add_watch	sys_inotify_add_watch
+318	common	inotify_rm_watch	sys_inotify_rm_watch
+319	common	mbind			sys_mbind
+320	common	get_mempolicy		sys_get_mempolicy
+321	common	set_mempolicy		sys_set_mempolicy
+322	common	openat			sys_openat		compat_sys_openat
+323	common	mkdirat			sys_mkdirat
+324	common	mknodat			sys_mknodat
+325	common	fchownat		sys_fchownat
+326	common	futimesat		sys_futimesat_time32
+327	common	fstatat64		sys_fstatat64
+328	common	unlinkat		sys_unlinkat
+329	common	renameat		sys_renameat
+330	common	linkat			sys_linkat
+331	common	symlinkat		sys_symlinkat
+332	common	readlinkat		sys_readlinkat
+333	common	fchmodat		sys_fchmodat
+334	common	faccessat		sys_faccessat
+335	common	pselect6		sys_pselect6_time32	compat_sys_pselect6_time32
+336	common	ppoll			sys_ppoll_time32	compat_sys_ppoll_time32
+337	common	unshare			sys_unshare
+338	common	set_robust_list		sys_set_robust_list	compat_sys_set_robust_list
+339	common	get_robust_list		sys_get_robust_list	compat_sys_get_robust_list
+340	common	splice			sys_splice
+341	common	arm_sync_file_range	sys_sync_file_range2	compat_sys_aarch32_sync_file_range2
+342	common	tee			sys_tee
+343	common	vmsplice		sys_vmsplice
+344	common	move_pages		sys_move_pages
+345	common	getcpu			sys_getcpu
+346	common	epoll_pwait		sys_epoll_pwait		compat_sys_epoll_pwait
+347	common	kexec_load		sys_kexec_load		compat_sys_kexec_load
+348	common	utimensat		sys_utimensat_time32
+349	common	signalfd		sys_signalfd		compat_sys_signalfd
+350	common	timerfd_create		sys_timerfd_create
+351	common	eventfd			sys_eventfd
+352	common	fallocate		sys_fallocate		compat_sys_aarch32_fallocate
+353	common	timerfd_settime		sys_timerfd_settime32
+354	common	timerfd_gettime		sys_timerfd_gettime32
+355	common	signalfd4		sys_signalfd4		compat_sys_signalfd4
+356	common	eventfd2		sys_eventfd2
+357	common	epoll_create1		sys_epoll_create1
+358	common	dup3			sys_dup3
+359	common	pipe2			sys_pipe2
+360	common	inotify_init1		sys_inotify_init1
+361	common	preadv			sys_preadv		compat_sys_preadv
+362	common	pwritev			sys_pwritev		compat_sys_pwritev
+363	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo	compat_sys_rt_tgsigqueueinfo
+364	common	perf_event_open		sys_perf_event_open
+365	common	recvmmsg		sys_recvmmsg_time32	compat_sys_recvmmsg_time32
+366	common	accept4			sys_accept4
+367	common	fanotify_init		sys_fanotify_init
+368	common	fanotify_mark		sys_fanotify_mark	compat_sys_fanotify_mark
+369	common	prlimit64		sys_prlimit64
+370	common	name_to_handle_at	sys_name_to_handle_at
+371	common	open_by_handle_at	sys_open_by_handle_at	compat_sys_open_by_handle_at
+372	common	clock_adjtime		sys_clock_adjtime32
+373	common	syncfs			sys_syncfs
+374	common	sendmmsg		sys_sendmmsg		compat_sys_sendmmsg
+375	common	setns			sys_setns
+376	common	process_vm_readv	sys_process_vm_readv
+377	common	process_vm_writev	sys_process_vm_writev
+378	common	kcmp			sys_kcmp
+379	common	finit_module		sys_finit_module
+380	common	sched_setattr		sys_sched_setattr
+381	common	sched_getattr		sys_sched_getattr
+382	common	renameat2		sys_renameat2
+383	common	seccomp			sys_seccomp
+384	common	getrandom		sys_getrandom
+385	common	memfd_create		sys_memfd_create
+386	common	bpf			sys_bpf
+387	common	execveat		sys_execveat		compat_sys_execveat
+388	common	userfaultfd		sys_userfaultfd
+389	common	membarrier		sys_membarrier
+390	common	mlock2			sys_mlock2
+391	common	copy_file_range		sys_copy_file_range
+392	common	preadv2			sys_preadv2		compat_sys_preadv2
+393	common	pwritev2		sys_pwritev2		compat_sys_pwritev2
+394	common	pkey_mprotect		sys_pkey_mprotect
+395	common	pkey_alloc		sys_pkey_alloc
+396	common	pkey_free		sys_pkey_free
+397	common	statx			sys_statx
+398	common	rseq			sys_rseq
+399	common	io_pgetevents		sys_io_pgetevents_time32	compat_sys_io_pgetevents
+400	common	migrate_pages		sys_migrate_pages
+401	common	kexec_file_load		sys_kexec_file_load
+# 402 is unused
+403	common	clock_gettime64			sys_clock_gettime
+404	common	clock_settime64			sys_clock_settime
+405	common	clock_adjtime64			sys_clock_adjtime
+406	common	clock_getres_time64		sys_clock_getres
+407	common	clock_nanosleep_time64		sys_clock_nanosleep
+408	common	timer_gettime64			sys_timer_gettime
+409	common	timer_settime64			sys_timer_settime
+410	common	timerfd_gettime64		sys_timerfd_gettime
+411	common	timerfd_settime64		sys_timerfd_settime
+412	common	utimensat_time64		sys_utimensat
+413	common	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
+414	common	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
+416	common	io_pgetevents_time64		sys_io_pgetevents		compat_sys_io_pgetevents_time64
+417	common	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
+418	common	mq_timedsend_time64		sys_mq_timedsend
+419	common	mq_timedreceive_time64		sys_mq_timedreceive
+420	common	semtimedop_time64		sys_semtimedop
+421	common	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
+422	common	futex_time64			sys_futex
+423	common	sched_rr_get_interval_time64	sys_sched_rr_get_interval
+424	common	pidfd_send_signal		sys_pidfd_send_signal
+425	common	io_uring_setup			sys_io_uring_setup
+426	common	io_uring_enter			sys_io_uring_enter
+427	common	io_uring_register		sys_io_uring_register
+428	common	open_tree			sys_open_tree
+429	common	move_mount			sys_move_mount
+430	common	fsopen				sys_fsopen
+431	common	fsconfig			sys_fsconfig
+432	common	fsmount				sys_fsmount
+433	common	fspick				sys_fspick
+434	common	pidfd_open			sys_pidfd_open
+435	common	clone3				sys_clone3
+436	common	close_range			sys_close_range
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
+439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
+442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_fd			sys_quotactl_fd
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
+450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
+453	common	map_shadow_stack		sys_map_shadow_stack
+454	common	futex_wake			sys_futex_wake
+455	common	futex_wait			sys_futex_wait
+456	common	futex_requeue			sys_futex_requeue
+457	common	statmount			sys_statmount
+458	common	listmount			sys_listmount
+459	common	lsm_get_self_attr		sys_lsm_get_self_attr
+460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+461	common	lsm_list_modules		sys_lsm_list_modules
+462	common	mseal				sys_mseal
diff --git a/tools/perf/arch/arm64/entry/syscalls/syscall_64.tbl b/tools/perf/arch/arm64/entry/syscalls/syscall_64.tbl
new file mode 120000
index 000000000000..4fdd58f10c15
--- /dev/null
+++ b/tools/perf/arch/arm64/entry/syscalls/syscall_64.tbl
@@ -0,0 +1 @@
+../../../../../scripts/syscall.tbl
+\ No newline at end of file
diff --git a/tools/perf/arch/arm64/include/syscall_table.h b/tools/perf/arch/arm64/include/syscall_table.h
new file mode 100644
index 000000000000..7ff51b783000
--- /dev/null
+++ b/tools/perf/arch/arm64/include/syscall_table.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/bitsperlong.h>
+
+#if __BITS_PER_LONG == 64
+#include <asm/syscalls_64.h>
+#else
+#include <asm/syscalls_32.h>
+#endif
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 22b19dcc6beb..4301181b8e45 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -274,33 +274,9 @@ static void arm_spe_setup_evsel(struct evsel *evsel, struct perf_cpu_map *cpus)
 		evsel__set_sample_bit(evsel, PHYS_ADDR);
 }
 
-static int arm_spe_recording_options(struct auxtrace_record *itr,
-				     struct evlist *evlist,
-				     struct record_opts *opts)
+static int arm_spe_setup_aux_buffer(struct record_opts *opts)
 {
-	struct arm_spe_recording *sper =
-			container_of(itr, struct arm_spe_recording, itr);
-	struct evsel *evsel, *tmp;
-	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
 	bool privileged = perf_event_paranoid_check(-1);
-	struct evsel *tracking_evsel;
-	int err;
-
-	sper->evlist = evlist;
-
-	evlist__for_each_entry(evlist, evsel) {
-		if (evsel__is_aux_event(evsel)) {
-			if (!strstarts(evsel->pmu->name, ARM_SPE_PMU_NAME)) {
-				pr_err("Found unexpected auxtrace event: %s\n",
-				       evsel->pmu->name);
-				return -EINVAL;
-			}
-			opts->full_auxtrace = true;
-		}
-	}
-
-	if (!opts->full_auxtrace)
-		return 0;
 
 	/*
 	 * we are in snapshot mode.
@@ -330,6 +306,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
 			return -EINVAL;
 		}
+
+		pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
+			  opts->auxtrace_snapshot_size);
 	}
 
 	/* We are in full trace mode but '-m,xyz' wasn't specified */
@@ -355,14 +334,15 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 		}
 	}
 
-	if (opts->auxtrace_snapshot_mode)
-		pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
-			  opts->auxtrace_snapshot_size);
+	return 0;
+}
 
-	evlist__for_each_entry_safe(evlist, tmp, evsel) {
-		if (evsel__is_aux_event(evsel))
-			arm_spe_setup_evsel(evsel, cpus);
-	}
+static int arm_spe_setup_tracking_event(struct evlist *evlist,
+					struct record_opts *opts)
+{
+	int err;
+	struct evsel *tracking_evsel;
+	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
 
 	/* Add dummy event to keep tracking */
 	err = parse_event(evlist, "dummy:u");
@@ -388,6 +368,52 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 	return 0;
 }
 
+static int arm_spe_recording_options(struct auxtrace_record *itr,
+				     struct evlist *evlist,
+				     struct record_opts *opts)
+{
+	struct arm_spe_recording *sper =
+			container_of(itr, struct arm_spe_recording, itr);
+	struct evsel *evsel, *tmp;
+	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
+	bool discard = false;
+	int err;
+
+	sper->evlist = evlist;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__is_aux_event(evsel)) {
+			if (!strstarts(evsel->pmu->name, ARM_SPE_PMU_NAME)) {
+				pr_err("Found unexpected auxtrace event: %s\n",
+				       evsel->pmu->name);
+				return -EINVAL;
+			}
+			opts->full_auxtrace = true;
+		}
+	}
+
+	if (!opts->full_auxtrace)
+		return 0;
+
+	evlist__for_each_entry_safe(evlist, tmp, evsel) {
+		if (evsel__is_aux_event(evsel)) {
+			arm_spe_setup_evsel(evsel, cpus);
+			if (evsel->core.attr.config &
+			    perf_pmu__format_bits(evsel->pmu, "discard"))
+				discard = true;
+		}
+	}
+
+	if (discard)
+		return 0;
+
+	err = arm_spe_setup_aux_buffer(opts);
+	if (err)
+		return err;
+
+	return arm_spe_setup_tracking_event(evlist, opts);
+}
+
 static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
 					 struct record_opts *opts,
 					 const char *str)
diff --git a/tools/perf/arch/csky/entry/syscalls/Kbuild b/tools/perf/arch/csky/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..11707c481a24
--- /dev/null
+++ b/tools/perf/arch/csky/entry/syscalls/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
diff --git a/tools/perf/arch/csky/entry/syscalls/Makefile.syscalls b/tools/perf/arch/csky/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..ea2dd10d0571
--- /dev/null
+++ b/tools/perf/arch/csky/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 += csky time32 stat64 rlimit
diff --git a/tools/perf/arch/csky/include/syscall_table.h b/tools/perf/arch/csky/include/syscall_table.h
new file mode 100644
index 000000000000..4c942821662d
--- /dev/null
+++ b/tools/perf/arch/csky/include/syscall_table.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/syscalls_32.h>
diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile
index 52544d59245b..087e099fb453 100644
--- a/tools/perf/arch/loongarch/Makefile
+++ b/tools/perf/arch/loongarch/Makefile
@@ -1,25 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 PERF_HAVE_JITDUMP := 1
 HAVE_KVM_STAT_SUPPORT := 1
-
-#
-# Syscall table generation for perf
-#
-
-out    := $(OUTPUT)arch/loongarch/include/generated/asm
-header := $(out)/syscalls.c
-incpath := $(srctree)/tools
-sysdef := $(srctree)/tools/arch/loongarch/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/loongarch/entry/syscalls/
-systbl := $(sysprf)/mksyscalltbl
-
-# Create output directory if not already present
-$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
-
-$(header): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
-
-clean::
-	$(call QUIET_CLEAN, loongarch) $(RM) $(header)
-
-archheaders: $(header)
diff --git a/tools/perf/arch/loongarch/entry/syscalls/Kbuild b/tools/perf/arch/loongarch/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..9a41e3572c3a
--- /dev/null
+++ b/tools/perf/arch/loongarch/entry/syscalls/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/loongarch/entry/syscalls/Makefile.syscalls b/tools/perf/arch/loongarch/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..47d32da2aed8
--- /dev/null
+++ b/tools/perf/arch/loongarch/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_64 +=
diff --git a/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl b/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl
deleted file mode 100755
index c10ad3580aef..000000000000
--- a/tools/perf/arch/loongarch/entry/syscalls/mksyscalltbl
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Generate system call table for perf. Derived from
-# powerpc script.
-#
-# Author(s):  Ming Wang <wangming01@loongson.cn>
-# Author(s):  Huacai Chen <chenhuacai@loongson.cn>
-# Copyright (C) 2020-2023 Loongson Technology Corporation Limited
-
-gcc=$1
-hostcc=$2
-incpath=$3
-input=$4
-
-if ! test -r $input; then
-	echo "Could not read input file" >&2
-	exit 1
-fi
-
-create_sc_table()
-{
-	local sc nr max_nr
-
-	while read sc nr; do
-		printf "%s\n" "	[$nr] = \"$sc\","
-		max_nr=$nr
-	done
-
-	echo "#define SYSCALLTBL_LOONGARCH_MAX_ID $max_nr"
-}
-
-create_table()
-{
-	echo "#include \"$input\""
-	echo "static const char *const syscalltbl_loongarch[] = {"
-	create_sc_table
-	echo "};"
-}
-
-$gcc -E -dM -x c -I $incpath/include/uapi $input \
-	|awk '$2 ~ "__NR" && $3 !~ "__NR3264_" {
-		sub("^#define __NR(3264)?_", "");
-		print | "sort -k2 -n"}' \
-	|create_table
diff --git a/tools/perf/arch/loongarch/include/syscall_table.h b/tools/perf/arch/loongarch/include/syscall_table.h
new file mode 100644
index 000000000000..9d0646d3455c
--- /dev/null
+++ b/tools/perf/arch/loongarch/include/syscall_table.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/syscall_table_64.h>
diff --git a/tools/perf/arch/mips/Makefile b/tools/perf/arch/mips/Makefile
deleted file mode 100644
index 827168f1077a..000000000000
--- a/tools/perf/arch/mips/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Syscall table generation for perf
-out    := $(OUTPUT)arch/mips/include/generated/asm
-header := $(out)/syscalls_n64.c
-sysprf := $(srctree)/tools/perf/arch/mips/entry/syscalls
-sysdef := $(sysprf)/syscall_n64.tbl
-systbl := $(sysprf)/mksyscalltbl
-
-# Create output directory if not already present
-$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
-
-$(header): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
-
-clean::
-	$(call QUIET_CLEAN, mips) $(RM) $(header)
-
-archheaders: $(header)
diff --git a/tools/perf/arch/mips/entry/syscalls/Kbuild b/tools/perf/arch/mips/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..9a41e3572c3a
--- /dev/null
+++ b/tools/perf/arch/mips/entry/syscalls/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/mips/entry/syscalls/Makefile.syscalls b/tools/perf/arch/mips/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..9ee914bdfb05
--- /dev/null
+++ b/tools/perf/arch/mips/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_64 += n64
+
+syscalltbl = $(srctree)/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
diff --git a/tools/perf/arch/mips/entry/syscalls/mksyscalltbl b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl
deleted file mode 100644
index c0d93f959c4e..000000000000
--- a/tools/perf/arch/mips/entry/syscalls/mksyscalltbl
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Generate system call table for perf. Derived from
-# s390 script.
-#
-# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
-# Changed by: Tiezhu Yang <yangtiezhu@loongson.cn>
-
-SYSCALL_TBL=$1
-
-if ! test -r $SYSCALL_TBL; then
-	echo "Could not read input file" >&2
-	exit 1
-fi
-
-create_table()
-{
-	local max_nr nr abi sc discard
-
-	echo 'static const char *const syscalltbl_mips_n64[] = {'
-	while read nr abi sc discard; do
-		printf '\t[%d] = "%s",\n' $nr $sc
-		max_nr=$nr
-	done
-	echo '};'
-	echo "#define SYSCALLTBL_MIPS_N64_MAX_ID $max_nr"
-}
-
-grep -E "^[[:digit:]]+[[:space:]]+(n64)" $SYSCALL_TBL	\
-	|sort -k1 -n					\
-	|create_table
diff --git a/tools/perf/arch/mips/include/syscall_table.h b/tools/perf/arch/mips/include/syscall_table.h
new file mode 100644
index 000000000000..b53e31c15805
--- /dev/null
+++ b/tools/perf/arch/mips/include/syscall_table.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/syscalls_64.h>
diff --git a/tools/perf/arch/parisc/entry/syscalls/Kbuild b/tools/perf/arch/parisc/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..84c6599b4ea6
--- /dev/null
+++ b/tools/perf/arch/parisc/entry/syscalls/Kbuild
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/parisc/entry/syscalls/Makefile.syscalls b/tools/perf/arch/parisc/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..ae326fecb83b
--- /dev/null
+++ b/tools/perf/arch/parisc/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32  +=
+syscall_abis_64  +=
+
+syscalltbl = $(srctree)/tools/perf/arch/parisc/entry/syscalls/syscall.tbl
diff --git a/tools/perf/arch/parisc/entry/syscalls/syscall.tbl b/tools/perf/arch/parisc/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..66dc406b12e4
--- /dev/null
+++ b/tools/perf/arch/parisc/entry/syscalls/syscall.tbl
@@ -0,0 +1,463 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for parisc
+#
+# The format is:
+# <number> <abi> <name> <entry point> <compat entry point>
+#
+# The <abi> can be common, 64, or 32 for this file.
+#
+0	common	restart_syscall		sys_restart_syscall
+1	common	exit			sys_exit
+2	common	fork			sys_fork_wrapper
+3	common	read			sys_read
+4	common	write			sys_write
+5	common	open			sys_open			compat_sys_open
+6	common	close			sys_close
+7	common	waitpid			sys_waitpid
+8	common	creat			sys_creat
+9	common	link			sys_link
+10	common	unlink			sys_unlink
+11	common	execve			sys_execve			compat_sys_execve
+12	common	chdir			sys_chdir
+13	32	time			sys_time32
+13	64	time			sys_time
+14	common	mknod			sys_mknod
+15	common	chmod			sys_chmod
+16	common	lchown			sys_lchown
+17	common	socket			sys_socket
+18	common	stat			sys_newstat			compat_sys_newstat
+19	common	lseek			sys_lseek			compat_sys_lseek
+20	common	getpid			sys_getpid
+21	common	mount			sys_mount
+22	common	bind			sys_bind
+23	common	setuid			sys_setuid
+24	common	getuid			sys_getuid
+25	32	stime			sys_stime32
+25	64	stime			sys_stime
+26	common	ptrace			sys_ptrace			compat_sys_ptrace
+27	common	alarm			sys_alarm
+28	common	fstat			sys_newfstat			compat_sys_newfstat
+29	common	pause			sys_pause
+30	32	utime			sys_utime32
+30	64	utime			sys_utime
+31	common	connect			sys_connect
+32	common	listen			sys_listen
+33	common	access			sys_access
+34	common	nice			sys_nice
+35	common	accept			sys_accept
+36	common	sync			sys_sync
+37	common	kill			sys_kill
+38	common	rename			sys_rename
+39	common	mkdir			sys_mkdir
+40	common	rmdir			sys_rmdir
+41	common	dup			sys_dup
+42	common	pipe			sys_pipe
+43	common	times			sys_times			compat_sys_times
+44	common	getsockname		sys_getsockname
+45	common	brk			sys_brk
+46	common	setgid			sys_setgid
+47	common	getgid			sys_getgid
+48	common	signal			sys_signal
+49	common	geteuid			sys_geteuid
+50	common	getegid			sys_getegid
+51	common	acct			sys_acct
+52	common	umount2			sys_umount
+53	common	getpeername		sys_getpeername
+54	common	ioctl			sys_ioctl			compat_sys_ioctl
+55	common	fcntl			sys_fcntl			compat_sys_fcntl
+56	common	socketpair		sys_socketpair
+57	common	setpgid			sys_setpgid
+58	common	send			sys_send
+59	common	uname			sys_newuname
+60	common	umask			sys_umask
+61	common	chroot			sys_chroot
+62	common	ustat			sys_ustat			compat_sys_ustat
+63	common	dup2			sys_dup2
+64	common	getppid			sys_getppid
+65	common	getpgrp			sys_getpgrp
+66	common	setsid			sys_setsid
+67	common	pivot_root		sys_pivot_root
+68	common	sgetmask		sys_sgetmask			sys32_unimplemented
+69	common	ssetmask		sys_ssetmask			sys32_unimplemented
+70	common	setreuid		sys_setreuid
+71	common	setregid		sys_setregid
+72	common	mincore			sys_mincore
+73	common	sigpending		sys_sigpending			compat_sys_sigpending
+74	common	sethostname		sys_sethostname
+75	common	setrlimit		sys_setrlimit			compat_sys_setrlimit
+76	common	getrlimit		sys_getrlimit			compat_sys_getrlimit
+77	common	getrusage		sys_getrusage			compat_sys_getrusage
+78	common	gettimeofday		sys_gettimeofday		compat_sys_gettimeofday
+79	common	settimeofday		sys_settimeofday		compat_sys_settimeofday
+80	common	getgroups		sys_getgroups
+81	common	setgroups		sys_setgroups
+82	common	sendto			sys_sendto
+83	common	symlink			sys_symlink
+84	common	lstat			sys_newlstat			compat_sys_newlstat
+85	common	readlink		sys_readlink
+86	common	uselib			sys_ni_syscall
+87	common	swapon			sys_swapon
+88	common	reboot			sys_reboot
+89	common	mmap2			sys_mmap2
+90	common	mmap			sys_mmap
+91	common	munmap			sys_munmap
+92	common	truncate		sys_truncate			compat_sys_truncate
+93	common	ftruncate		sys_ftruncate			compat_sys_ftruncate
+94	common	fchmod			sys_fchmod
+95	common	fchown			sys_fchown
+96	common	getpriority		sys_getpriority
+97	common	setpriority		sys_setpriority
+98	common	recv			sys_recv			compat_sys_recv
+99	common	statfs			sys_statfs			compat_sys_statfs
+100	common	fstatfs			sys_fstatfs			compat_sys_fstatfs
+101	common	stat64			sys_stat64
+# 102 was socketcall
+103	common	syslog			sys_syslog
+104	common	setitimer		sys_setitimer			compat_sys_setitimer
+105	common	getitimer		sys_getitimer			compat_sys_getitimer
+106	common	capget			sys_capget
+107	common	capset			sys_capset
+108	32	pread64			parisc_pread64
+108	64	pread64			sys_pread64
+109	32	pwrite64		parisc_pwrite64
+109	64	pwrite64		sys_pwrite64
+110	common	getcwd			sys_getcwd
+111	common	vhangup			sys_vhangup
+112	common	fstat64			sys_fstat64
+113	common	vfork			sys_vfork_wrapper
+114	common	wait4			sys_wait4			compat_sys_wait4
+115	common	swapoff			sys_swapoff
+116	common	sysinfo			sys_sysinfo			compat_sys_sysinfo
+117	common	shutdown		sys_shutdown
+118	common	fsync			sys_fsync
+119	common	madvise			parisc_madvise
+120	common	clone			sys_clone_wrapper
+121	common	setdomainname		sys_setdomainname
+122	common	sendfile		sys_sendfile			compat_sys_sendfile
+123	common	recvfrom		sys_recvfrom			compat_sys_recvfrom
+124	32	adjtimex		sys_adjtimex_time32
+124	64	adjtimex		sys_adjtimex
+125	common	mprotect		sys_mprotect
+126	common	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
+# 127 was create_module
+128	common	init_module		sys_init_module
+129	common	delete_module		sys_delete_module
+# 130 was get_kernel_syms
+131	common	quotactl		sys_quotactl
+132	common	getpgid			sys_getpgid
+133	common	fchdir			sys_fchdir
+134	common	bdflush			sys_ni_syscall
+135	common	sysfs			sys_sysfs
+136	32	personality		parisc_personality
+136	64	personality		sys_personality
+# 137 was afs_syscall
+138	common	setfsuid		sys_setfsuid
+139	common	setfsgid		sys_setfsgid
+140	common	_llseek			sys_llseek
+141	common	getdents		sys_getdents			compat_sys_getdents
+142	common	_newselect		sys_select			compat_sys_select
+143	common	flock			sys_flock
+144	common	msync			sys_msync
+145	common	readv			sys_readv
+146	common	writev			sys_writev
+147	common	getsid			sys_getsid
+148	common	fdatasync		sys_fdatasync
+149	common	_sysctl			sys_ni_syscall
+150	common	mlock			sys_mlock
+151	common	munlock			sys_munlock
+152	common	mlockall		sys_mlockall
+153	common	munlockall		sys_munlockall
+154	common	sched_setparam		sys_sched_setparam
+155	common	sched_getparam		sys_sched_getparam
+156	common	sched_setscheduler	sys_sched_setscheduler
+157	common	sched_getscheduler	sys_sched_getscheduler
+158	common	sched_yield		sys_sched_yield
+159	common	sched_get_priority_max	sys_sched_get_priority_max
+160	common	sched_get_priority_min	sys_sched_get_priority_min
+161	32	sched_rr_get_interval	sys_sched_rr_get_interval_time32
+161	64	sched_rr_get_interval	sys_sched_rr_get_interval
+162	32	nanosleep		sys_nanosleep_time32
+162	64	nanosleep		sys_nanosleep
+163	common	mremap			sys_mremap
+164	common	setresuid		sys_setresuid
+165	common	getresuid		sys_getresuid
+166	common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
+# 167 was query_module
+168	common	poll			sys_poll
+# 169 was nfsservctl
+170	common	setresgid		sys_setresgid
+171	common	getresgid		sys_getresgid
+172	common	prctl			sys_prctl
+173	common	rt_sigreturn		sys_rt_sigreturn_wrapper
+174	common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
+175	common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
+176	common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
+177	32	rt_sigtimedwait		sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
+177	64	rt_sigtimedwait		sys_rt_sigtimedwait
+178	common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+179	common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
+180	common	chown			sys_chown
+181	common	setsockopt		sys_setsockopt			sys_setsockopt
+182	common	getsockopt		sys_getsockopt			sys_getsockopt
+183	common	sendmsg			sys_sendmsg			compat_sys_sendmsg
+184	common	recvmsg			sys_recvmsg			compat_sys_recvmsg
+185	common	semop			sys_semop
+186	common	semget			sys_semget
+187	common	semctl			sys_semctl			compat_sys_semctl
+188	common	msgsnd			sys_msgsnd			compat_sys_msgsnd
+189	common	msgrcv			sys_msgrcv			compat_sys_msgrcv
+190	common	msgget			sys_msgget
+191	common	msgctl			sys_msgctl			compat_sys_msgctl
+192	common	shmat			sys_shmat			compat_sys_shmat
+193	common	shmdt			sys_shmdt
+194	common	shmget			sys_shmget
+195	common	shmctl			sys_shmctl			compat_sys_shmctl
+# 196 was getpmsg
+# 197 was putpmsg
+198	common	lstat64			sys_lstat64
+199	32	truncate64		parisc_truncate64
+199	64	truncate64		sys_truncate64
+200	32	ftruncate64		parisc_ftruncate64
+200	64	ftruncate64		sys_ftruncate64
+201	common	getdents64		sys_getdents64
+202	common	fcntl64			sys_fcntl64			compat_sys_fcntl64
+# 203 was attrctl
+# 204 was acl_get
+# 205 was acl_set
+206	common	gettid			sys_gettid
+207	32	readahead		parisc_readahead
+207	64	readahead		sys_readahead
+208	common	tkill			sys_tkill
+209	common	sendfile64		sys_sendfile64			compat_sys_sendfile64
+210	32	futex			sys_futex_time32
+210	64	futex			sys_futex
+211	common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
+212	common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
+# 213 was set_thread_area
+# 214 was get_thread_area
+215	common	io_setup		sys_io_setup			compat_sys_io_setup
+216	common	io_destroy		sys_io_destroy
+217	32	io_getevents		sys_io_getevents_time32
+217	64	io_getevents		sys_io_getevents
+218	common	io_submit		sys_io_submit			compat_sys_io_submit
+219	common	io_cancel		sys_io_cancel
+# 220 was alloc_hugepages
+# 221 was free_hugepages
+222	common	exit_group		sys_exit_group
+223	common	lookup_dcookie		sys_ni_syscall
+224	common	epoll_create		sys_epoll_create
+225	common	epoll_ctl		sys_epoll_ctl
+226	common	epoll_wait		sys_epoll_wait
+227	common	remap_file_pages	sys_remap_file_pages
+228	32	semtimedop		sys_semtimedop_time32
+228	64	semtimedop		sys_semtimedop
+229	common	mq_open			sys_mq_open			compat_sys_mq_open
+230	common	mq_unlink		sys_mq_unlink
+231	32	mq_timedsend		sys_mq_timedsend_time32
+231	64	mq_timedsend		sys_mq_timedsend
+232	32	mq_timedreceive		sys_mq_timedreceive_time32
+232	64	mq_timedreceive		sys_mq_timedreceive
+233	common	mq_notify		sys_mq_notify			compat_sys_mq_notify
+234	common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
+235	common	waitid			sys_waitid			compat_sys_waitid
+236	32	fadvise64_64		parisc_fadvise64_64
+236	64	fadvise64_64		sys_fadvise64_64
+237	common	set_tid_address		sys_set_tid_address
+238	common	setxattr		sys_setxattr
+239	common	lsetxattr		sys_lsetxattr
+240	common	fsetxattr		sys_fsetxattr
+241	common	getxattr		sys_getxattr
+242	common	lgetxattr		sys_lgetxattr
+243	common	fgetxattr		sys_fgetxattr
+244	common	listxattr		sys_listxattr
+245	common	llistxattr		sys_llistxattr
+246	common	flistxattr		sys_flistxattr
+247	common	removexattr		sys_removexattr
+248	common	lremovexattr		sys_lremovexattr
+249	common	fremovexattr		sys_fremovexattr
+250	common	timer_create		sys_timer_create		compat_sys_timer_create
+251	32	timer_settime		sys_timer_settime32
+251	64	timer_settime		sys_timer_settime
+252	32	timer_gettime		sys_timer_gettime32
+252	64	timer_gettime		sys_timer_gettime
+253	common	timer_getoverrun	sys_timer_getoverrun
+254	common	timer_delete		sys_timer_delete
+255	32	clock_settime		sys_clock_settime32
+255	64	clock_settime		sys_clock_settime
+256	32	clock_gettime		sys_clock_gettime32
+256	64	clock_gettime		sys_clock_gettime
+257	32	clock_getres		sys_clock_getres_time32
+257	64	clock_getres		sys_clock_getres
+258	32	clock_nanosleep		sys_clock_nanosleep_time32
+258	64	clock_nanosleep		sys_clock_nanosleep
+259	common	tgkill			sys_tgkill
+260	common	mbind			sys_mbind
+261	common	get_mempolicy		sys_get_mempolicy
+262	common	set_mempolicy		sys_set_mempolicy
+# 263 was vserver
+264	common	add_key			sys_add_key
+265	common	request_key		sys_request_key
+266	common	keyctl			sys_keyctl			compat_sys_keyctl
+267	common	ioprio_set		sys_ioprio_set
+268	common	ioprio_get		sys_ioprio_get
+269	common	inotify_init		sys_inotify_init
+270	common	inotify_add_watch	sys_inotify_add_watch
+271	common	inotify_rm_watch	sys_inotify_rm_watch
+272	common	migrate_pages		sys_migrate_pages
+273	32	pselect6		sys_pselect6_time32		compat_sys_pselect6_time32
+273	64	pselect6		sys_pselect6
+274	32	ppoll			sys_ppoll_time32		compat_sys_ppoll_time32
+274	64	ppoll			sys_ppoll
+275	common	openat			sys_openat			compat_sys_openat
+276	common	mkdirat			sys_mkdirat
+277	common	mknodat			sys_mknodat
+278	common	fchownat		sys_fchownat
+279	32	futimesat		sys_futimesat_time32
+279	64	futimesat		sys_futimesat
+280	common	fstatat64		sys_fstatat64
+281	common	unlinkat		sys_unlinkat
+282	common	renameat		sys_renameat
+283	common	linkat			sys_linkat
+284	common	symlinkat		sys_symlinkat
+285	common	readlinkat		sys_readlinkat
+286	common	fchmodat		sys_fchmodat
+287	common	faccessat		sys_faccessat
+288	common	unshare			sys_unshare
+289	common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
+290	common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
+291	common	splice			sys_splice
+292	32	sync_file_range		parisc_sync_file_range
+292	64	sync_file_range		sys_sync_file_range
+293	common	tee			sys_tee
+294	common	vmsplice		sys_vmsplice
+295	common	move_pages		sys_move_pages
+296	common	getcpu			sys_getcpu
+297	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
+298	common	statfs64		sys_statfs64			compat_sys_statfs64
+299	common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
+300	common	kexec_load		sys_kexec_load			compat_sys_kexec_load
+301	32	utimensat		sys_utimensat_time32
+301	64	utimensat		sys_utimensat
+302	common	signalfd		sys_signalfd			compat_sys_signalfd
+# 303 was timerfd
+304	common	eventfd			sys_eventfd
+305	32	fallocate		parisc_fallocate
+305	64	fallocate		sys_fallocate
+306	common	timerfd_create		parisc_timerfd_create
+307	32	timerfd_settime		sys_timerfd_settime32
+307	64	timerfd_settime		sys_timerfd_settime
+308	32	timerfd_gettime		sys_timerfd_gettime32
+308	64	timerfd_gettime		sys_timerfd_gettime
+309	common	signalfd4		parisc_signalfd4		parisc_compat_signalfd4
+310	common	eventfd2		parisc_eventfd2
+311	common	epoll_create1		sys_epoll_create1
+312	common	dup3			sys_dup3
+313	common	pipe2			parisc_pipe2
+314	common	inotify_init1		parisc_inotify_init1
+315	common	preadv	sys_preadv	compat_sys_preadv
+316	common	pwritev	sys_pwritev	compat_sys_pwritev
+317	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
+318	common	perf_event_open		sys_perf_event_open
+319	32	recvmmsg		sys_recvmmsg_time32		compat_sys_recvmmsg_time32
+319	64	recvmmsg		sys_recvmmsg
+320	common	accept4			sys_accept4
+321	common	prlimit64		sys_prlimit64
+322	common	fanotify_init		sys_fanotify_init
+323	common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
+324	32	clock_adjtime		sys_clock_adjtime32
+324	64	clock_adjtime		sys_clock_adjtime
+325	common	name_to_handle_at	sys_name_to_handle_at
+326	common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
+327	common	syncfs			sys_syncfs
+328	common	setns			sys_setns
+329	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
+330	common	process_vm_readv	sys_process_vm_readv
+331	common	process_vm_writev	sys_process_vm_writev
+332	common	kcmp			sys_kcmp
+333	common	finit_module		sys_finit_module
+334	common	sched_setattr		sys_sched_setattr
+335	common	sched_getattr		sys_sched_getattr
+336	32	utimes			sys_utimes_time32
+336	64	utimes			sys_utimes
+337	common	renameat2		sys_renameat2
+338	common	seccomp			sys_seccomp
+339	common	getrandom		sys_getrandom
+340	common	memfd_create		sys_memfd_create
+341	common	bpf			sys_bpf
+342	common	execveat		sys_execveat			compat_sys_execveat
+343	common	membarrier		sys_membarrier
+344	common	userfaultfd		parisc_userfaultfd
+345	common	mlock2			sys_mlock2
+346	common	copy_file_range		sys_copy_file_range
+347	common	preadv2			sys_preadv2			compat_sys_preadv2
+348	common	pwritev2		sys_pwritev2			compat_sys_pwritev2
+349	common	statx			sys_statx
+350	32	io_pgetevents		sys_io_pgetevents_time32	compat_sys_io_pgetevents
+350	64	io_pgetevents		sys_io_pgetevents
+351	common	pkey_mprotect		sys_pkey_mprotect
+352	common	pkey_alloc		sys_pkey_alloc
+353	common	pkey_free		sys_pkey_free
+354	common	rseq			sys_rseq
+355	common	kexec_file_load		sys_kexec_file_load		sys_kexec_file_load
+356	common	cacheflush		sys_cacheflush
+# up to 402 is unassigned and reserved for arch specific syscalls
+403	32	clock_gettime64			sys_clock_gettime		sys_clock_gettime
+404	32	clock_settime64			sys_clock_settime		sys_clock_settime
+405	32	clock_adjtime64			sys_clock_adjtime		sys_clock_adjtime
+406	32	clock_getres_time64		sys_clock_getres		sys_clock_getres
+407	32	clock_nanosleep_time64		sys_clock_nanosleep		sys_clock_nanosleep
+408	32	timer_gettime64			sys_timer_gettime		sys_timer_gettime
+409	32	timer_settime64			sys_timer_settime		sys_timer_settime
+410	32	timerfd_gettime64		sys_timerfd_gettime		sys_timerfd_gettime
+411	32	timerfd_settime64		sys_timerfd_settime		sys_timerfd_settime
+412	32	utimensat_time64		sys_utimensat			sys_utimensat
+413	32	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
+414	32	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
+416	32	io_pgetevents_time64		sys_io_pgetevents		compat_sys_io_pgetevents_time64
+417	32	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64		sys_mq_timedsend		sys_mq_timedsend
+419	32	mq_timedreceive_time64		sys_mq_timedreceive		sys_mq_timedreceive
+420	32	semtimedop_time64		sys_semtimedop			sys_semtimedop
+421	32	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64			sys_futex			sys_futex
+423	32	sched_rr_get_interval_time64	sys_sched_rr_get_interval	sys_sched_rr_get_interval
+424	common	pidfd_send_signal		sys_pidfd_send_signal
+425	common	io_uring_setup			sys_io_uring_setup
+426	common	io_uring_enter			sys_io_uring_enter
+427	common	io_uring_register		sys_io_uring_register
+428	common	open_tree			sys_open_tree
+429	common	move_mount			sys_move_mount
+430	common	fsopen				sys_fsopen
+431	common	fsconfig			sys_fsconfig
+432	common	fsmount				sys_fsmount
+433	common	fspick				sys_fspick
+434	common	pidfd_open			sys_pidfd_open
+435	common	clone3				sys_clone3_wrapper
+436	common	close_range			sys_close_range
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
+439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
+442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_fd			sys_quotactl_fd
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
+450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
+453	common	map_shadow_stack		sys_map_shadow_stack
+454	common	futex_wake			sys_futex_wake
+455	common	futex_wait			sys_futex_wait
+456	common	futex_requeue			sys_futex_requeue
+457	common	statmount			sys_statmount
+458	common	listmount			sys_listmount
+459	common	lsm_get_self_attr		sys_lsm_get_self_attr
+460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+461	common	lsm_list_modules		sys_lsm_list_modules
+462	common	mseal				sys_mseal
diff --git a/tools/perf/arch/parisc/include/syscall_table.h b/tools/perf/arch/parisc/include/syscall_table.h
new file mode 100644
index 000000000000..7ff51b783000
--- /dev/null
+++ b/tools/perf/arch/parisc/include/syscall_table.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/bitsperlong.h>
+
+#if __BITS_PER_LONG == 64
+#include <asm/syscalls_64.h>
+#else
+#include <asm/syscalls_32.h>
+#endif
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index dc8f4fb8e324..a295a80ea078 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -1,28 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_JITDUMP := 1
-
-#
-# Syscall table generation for perf
-#
-
-out    := $(OUTPUT)arch/powerpc/include/generated/asm
-header32 := $(out)/syscalls_32.c
-header64 := $(out)/syscalls_64.c
-sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls
-sysdef := $(sysprf)/syscall.tbl
-systbl := $(sysprf)/mksyscalltbl
-
-# Create output directory if not already present
-$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
-
-$(header64): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '64' $(sysdef) > $@
-
-$(header32): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '32' $(sysdef) > $@
-
-clean::
-	$(call QUIET_CLEAN, powerpc) $(RM) $(header32) $(header64)
-
-archheaders: $(header32) $(header64)
diff --git a/tools/perf/arch/powerpc/entry/syscalls/Kbuild b/tools/perf/arch/powerpc/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..84c6599b4ea6
--- /dev/null
+++ b/tools/perf/arch/powerpc/entry/syscalls/Kbuild
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/powerpc/entry/syscalls/Makefile.syscalls b/tools/perf/arch/powerpc/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..e35afbc57c79
--- /dev/null
+++ b/tools/perf/arch/powerpc/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 += nospu
+syscall_abis_64 += nospu
+
+syscalltbl = $(srctree)/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
diff --git a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
deleted file mode 100755
index 0eb316fe6dd1..000000000000
--- a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Generate system call table for perf. Derived from
-# s390 script.
-#
-# Copyright IBM Corp. 2017
-# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
-# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
-
-wordsize=$1
-SYSCALL_TBL=$2
-
-if ! test -r $SYSCALL_TBL; then
-	echo "Could not read input file" >&2
-	exit 1
-fi
-
-create_table()
-{
-	local wordsize=$1
-	local max_nr nr abi sc discard
-	max_nr=-1
-	nr=0
-
-	echo "static const char *const syscalltbl_powerpc_${wordsize}[] = {"
-	while read nr abi sc discard; do
-		if [ "$max_nr" -lt "$nr" ]; then
-			printf '\t[%d] = "%s",\n' $nr $sc
-			max_nr=$nr
-		fi
-	done
-	echo '};'
-	echo "#define SYSCALLTBL_POWERPC_${wordsize}_MAX_ID $max_nr"
-}
-
-grep -E "^[[:digit:]]+[[:space:]]+(common|spu|nospu|${wordsize})" $SYSCALL_TBL \
-	|sort -k1 -n                                                           \
-	|create_table ${wordsize}
diff --git a/tools/perf/arch/powerpc/include/syscall_table.h b/tools/perf/arch/powerpc/include/syscall_table.h
new file mode 100644
index 000000000000..7ff51b783000
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/syscall_table.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/bitsperlong.h>
+
+#if __BITS_PER_LONG == 64
+#include <asm/syscalls_64.h>
+#else
+#include <asm/syscalls_32.h>
+#endif
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index e8e6e6fc6f17..bd36cfd420a2 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -16,6 +16,7 @@
 
 #define PVR_POWER9		0x004E
 #define PVR_POWER10		0x0080
+#define PVR_POWER11		0x0082
 
 static const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG(r0, PERF_REG_POWERPC_R0),
@@ -207,7 +208,7 @@ uint64_t arch__intr_reg_mask(void)
 	version = (((mfspr(SPRN_PVR)) >>  16) & 0xFFFF);
 	if (version == PVR_POWER9)
 		extended_mask = PERF_REG_PMU_MASK_300;
-	else if (version == PVR_POWER10)
+	else if ((version == PVR_POWER10) || (version == PVR_POWER11))
 		extended_mask = PERF_REG_PMU_MASK_31;
 	else
 		return mask;
diff --git a/tools/perf/arch/riscv/Makefile b/tools/perf/arch/riscv/Makefile
index 18ad078000e2..087e099fb453 100644
--- a/tools/perf/arch/riscv/Makefile
+++ b/tools/perf/arch/riscv/Makefile
@@ -1,25 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 PERF_HAVE_JITDUMP := 1
 HAVE_KVM_STAT_SUPPORT := 1
-
-#
-# Syscall table generation for perf
-#
-
-out    := $(OUTPUT)arch/riscv/include/generated/asm
-header := $(out)/syscalls.c
-incpath := $(srctree)/tools
-sysdef := $(srctree)/tools/arch/riscv/include/uapi/asm/unistd.h
-sysprf := $(srctree)/tools/perf/arch/riscv/entry/syscalls/
-systbl := $(sysprf)/mksyscalltbl
-
-# Create output directory if not already present
-$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
-
-$(header): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
-
-clean::
-	$(call QUIET_CLEAN, riscv) $(RM) $(header)
-
-archheaders: $(header)
diff --git a/tools/perf/arch/riscv/entry/syscalls/Kbuild b/tools/perf/arch/riscv/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..9a41e3572c3a
--- /dev/null
+++ b/tools/perf/arch/riscv/entry/syscalls/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/riscv/entry/syscalls/Makefile.syscalls b/tools/perf/arch/riscv/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..9668fd1faf60
--- /dev/null
+++ b/tools/perf/arch/riscv/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 += riscv memfd_secret
+syscall_abis_64 += riscv rlimit memfd_secret
diff --git a/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl b/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl
deleted file mode 100755
index c59f5e852b97..000000000000
--- a/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Generate system call table for perf. Derived from
-# powerpc script.
-#
-# Copyright IBM Corp. 2017
-# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
-# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
-# Changed by: Kim Phillips <kim.phillips@arm.com>
-# Changed by: Björn Töpel <bjorn@rivosinc.com>
-
-gcc=$1
-hostcc=$2
-incpath=$3
-input=$4
-
-if ! test -r $input; then
-	echo "Could not read input file" >&2
-	exit 1
-fi
-
-create_sc_table()
-{
-	local sc nr max_nr
-
-	while read sc nr; do
-		printf "%s\n" "	[$nr] = \"$sc\","
-		max_nr=$nr
-	done
-
-	echo "#define SYSCALLTBL_RISCV_MAX_ID $max_nr"
-}
-
-create_table()
-{
-	echo "#include \"$input\""
-	echo "static const char *const syscalltbl_riscv[] = {"
-	create_sc_table
-	echo "};"
-}
-
-$gcc -E -dM -x c -I $incpath/include/uapi $input \
-	|awk '$2 ~ "__NR" && $3 !~ "__NR3264_" {
-		sub("^#define __NR(3264)?_", "");
-		print | "sort -k2 -n"}' \
-	|create_table
diff --git a/tools/perf/arch/riscv/include/syscall_table.h b/tools/perf/arch/riscv/include/syscall_table.h
new file mode 100644
index 000000000000..7ff51b783000
--- /dev/null
+++ b/tools/perf/arch/riscv/include/syscall_table.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/bitsperlong.h>
+
+#if __BITS_PER_LONG == 64
+#include <asm/syscalls_64.h>
+#else
+#include <asm/syscalls_32.h>
+#endif
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
index c431c21b11ef..0033698a65ce 100644
--- a/tools/perf/arch/s390/Makefile
+++ b/tools/perf/arch/s390/Makefile
@@ -1,24 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_JITDUMP := 1
-
-#
-# Syscall table generation for perf
-#
-
-out    := $(OUTPUT)arch/s390/include/generated/asm
-header := $(out)/syscalls_64.c
-sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
-sysdef := $(sysprf)/syscall.tbl
-systbl := $(sysprf)/mksyscalltbl
-
-# Create output directory if not already present
-$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
-
-$(header): $(sysdef) $(systbl)
-	$(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
-
-clean::
-	$(call QUIET_CLEAN, s390) $(RM) $(header)
-
-archheaders: $(header)
diff --git a/tools/perf/arch/s390/entry/syscalls/Kbuild b/tools/perf/arch/s390/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..9a41e3572c3a
--- /dev/null
+++ b/tools/perf/arch/s390/entry/syscalls/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/s390/entry/syscalls/Makefile.syscalls b/tools/perf/arch/s390/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..9762d7abf17c
--- /dev/null
+++ b/tools/perf/arch/s390/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_64 += renameat rlimit memfd_secret
+
+syscalltbl = $(srctree)/tools/perf/arch/s390/entry/syscalls/syscall.tbl
diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
deleted file mode 100755
index 52eb88a77c94..000000000000
--- a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Generate system call table for perf
-#
-# Copyright IBM Corp. 2017, 2018
-# Author(s):  Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
-#
-
-SYSCALL_TBL=$1
-
-if ! test -r $SYSCALL_TBL; then
-	echo "Could not read input file" >&2
-	exit 1
-fi
-
-create_table()
-{
-	local max_nr nr abi sc discard
-
-	echo 'static const char *const syscalltbl_s390_64[] = {'
-	while read nr abi sc discard; do
-		printf '\t[%d] = "%s",\n' $nr $sc
-		max_nr=$nr
-	done
-	echo '};'
-	echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr"
-}
-
-grep -E "^[[:digit:]]+[[:space:]]+(common|64)" $SYSCALL_TBL	\
-	|sort -k1 -n					\
-	|create_table
diff --git a/tools/perf/arch/s390/include/syscall_table.h b/tools/perf/arch/s390/include/syscall_table.h
new file mode 100644
index 000000000000..b53e31c15805
--- /dev/null
+++ b/tools/perf/arch/s390/include/syscall_table.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/syscalls_64.h>
diff --git a/tools/perf/arch/sh/entry/syscalls/Kbuild b/tools/perf/arch/sh/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..11707c481a24
--- /dev/null
+++ b/tools/perf/arch/sh/entry/syscalls/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
diff --git a/tools/perf/arch/sh/entry/syscalls/Makefile.syscalls b/tools/perf/arch/sh/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..25080390e4ed
--- /dev/null
+++ b/tools/perf/arch/sh/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 +=
+syscalltbl = $(srctree)/tools/perf/arch/sh/entry/syscalls/syscall.tbl
diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..c8cad33bf250
--- /dev/null
+++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
@@ -0,0 +1,472 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for sh
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The <abi> is always "common" for this file
+#
+0	common	restart_syscall			sys_restart_syscall
+1	common	exit				sys_exit
+2	common	fork				sys_fork
+3	common	read				sys_read
+4	common	write				sys_write
+5	common	open				sys_open
+6	common	close				sys_close
+7	common	waitpid				sys_waitpid
+8	common	creat				sys_creat
+9	common	link				sys_link
+10	common	unlink				sys_unlink
+11	common	execve				sys_execve
+12	common	chdir				sys_chdir
+13	common	time				sys_time32
+14	common	mknod				sys_mknod
+15	common	chmod				sys_chmod
+16	common	lchown				sys_lchown16
+# 17 was break
+18	common	oldstat				sys_stat
+19	common	lseek				sys_lseek
+20	common	getpid				sys_getpid
+21	common	mount				sys_mount
+22	common	umount				sys_oldumount
+23	common	setuid				sys_setuid16
+24	common	getuid				sys_getuid16
+25	common	stime				sys_stime32
+26	common	ptrace				sys_ptrace
+27	common	alarm				sys_alarm
+28	common	oldfstat			sys_fstat
+29	common	pause				sys_pause
+30	common	utime				sys_utime32
+# 31 was stty
+# 32 was gtty
+33	common	access				sys_access
+34	common	nice				sys_nice
+# 35 was ftime
+36	common	sync				sys_sync
+37	common	kill				sys_kill
+38	common	rename				sys_rename
+39	common	mkdir				sys_mkdir
+40	common	rmdir				sys_rmdir
+41	common	dup				sys_dup
+42	common	pipe				sys_sh_pipe
+43	common	times				sys_times
+# 44 was prof
+45	common	brk				sys_brk
+46	common	setgid				sys_setgid16
+47	common	getgid				sys_getgid16
+48	common	signal				sys_signal
+49	common	geteuid				sys_geteuid16
+50	common	getegid				sys_getegid16
+51	common	acct				sys_acct
+52	common	umount2				sys_umount
+# 53 was lock
+54	common	ioctl				sys_ioctl
+55	common	fcntl				sys_fcntl
+# 56 was mpx
+57	common	setpgid				sys_setpgid
+# 58 was ulimit
+# 59 was olduname
+60	common	umask				sys_umask
+61	common	chroot				sys_chroot
+62	common	ustat				sys_ustat
+63	common	dup2				sys_dup2
+64	common	getppid				sys_getppid
+65	common	getpgrp				sys_getpgrp
+66	common	setsid				sys_setsid
+67	common	sigaction			sys_sigaction
+68	common	sgetmask			sys_sgetmask
+69	common	ssetmask			sys_ssetmask
+70	common	setreuid			sys_setreuid16
+71	common	setregid			sys_setregid16
+72	common	sigsuspend			sys_sigsuspend
+73	common	sigpending			sys_sigpending
+74	common	sethostname			sys_sethostname
+75	common	setrlimit			sys_setrlimit
+76	common	getrlimit			sys_old_getrlimit
+77	common	getrusage			sys_getrusage
+78	common	gettimeofday			sys_gettimeofday
+79	common	settimeofday			sys_settimeofday
+80	common	getgroups			sys_getgroups16
+81	common	setgroups			sys_setgroups16
+# 82 was select
+83	common	symlink				sys_symlink
+84	common	oldlstat			sys_lstat
+85	common	readlink			sys_readlink
+86	common	uselib				sys_uselib
+87	common	swapon				sys_swapon
+88	common	reboot				sys_reboot
+89	common	readdir				sys_old_readdir
+90	common	mmap				old_mmap
+91	common	munmap				sys_munmap
+92	common	truncate			sys_truncate
+93	common	ftruncate			sys_ftruncate
+94	common	fchmod				sys_fchmod
+95	common	fchown				sys_fchown16
+96	common	getpriority			sys_getpriority
+97	common	setpriority			sys_setpriority
+# 98 was profil
+99	common	statfs				sys_statfs
+100	common	fstatfs				sys_fstatfs
+# 101 was ioperm
+102	common	socketcall			sys_socketcall
+103	common	syslog				sys_syslog
+104	common	setitimer			sys_setitimer
+105	common	getitimer			sys_getitimer
+106	common	stat				sys_newstat
+107	common	lstat				sys_newlstat
+108	common	fstat				sys_newfstat
+109	common	olduname			sys_uname
+# 110 was iopl
+111	common	vhangup				sys_vhangup
+# 112 was idle
+# 113 was vm86old
+114	common	wait4				sys_wait4
+115	common	swapoff				sys_swapoff
+116	common	sysinfo				sys_sysinfo
+117	common	ipc				sys_ipc
+118	common	fsync				sys_fsync
+119	common	sigreturn			sys_sigreturn
+120	common	clone				sys_clone
+121	common	setdomainname			sys_setdomainname
+122	common	uname				sys_newuname
+123	common	cacheflush			sys_cacheflush
+124	common	adjtimex			sys_adjtimex_time32
+125	common	mprotect			sys_mprotect
+126	common	sigprocmask			sys_sigprocmask
+# 127 was create_module
+128	common	init_module			sys_init_module
+129	common	delete_module			sys_delete_module
+# 130 was get_kernel_syms
+131	common	quotactl			sys_quotactl
+132	common	getpgid				sys_getpgid
+133	common	fchdir				sys_fchdir
+134	common	bdflush				sys_ni_syscall
+135	common	sysfs				sys_sysfs
+136	common	personality			sys_personality
+# 137 was afs_syscall
+138	common	setfsuid			sys_setfsuid16
+139	common	setfsgid			sys_setfsgid16
+140	common	_llseek				sys_llseek
+141	common	getdents			sys_getdents
+142	common	_newselect			sys_select
+143	common	flock				sys_flock
+144	common	msync				sys_msync
+145	common	readv				sys_readv
+146	common	writev				sys_writev
+147	common	getsid				sys_getsid
+148	common	fdatasync			sys_fdatasync
+149	common	_sysctl				sys_ni_syscall
+150	common	mlock				sys_mlock
+151	common	munlock				sys_munlock
+152	common	mlockall			sys_mlockall
+153	common	munlockall			sys_munlockall
+154	common	sched_setparam			sys_sched_setparam
+155	common	sched_getparam			sys_sched_getparam
+156	common	sched_setscheduler		sys_sched_setscheduler
+157	common	sched_getscheduler		sys_sched_getscheduler
+158	common	sched_yield			sys_sched_yield
+159	common	sched_get_priority_max		sys_sched_get_priority_max
+160	common	sched_get_priority_min		sys_sched_get_priority_min
+161	common	sched_rr_get_interval		sys_sched_rr_get_interval_time32
+162	common	nanosleep			sys_nanosleep_time32
+163	common	mremap				sys_mremap
+164	common	setresuid			sys_setresuid16
+165	common	getresuid			sys_getresuid16
+# 166 was vm86
+# 167 was query_module
+168	common	poll				sys_poll
+169	common	nfsservctl			sys_ni_syscall
+170	common	setresgid			sys_setresgid16
+171	common	getresgid			sys_getresgid16
+172	common	prctl				sys_prctl
+173	common	rt_sigreturn			sys_rt_sigreturn
+174	common	rt_sigaction			sys_rt_sigaction
+175	common	rt_sigprocmask			sys_rt_sigprocmask
+176	common	rt_sigpending			sys_rt_sigpending
+177	common	rt_sigtimedwait			sys_rt_sigtimedwait_time32
+178	common	rt_sigqueueinfo			sys_rt_sigqueueinfo
+179	common	rt_sigsuspend			sys_rt_sigsuspend
+180	common	pread64				sys_pread_wrapper
+181	common	pwrite64			sys_pwrite_wrapper
+182	common	chown				sys_chown16
+183	common	getcwd				sys_getcwd
+184	common	capget				sys_capget
+185	common	capset				sys_capset
+186	common	sigaltstack			sys_sigaltstack
+187	common	sendfile			sys_sendfile
+# 188 is reserved for getpmsg
+# 189 is reserved for putpmsg
+190	common	vfork				sys_vfork
+191	common	ugetrlimit			sys_getrlimit
+192	common	mmap2				sys_mmap2
+193	common	truncate64			sys_truncate64
+194	common	ftruncate64			sys_ftruncate64
+195	common	stat64				sys_stat64
+196	common	lstat64				sys_lstat64
+197	common	fstat64				sys_fstat64
+198	common	lchown32			sys_lchown
+199	common	getuid32			sys_getuid
+200	common	getgid32			sys_getgid
+201	common	geteuid32			sys_geteuid
+202	common	getegid32			sys_getegid
+203	common	setreuid32			sys_setreuid
+204	common	setregid32			sys_setregid
+205	common	getgroups32			sys_getgroups
+206	common	setgroups32			sys_setgroups
+207	common	fchown32			sys_fchown
+208	common	setresuid32			sys_setresuid
+209	common	getresuid32			sys_getresuid
+210	common	setresgid32			sys_setresgid
+211	common	getresgid32			sys_getresgid
+212	common	chown32				sys_chown
+213	common	setuid32			sys_setuid
+214	common	setgid32			sys_setgid
+215	common	setfsuid32			sys_setfsuid
+216	common	setfsgid32			sys_setfsgid
+217	common	pivot_root			sys_pivot_root
+218	common	mincore				sys_mincore
+219	common	madvise				sys_madvise
+220	common	getdents64			sys_getdents64
+221	common	fcntl64				sys_fcntl64
+# 222 is reserved for tux
+# 223 is unused
+224	common	gettid				sys_gettid
+225	common	readahead			sys_readahead
+226	common	setxattr			sys_setxattr
+227	common	lsetxattr			sys_lsetxattr
+228	common	fsetxattr			sys_fsetxattr
+229	common	getxattr			sys_getxattr
+230	common	lgetxattr			sys_lgetxattr
+231	common	fgetxattr			sys_fgetxattr
+232	common	listxattr			sys_listxattr
+233	common	llistxattr			sys_llistxattr
+234	common	flistxattr			sys_flistxattr
+235	common	removexattr			sys_removexattr
+236	common	lremovexattr			sys_lremovexattr
+237	common	fremovexattr			sys_fremovexattr
+238	common	tkill				sys_tkill
+239	common	sendfile64			sys_sendfile64
+240	common	futex				sys_futex_time32
+241	common	sched_setaffinity		sys_sched_setaffinity
+242	common	sched_getaffinity		sys_sched_getaffinity
+# 243 is reserved for set_thread_area
+# 244 is reserved for get_thread_area
+245	common	io_setup			sys_io_setup
+246	common	io_destroy			sys_io_destroy
+247	common	io_getevents			sys_io_getevents_time32
+248	common	io_submit			sys_io_submit
+249	common	io_cancel			sys_io_cancel
+250	common	fadvise64			sys_fadvise64
+# 251 is unused
+252	common	exit_group			sys_exit_group
+253	common	lookup_dcookie			sys_ni_syscall
+254	common	epoll_create			sys_epoll_create
+255	common	epoll_ctl			sys_epoll_ctl
+256	common	epoll_wait			sys_epoll_wait
+257	common	remap_file_pages		sys_remap_file_pages
+258	common	set_tid_address			sys_set_tid_address
+259	common	timer_create			sys_timer_create
+260	common	timer_settime			sys_timer_settime32
+261	common	timer_gettime			sys_timer_gettime32
+262	common	timer_getoverrun		sys_timer_getoverrun
+263	common	timer_delete			sys_timer_delete
+264	common	clock_settime			sys_clock_settime32
+265	common	clock_gettime			sys_clock_gettime32
+266	common	clock_getres			sys_clock_getres_time32
+267	common	clock_nanosleep			sys_clock_nanosleep_time32
+268	common	statfs64			sys_statfs64
+269	common	fstatfs64			sys_fstatfs64
+270	common	tgkill				sys_tgkill
+271	common	utimes				sys_utimes_time32
+272	common	fadvise64_64			sys_fadvise64_64_wrapper
+# 273 is reserved for vserver
+274	common	mbind				sys_mbind
+275	common	get_mempolicy			sys_get_mempolicy
+276	common	set_mempolicy			sys_set_mempolicy
+277	common	mq_open				sys_mq_open
+278	common	mq_unlink			sys_mq_unlink
+279	common	mq_timedsend			sys_mq_timedsend_time32
+280	common	mq_timedreceive			sys_mq_timedreceive_time32
+281	common	mq_notify			sys_mq_notify
+282	common	mq_getsetattr			sys_mq_getsetattr
+283	common	kexec_load			sys_kexec_load
+284	common	waitid				sys_waitid
+285	common	add_key				sys_add_key
+286	common	request_key			sys_request_key
+287	common	keyctl				sys_keyctl
+288	common	ioprio_set			sys_ioprio_set
+289	common	ioprio_get			sys_ioprio_get
+290	common	inotify_init			sys_inotify_init
+291	common	inotify_add_watch		sys_inotify_add_watch
+292	common	inotify_rm_watch		sys_inotify_rm_watch
+# 293 is unused
+294	common	migrate_pages			sys_migrate_pages
+295	common	openat				sys_openat
+296	common	mkdirat				sys_mkdirat
+297	common	mknodat				sys_mknodat
+298	common	fchownat			sys_fchownat
+299	common	futimesat			sys_futimesat_time32
+300	common	fstatat64			sys_fstatat64
+301	common	unlinkat			sys_unlinkat
+302	common	renameat			sys_renameat
+303	common	linkat				sys_linkat
+304	common	symlinkat			sys_symlinkat
+305	common	readlinkat			sys_readlinkat
+306	common	fchmodat			sys_fchmodat
+307	common	faccessat			sys_faccessat
+308	common	pselect6			sys_pselect6_time32
+309	common	ppoll				sys_ppoll_time32
+310	common	unshare				sys_unshare
+311	common	set_robust_list			sys_set_robust_list
+312	common	get_robust_list			sys_get_robust_list
+313	common	splice				sys_splice
+314	common	sync_file_range			sys_sh_sync_file_range6
+315	common	tee				sys_tee
+316	common	vmsplice			sys_vmsplice
+317	common	move_pages			sys_move_pages
+318	common	getcpu				sys_getcpu
+319	common	epoll_pwait			sys_epoll_pwait
+320	common	utimensat			sys_utimensat_time32
+321	common	signalfd			sys_signalfd
+322	common	timerfd_create			sys_timerfd_create
+323	common	eventfd				sys_eventfd
+324	common	fallocate			sys_fallocate
+325	common	timerfd_settime			sys_timerfd_settime32
+326	common	timerfd_gettime			sys_timerfd_gettime32
+327	common	signalfd4			sys_signalfd4
+328	common	eventfd2			sys_eventfd2
+329	common	epoll_create1			sys_epoll_create1
+330	common	dup3				sys_dup3
+331	common	pipe2				sys_pipe2
+332	common	inotify_init1			sys_inotify_init1
+333	common	preadv				sys_preadv
+334	common	pwritev				sys_pwritev
+335	common	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo
+336	common	perf_event_open			sys_perf_event_open
+337	common	fanotify_init			sys_fanotify_init
+338	common	fanotify_mark			sys_fanotify_mark
+339	common	prlimit64			sys_prlimit64
+340	common	socket				sys_socket
+341	common	bind				sys_bind
+342	common	connect				sys_connect
+343	common	listen				sys_listen
+344	common	accept				sys_accept
+345	common	getsockname			sys_getsockname
+346	common	getpeername			sys_getpeername
+347	common	socketpair			sys_socketpair
+348	common	send				sys_send
+349	common	sendto				sys_sendto
+350	common	recv				sys_recv
+351	common	recvfrom			sys_recvfrom
+352	common	shutdown			sys_shutdown
+353	common	setsockopt			sys_setsockopt
+354	common	getsockopt			sys_getsockopt
+355	common	sendmsg				sys_sendmsg
+356	common	recvmsg				sys_recvmsg
+357	common	recvmmsg			sys_recvmmsg_time32
+358	common	accept4				sys_accept4
+359	common	name_to_handle_at		sys_name_to_handle_at
+360	common	open_by_handle_at		sys_open_by_handle_at
+361	common	clock_adjtime			sys_clock_adjtime32
+362	common	syncfs				sys_syncfs
+363	common	sendmmsg			sys_sendmmsg
+364	common	setns				sys_setns
+365	common	process_vm_readv		sys_process_vm_readv
+366	common	process_vm_writev		sys_process_vm_writev
+367	common	kcmp				sys_kcmp
+368	common	finit_module			sys_finit_module
+369	common	sched_getattr			sys_sched_getattr
+370	common	sched_setattr			sys_sched_setattr
+371	common	renameat2			sys_renameat2
+372	common	seccomp				sys_seccomp
+373	common	getrandom			sys_getrandom
+374	common	memfd_create			sys_memfd_create
+375	common	bpf				sys_bpf
+376	common	execveat			sys_execveat
+377	common	userfaultfd			sys_userfaultfd
+378	common	membarrier			sys_membarrier
+379	common	mlock2				sys_mlock2
+380	common	copy_file_range			sys_copy_file_range
+381	common	preadv2				sys_preadv2
+382	common	pwritev2			sys_pwritev2
+383	common	statx				sys_statx
+384	common	pkey_mprotect			sys_pkey_mprotect
+385	common	pkey_alloc			sys_pkey_alloc
+386	common	pkey_free			sys_pkey_free
+387	common	rseq				sys_rseq
+388	common	sync_file_range2		sys_sync_file_range2
+# room for arch specific syscalls
+393	common	semget				sys_semget
+394	common	semctl				sys_semctl
+395	common	shmget				sys_shmget
+396	common	shmctl				sys_shmctl
+397	common	shmat				sys_shmat
+398	common	shmdt				sys_shmdt
+399	common	msgget				sys_msgget
+400	common	msgsnd				sys_msgsnd
+401	common	msgrcv				sys_msgrcv
+402	common	msgctl				sys_msgctl
+403	common	clock_gettime64			sys_clock_gettime
+404	common	clock_settime64			sys_clock_settime
+405	common	clock_adjtime64			sys_clock_adjtime
+406	common	clock_getres_time64		sys_clock_getres
+407	common	clock_nanosleep_time64		sys_clock_nanosleep
+408	common	timer_gettime64			sys_timer_gettime
+409	common	timer_settime64			sys_timer_settime
+410	common	timerfd_gettime64		sys_timerfd_gettime
+411	common	timerfd_settime64		sys_timerfd_settime
+412	common	utimensat_time64		sys_utimensat
+413	common	pselect6_time64			sys_pselect6
+414	common	ppoll_time64			sys_ppoll
+416	common	io_pgetevents_time64		sys_io_pgetevents
+417	common	recvmmsg_time64			sys_recvmmsg
+418	common	mq_timedsend_time64		sys_mq_timedsend
+419	common	mq_timedreceive_time64		sys_mq_timedreceive
+420	common	semtimedop_time64		sys_semtimedop
+421	common	rt_sigtimedwait_time64		sys_rt_sigtimedwait
+422	common	futex_time64			sys_futex
+423	common	sched_rr_get_interval_time64	sys_sched_rr_get_interval
+424	common	pidfd_send_signal		sys_pidfd_send_signal
+425	common	io_uring_setup			sys_io_uring_setup
+426	common	io_uring_enter			sys_io_uring_enter
+427	common	io_uring_register		sys_io_uring_register
+428	common	open_tree			sys_open_tree
+429	common	move_mount			sys_move_mount
+430	common	fsopen				sys_fsopen
+431	common	fsconfig			sys_fsconfig
+432	common	fsmount				sys_fsmount
+433	common	fspick				sys_fspick
+434	common	pidfd_open			sys_pidfd_open
+# 435 reserved for clone3
+436	common	close_range			sys_close_range
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
+439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
+442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_fd			sys_quotactl_fd
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
+449	common  futex_waitv                     sys_futex_waitv
+450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
+453	common	map_shadow_stack		sys_map_shadow_stack
+454	common	futex_wake			sys_futex_wake
+455	common	futex_wait			sys_futex_wait
+456	common	futex_requeue			sys_futex_requeue
+457	common	statmount			sys_statmount
+458	common	listmount			sys_listmount
+459	common	lsm_get_self_attr		sys_lsm_get_self_attr
+460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+461	common	lsm_list_modules		sys_lsm_list_modules
+462	common	mseal				sys_mseal
+463	common	setxattrat			sys_setxattrat
+464	common	getxattrat			sys_getxattrat
+465	common	listxattrat			sys_listxattrat
+466	common	removexattrat			sys_removexattrat
diff --git a/tools/perf/arch/sh/include/syscall_table.h b/tools/perf/arch/sh/include/syscall_table.h
new file mode 100644
index 000000000000..4c942821662d
--- /dev/null
+++ b/tools/perf/arch/sh/include/syscall_table.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/syscalls_32.h>
diff --git a/tools/perf/arch/sparc/entry/syscalls/Kbuild b/tools/perf/arch/sparc/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..84c6599b4ea6
--- /dev/null
+++ b/tools/perf/arch/sparc/entry/syscalls/Kbuild
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/sparc/entry/syscalls/Makefile.syscalls b/tools/perf/arch/sparc/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..212c1800b644
--- /dev/null
+++ b/tools/perf/arch/sparc/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32  +=
+syscall_abis_64  +=
+syscalltbl = $(srctree)/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..727f99d333b3
--- /dev/null
+++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
@@ -0,0 +1,514 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for sparc
+#
+# The format is:
+# <number> <abi> <name> <entry point> <compat entry point>
+#
+# The <abi> can be common, 64, or 32 for this file.
+#
+0	common	restart_syscall		sys_restart_syscall
+1	32	exit			sys_exit			sparc_exit
+1	64	exit			sparc_exit
+2	common	fork			sys_fork
+3	common	read			sys_read
+4	common	write			sys_write
+5	common	open			sys_open			compat_sys_open
+6	common	close			sys_close
+7	common	wait4			sys_wait4			compat_sys_wait4
+8	common	creat			sys_creat
+9	common	link			sys_link
+10	common	unlink			sys_unlink
+11	32	execv			sunos_execv
+11	64	execv			sys_nis_syscall
+12	common	chdir			sys_chdir
+13	32    	chown			sys_chown16
+13	64	chown			sys_chown
+14	common	mknod			sys_mknod
+15	common	chmod			sys_chmod
+16	32	lchown			sys_lchown16
+16	64	lchown			sys_lchown
+17	common	brk			sys_brk
+18	common	perfctr			sys_nis_syscall
+19	common	lseek			sys_lseek			compat_sys_lseek
+20	common	getpid			sys_getpid
+21	common	capget			sys_capget
+22	common	capset			sys_capset
+23	32    	setuid			sys_setuid16
+23	64    	setuid			sys_setuid
+24	32	getuid			sys_getuid16
+24	64   	getuid			sys_getuid
+25	common	vmsplice		sys_vmsplice
+26	common	ptrace			sys_ptrace			compat_sys_ptrace
+27	common	alarm			sys_alarm
+28	common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
+29	32    	pause			sys_pause
+29	64    	pause			sys_nis_syscall
+30	32	utime			sys_utime32
+30	64	utime			sys_utime
+31	32    	lchown32		sys_lchown
+32	32    	fchown32		sys_fchown
+33	common	access			sys_access
+34	common	nice			sys_nice
+35	32    	chown32			sys_chown
+36	common	sync			sys_sync
+37	common	kill			sys_kill
+38	common	stat			sys_newstat			compat_sys_newstat
+39	32	sendfile		sys_sendfile			compat_sys_sendfile
+39	64	sendfile		sys_sendfile64
+40	common	lstat			sys_newlstat			compat_sys_newlstat
+41	common	dup			sys_dup
+42	common	pipe			sys_sparc_pipe
+43	common	times			sys_times			compat_sys_times
+44	32    	getuid32		sys_getuid
+45	common	umount2			sys_umount
+46	32	setgid			sys_setgid16
+46	64	setgid			sys_setgid
+47	32	getgid			sys_getgid16
+47	64	getgid			sys_getgid
+48	common	signal			sys_signal
+49	32	geteuid			sys_geteuid16
+49	64	geteuid			sys_geteuid
+50	32	getegid			sys_getegid16
+50	64	getegid			sys_getegid
+51	common	acct			sys_acct
+52	64	memory_ordering		sys_memory_ordering
+53	32	getgid32		sys_getgid
+54	common	ioctl			sys_ioctl			compat_sys_ioctl
+55	common	reboot			sys_reboot
+56	32    	mmap2			sys_mmap2			sys32_mmap2
+57	common	symlink			sys_symlink
+58	common	readlink		sys_readlink
+59	32	execve			sys_execve			sys32_execve
+59	64	execve			sys64_execve
+60	common	umask			sys_umask
+61	common	chroot			sys_chroot
+62	common	fstat			sys_newfstat			compat_sys_newfstat
+63	common	fstat64			sys_fstat64			compat_sys_fstat64
+64	common	getpagesize		sys_getpagesize
+65	common	msync			sys_msync
+66	common	vfork			sys_vfork
+67	common	pread64			sys_pread64			compat_sys_pread64
+68	common	pwrite64		sys_pwrite64			compat_sys_pwrite64
+69	32    	geteuid32		sys_geteuid
+70	32	getegid32		sys_getegid
+71	common	mmap			sys_mmap
+72	32	setreuid32		sys_setreuid
+73	32	munmap			sys_munmap
+73	64	munmap			sys_64_munmap
+74	common	mprotect		sys_mprotect
+75	common	madvise			sys_madvise
+76	common	vhangup			sys_vhangup
+77	32	truncate64		sys_truncate64			compat_sys_truncate64
+78	common	mincore			sys_mincore
+79	32	getgroups		sys_getgroups16
+79	64	getgroups		sys_getgroups
+80	32	setgroups		sys_setgroups16
+80	64	setgroups		sys_setgroups
+81	common	getpgrp			sys_getpgrp
+82	32	setgroups32		sys_setgroups
+83	common	setitimer		sys_setitimer			compat_sys_setitimer
+84	32	ftruncate64		sys_ftruncate64			compat_sys_ftruncate64
+85	common	swapon			sys_swapon
+86	common	getitimer		sys_getitimer			compat_sys_getitimer
+87	32	setuid32		sys_setuid
+88	common	sethostname		sys_sethostname
+89	32	setgid32		sys_setgid
+90	common	dup2			sys_dup2
+91	32	setfsuid32		sys_setfsuid
+92	common	fcntl			sys_fcntl			compat_sys_fcntl
+93	common	select			sys_select			compat_sys_select
+94	32	setfsgid32		sys_setfsgid
+95	common	fsync			sys_fsync
+96	common	setpriority		sys_setpriority
+97	common	socket			sys_socket
+98	common	connect			sys_connect
+99	common	accept			sys_accept
+100	common	getpriority		sys_getpriority
+101	common	rt_sigreturn		sys_rt_sigreturn		sys32_rt_sigreturn
+102	common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
+103	common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
+104	common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
+105	32	rt_sigtimedwait		sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
+105	64	rt_sigtimedwait		sys_rt_sigtimedwait
+106	common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+107	common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
+108	32	setresuid32		sys_setresuid
+108	64	setresuid		sys_setresuid
+109	32	getresuid32		sys_getresuid
+109	64	getresuid		sys_getresuid
+110	32	setresgid32		sys_setresgid
+110	64	setresgid		sys_setresgid
+111	32	getresgid32		sys_getresgid
+111	64	getresgid		sys_getresgid
+112	32	setregid32		sys_setregid
+113	common	recvmsg			sys_recvmsg			compat_sys_recvmsg
+114	common	sendmsg			sys_sendmsg			compat_sys_sendmsg
+115	32	getgroups32		sys_getgroups
+116	common	gettimeofday		sys_gettimeofday		compat_sys_gettimeofday
+117	common	getrusage		sys_getrusage			compat_sys_getrusage
+118	common	getsockopt		sys_getsockopt			sys_getsockopt
+119	common	getcwd			sys_getcwd
+120	common	readv			sys_readv
+121	common	writev			sys_writev
+122	common	settimeofday		sys_settimeofday		compat_sys_settimeofday
+123	32	fchown			sys_fchown16
+123	64	fchown			sys_fchown
+124	common	fchmod			sys_fchmod
+125	common	recvfrom		sys_recvfrom			compat_sys_recvfrom
+126	32	setreuid		sys_setreuid16
+126	64	setreuid		sys_setreuid
+127	32	setregid		sys_setregid16
+127	64	setregid		sys_setregid
+128	common	rename			sys_rename
+129	common	truncate		sys_truncate			compat_sys_truncate
+130	common	ftruncate		sys_ftruncate			compat_sys_ftruncate
+131	common	flock			sys_flock
+132	common	lstat64			sys_lstat64			compat_sys_lstat64
+133	common	sendto			sys_sendto
+134	common	shutdown		sys_shutdown
+135	common	socketpair		sys_socketpair
+136	common	mkdir			sys_mkdir
+137	common	rmdir			sys_rmdir
+138	32	utimes			sys_utimes_time32
+138	64	utimes			sys_utimes
+139	common	stat64			sys_stat64			compat_sys_stat64
+140	common	sendfile64		sys_sendfile64
+141	common	getpeername		sys_getpeername
+142	32	futex			sys_futex_time32
+142	64	futex			sys_futex
+143	common	gettid			sys_gettid
+144	common	getrlimit		sys_getrlimit			compat_sys_getrlimit
+145	common	setrlimit		sys_setrlimit			compat_sys_setrlimit
+146	common	pivot_root		sys_pivot_root
+147	common	prctl			sys_prctl
+148	common	pciconfig_read		sys_pciconfig_read
+149	common	pciconfig_write		sys_pciconfig_write
+150	common	getsockname		sys_getsockname
+151	common	inotify_init		sys_inotify_init
+152	common	inotify_add_watch	sys_inotify_add_watch
+153	common	poll			sys_poll
+154	common	getdents64		sys_getdents64
+155	32	fcntl64			sys_fcntl64			compat_sys_fcntl64
+156	common	inotify_rm_watch	sys_inotify_rm_watch
+157	common	statfs			sys_statfs			compat_sys_statfs
+158	common	fstatfs			sys_fstatfs			compat_sys_fstatfs
+159	common	umount			sys_oldumount
+160	common	sched_set_affinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
+161	common	sched_get_affinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
+162	common	getdomainname		sys_getdomainname
+163	common	setdomainname		sys_setdomainname
+164	64	utrap_install		sys_utrap_install
+165	common	quotactl		sys_quotactl
+166	common	set_tid_address		sys_set_tid_address
+167	common	mount			sys_mount
+168	common	ustat			sys_ustat			compat_sys_ustat
+169	common	setxattr		sys_setxattr
+170	common	lsetxattr		sys_lsetxattr
+171	common	fsetxattr		sys_fsetxattr
+172	common	getxattr		sys_getxattr
+173	common	lgetxattr		sys_lgetxattr
+174	common	getdents		sys_getdents			compat_sys_getdents
+175	common	setsid			sys_setsid
+176	common	fchdir			sys_fchdir
+177	common	fgetxattr		sys_fgetxattr
+178	common	listxattr		sys_listxattr
+179	common	llistxattr		sys_llistxattr
+180	common	flistxattr		sys_flistxattr
+181	common	removexattr		sys_removexattr
+182	common	lremovexattr		sys_lremovexattr
+183	32	sigpending		sys_sigpending			compat_sys_sigpending
+183	64	sigpending		sys_nis_syscall
+184	common	query_module		sys_ni_syscall
+185	common	setpgid			sys_setpgid
+186	common	fremovexattr		sys_fremovexattr
+187	common	tkill			sys_tkill
+188	32	exit_group		sys_exit_group			sparc_exit_group
+188	64	exit_group		sparc_exit_group
+189	common	uname			sys_newuname
+190	common	init_module		sys_init_module
+191	32	personality		sys_personality			sys_sparc64_personality
+191	64	personality		sys_sparc64_personality
+192	32	remap_file_pages	sys_sparc_remap_file_pages	sys_remap_file_pages
+192	64	remap_file_pages	sys_remap_file_pages
+193	common	epoll_create		sys_epoll_create
+194	common	epoll_ctl		sys_epoll_ctl
+195	common	epoll_wait		sys_epoll_wait
+196	common	ioprio_set		sys_ioprio_set
+197	common	getppid			sys_getppid
+198	32	sigaction		sys_sparc_sigaction		compat_sys_sparc_sigaction
+198	64	sigaction		sys_nis_syscall
+199	common	sgetmask		sys_sgetmask
+200	common	ssetmask		sys_ssetmask
+201	32	sigsuspend		sys_sigsuspend
+201	64	sigsuspend		sys_nis_syscall
+202	common	oldlstat		sys_newlstat			compat_sys_newlstat
+203	common	uselib			sys_uselib
+204	32	readdir			sys_old_readdir			compat_sys_old_readdir
+204	64	readdir			sys_nis_syscall
+205	common	readahead		sys_readahead			compat_sys_readahead
+206	common	socketcall		sys_socketcall			compat_sys_socketcall
+207	common	syslog			sys_syslog
+208	common	lookup_dcookie		sys_ni_syscall
+209	common	fadvise64		sys_fadvise64			compat_sys_fadvise64
+210	common	fadvise64_64		sys_fadvise64_64		compat_sys_fadvise64_64
+211	common	tgkill			sys_tgkill
+212	common	waitpid			sys_waitpid
+213	common	swapoff			sys_swapoff
+214	common	sysinfo			sys_sysinfo			compat_sys_sysinfo
+215	32	ipc			sys_ipc				compat_sys_ipc
+215	64	ipc			sys_sparc_ipc
+216	32	sigreturn		sys_sigreturn			sys32_sigreturn
+216	64	sigreturn		sys_nis_syscall
+217	common	clone			sys_clone
+218	common	ioprio_get		sys_ioprio_get
+219	32	adjtimex		sys_adjtimex_time32
+219	64	adjtimex		sys_sparc_adjtimex
+220	32	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
+220	64	sigprocmask		sys_nis_syscall
+221	common	create_module		sys_ni_syscall
+222	common	delete_module		sys_delete_module
+223	common	get_kernel_syms		sys_ni_syscall
+224	common	getpgid			sys_getpgid
+225	common	bdflush			sys_ni_syscall
+226	common	sysfs			sys_sysfs
+227	common	afs_syscall		sys_nis_syscall
+228	common	setfsuid		sys_setfsuid16
+229	common	setfsgid		sys_setfsgid16
+230	common	_newselect		sys_select			compat_sys_select
+231	32	time			sys_time32
+232	common	splice			sys_splice
+233	32	stime			sys_stime32
+233	64	stime			sys_stime
+234	common	statfs64		sys_statfs64			compat_sys_statfs64
+235	common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
+236	common	_llseek			sys_llseek
+237	common	mlock			sys_mlock
+238	common	munlock			sys_munlock
+239	common	mlockall		sys_mlockall
+240	common	munlockall		sys_munlockall
+241	common	sched_setparam		sys_sched_setparam
+242	common	sched_getparam		sys_sched_getparam
+243	common	sched_setscheduler	sys_sched_setscheduler
+244	common	sched_getscheduler	sys_sched_getscheduler
+245	common	sched_yield		sys_sched_yield
+246	common	sched_get_priority_max	sys_sched_get_priority_max
+247	common	sched_get_priority_min	sys_sched_get_priority_min
+248	32	sched_rr_get_interval	sys_sched_rr_get_interval_time32
+248	64	sched_rr_get_interval	sys_sched_rr_get_interval
+249	32	nanosleep		sys_nanosleep_time32
+249	64	nanosleep		sys_nanosleep
+250	32	mremap			sys_mremap
+250	64	mremap			sys_64_mremap
+251	common	_sysctl			sys_ni_syscall
+252	common	getsid			sys_getsid
+253	common	fdatasync		sys_fdatasync
+254	32	nfsservctl		sys_ni_syscall			sys_nis_syscall
+254	64	nfsservctl		sys_nis_syscall
+255	common	sync_file_range		sys_sync_file_range		compat_sys_sync_file_range
+256	32	clock_settime		sys_clock_settime32
+256	64	clock_settime		sys_clock_settime
+257	32	clock_gettime		sys_clock_gettime32
+257	64	clock_gettime		sys_clock_gettime
+258	32	clock_getres		sys_clock_getres_time32
+258	64	clock_getres		sys_clock_getres
+259	32	clock_nanosleep		sys_clock_nanosleep_time32
+259	64	clock_nanosleep		sys_clock_nanosleep
+260	common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
+261	common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
+262	32	timer_settime		sys_timer_settime32
+262	64	timer_settime		sys_timer_settime
+263	32	timer_gettime		sys_timer_gettime32
+263	64	timer_gettime		sys_timer_gettime
+264	common	timer_getoverrun	sys_timer_getoverrun
+265	common	timer_delete		sys_timer_delete
+266	common	timer_create		sys_timer_create		compat_sys_timer_create
+# 267 was vserver
+267	common	vserver			sys_nis_syscall
+268	common	io_setup		sys_io_setup			compat_sys_io_setup
+269	common	io_destroy		sys_io_destroy
+270	common	io_submit		sys_io_submit			compat_sys_io_submit
+271	common	io_cancel		sys_io_cancel
+272	32	io_getevents		sys_io_getevents_time32
+272	64	io_getevents		sys_io_getevents
+273	common	mq_open			sys_mq_open			compat_sys_mq_open
+274	common	mq_unlink		sys_mq_unlink
+275	32	mq_timedsend		sys_mq_timedsend_time32
+275	64	mq_timedsend		sys_mq_timedsend
+276	32	mq_timedreceive		sys_mq_timedreceive_time32
+276	64	mq_timedreceive		sys_mq_timedreceive
+277	common	mq_notify		sys_mq_notify			compat_sys_mq_notify
+278	common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
+279	common	waitid			sys_waitid			compat_sys_waitid
+280	common	tee			sys_tee
+281	common	add_key			sys_add_key
+282	common	request_key		sys_request_key
+283	common	keyctl			sys_keyctl			compat_sys_keyctl
+284	common	openat			sys_openat			compat_sys_openat
+285	common	mkdirat			sys_mkdirat
+286	common	mknodat			sys_mknodat
+287	common	fchownat		sys_fchownat
+288	32	futimesat		sys_futimesat_time32
+288	64	futimesat		sys_futimesat
+289	common	fstatat64		sys_fstatat64			compat_sys_fstatat64
+290	common	unlinkat		sys_unlinkat
+291	common	renameat		sys_renameat
+292	common	linkat			sys_linkat
+293	common	symlinkat		sys_symlinkat
+294	common	readlinkat		sys_readlinkat
+295	common	fchmodat		sys_fchmodat
+296	common	faccessat		sys_faccessat
+297	32	pselect6		sys_pselect6_time32		compat_sys_pselect6_time32
+297	64	pselect6		sys_pselect6
+298	32	ppoll			sys_ppoll_time32		compat_sys_ppoll_time32
+298	64	ppoll			sys_ppoll
+299	common	unshare			sys_unshare
+300	common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
+301	common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
+302	common	migrate_pages		sys_migrate_pages
+303	common	mbind			sys_mbind
+304	common	get_mempolicy		sys_get_mempolicy
+305	common	set_mempolicy		sys_set_mempolicy
+306	common	kexec_load		sys_kexec_load			compat_sys_kexec_load
+307	common	move_pages		sys_move_pages
+308	common	getcpu			sys_getcpu
+309	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
+310	32	utimensat		sys_utimensat_time32
+310	64	utimensat		sys_utimensat
+311	common	signalfd		sys_signalfd			compat_sys_signalfd
+312	common	timerfd_create		sys_timerfd_create
+313	common	eventfd			sys_eventfd
+314	common	fallocate		sys_fallocate			compat_sys_fallocate
+315	32	timerfd_settime		sys_timerfd_settime32
+315	64	timerfd_settime		sys_timerfd_settime
+316	32	timerfd_gettime		sys_timerfd_gettime32
+316	64	timerfd_gettime		sys_timerfd_gettime
+317	common	signalfd4		sys_signalfd4			compat_sys_signalfd4
+318	common	eventfd2		sys_eventfd2
+319	common	epoll_create1		sys_epoll_create1
+320	common	dup3			sys_dup3
+321	common	pipe2			sys_pipe2
+322	common	inotify_init1		sys_inotify_init1
+323	common	accept4			sys_accept4
+324	common	preadv			sys_preadv			compat_sys_preadv
+325	common	pwritev			sys_pwritev			compat_sys_pwritev
+326	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
+327	common	perf_event_open		sys_perf_event_open
+328	32	recvmmsg		sys_recvmmsg_time32		compat_sys_recvmmsg_time32
+328	64	recvmmsg		sys_recvmmsg
+329	common	fanotify_init		sys_fanotify_init
+330	common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
+331	common	prlimit64		sys_prlimit64
+332	common	name_to_handle_at	sys_name_to_handle_at
+333	common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
+334	32	clock_adjtime		sys_clock_adjtime32
+334	64	clock_adjtime		sys_sparc_clock_adjtime
+335	common	syncfs			sys_syncfs
+336	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
+337	common	setns			sys_setns
+338	common	process_vm_readv	sys_process_vm_readv
+339	common	process_vm_writev	sys_process_vm_writev
+340	32	kern_features		sys_ni_syscall			sys_kern_features
+340	64	kern_features		sys_kern_features
+341	common	kcmp			sys_kcmp
+342	common	finit_module		sys_finit_module
+343	common	sched_setattr		sys_sched_setattr
+344	common	sched_getattr		sys_sched_getattr
+345	common	renameat2		sys_renameat2
+346	common	seccomp			sys_seccomp
+347	common	getrandom		sys_getrandom
+348	common	memfd_create		sys_memfd_create
+349	common	bpf			sys_bpf
+350	32	execveat		sys_execveat			sys32_execveat
+350	64	execveat		sys64_execveat
+351	common	membarrier		sys_membarrier
+352	common	userfaultfd		sys_userfaultfd
+353	common	bind			sys_bind
+354	common	listen			sys_listen
+355	common	setsockopt		sys_setsockopt			sys_setsockopt
+356	common	mlock2			sys_mlock2
+357	common	copy_file_range		sys_copy_file_range
+358	common	preadv2			sys_preadv2			compat_sys_preadv2
+359	common	pwritev2		sys_pwritev2			compat_sys_pwritev2
+360	common	statx			sys_statx
+361	32	io_pgetevents		sys_io_pgetevents_time32	compat_sys_io_pgetevents
+361	64	io_pgetevents		sys_io_pgetevents
+362	common	pkey_mprotect		sys_pkey_mprotect
+363	common	pkey_alloc		sys_pkey_alloc
+364	common	pkey_free		sys_pkey_free
+365	common	rseq			sys_rseq
+# room for arch specific syscalls
+392	64	semtimedop			sys_semtimedop
+393	common	semget			sys_semget
+394	common	semctl			sys_semctl			compat_sys_semctl
+395	common	shmget			sys_shmget
+396	common	shmctl			sys_shmctl			compat_sys_shmctl
+397	common	shmat			sys_shmat			compat_sys_shmat
+398	common	shmdt			sys_shmdt
+399	common	msgget			sys_msgget
+400	common	msgsnd			sys_msgsnd			compat_sys_msgsnd
+401	common	msgrcv			sys_msgrcv			compat_sys_msgrcv
+402	common	msgctl			sys_msgctl			compat_sys_msgctl
+403	32	clock_gettime64			sys_clock_gettime		sys_clock_gettime
+404	32	clock_settime64			sys_clock_settime		sys_clock_settime
+405	32	clock_adjtime64			sys_clock_adjtime		sys_clock_adjtime
+406	32	clock_getres_time64		sys_clock_getres		sys_clock_getres
+407	32	clock_nanosleep_time64		sys_clock_nanosleep		sys_clock_nanosleep
+408	32	timer_gettime64			sys_timer_gettime		sys_timer_gettime
+409	32	timer_settime64			sys_timer_settime		sys_timer_settime
+410	32	timerfd_gettime64		sys_timerfd_gettime		sys_timerfd_gettime
+411	32	timerfd_settime64		sys_timerfd_settime		sys_timerfd_settime
+412	32	utimensat_time64		sys_utimensat			sys_utimensat
+413	32	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
+414	32	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
+416	32	io_pgetevents_time64		sys_io_pgetevents		compat_sys_io_pgetevents_time64
+417	32	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64		sys_mq_timedsend		sys_mq_timedsend
+419	32	mq_timedreceive_time64		sys_mq_timedreceive		sys_mq_timedreceive
+420	32	semtimedop_time64		sys_semtimedop			sys_semtimedop
+421	32	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64			sys_futex			sys_futex
+423	32	sched_rr_get_interval_time64	sys_sched_rr_get_interval	sys_sched_rr_get_interval
+424	common	pidfd_send_signal		sys_pidfd_send_signal
+425	common	io_uring_setup			sys_io_uring_setup
+426	common	io_uring_enter			sys_io_uring_enter
+427	common	io_uring_register		sys_io_uring_register
+428	common	open_tree			sys_open_tree
+429	common	move_mount			sys_move_mount
+430	common	fsopen				sys_fsopen
+431	common	fsconfig			sys_fsconfig
+432	common	fsmount				sys_fsmount
+433	common	fspick				sys_fspick
+434	common	pidfd_open			sys_pidfd_open
+# 435 reserved for clone3
+436	common	close_range			sys_close_range
+437	common	openat2			sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
+439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
+442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_fd			sys_quotactl_fd
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
+449	common  futex_waitv                     sys_futex_waitv
+450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
+453	common	map_shadow_stack		sys_map_shadow_stack
+454	common	futex_wake			sys_futex_wake
+455	common	futex_wait			sys_futex_wait
+456	common	futex_requeue			sys_futex_requeue
+457	common	statmount			sys_statmount
+458	common	listmount			sys_listmount
+459	common	lsm_get_self_attr		sys_lsm_get_self_attr
+460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+461	common	lsm_list_modules		sys_lsm_list_modules
+462	common	mseal 				sys_mseal
+463	common	setxattrat			sys_setxattrat
+464	common	getxattrat			sys_getxattrat
+465	common	listxattrat			sys_listxattrat
+466	common	removexattrat			sys_removexattrat
diff --git a/tools/perf/arch/sparc/include/syscall_table.h b/tools/perf/arch/sparc/include/syscall_table.h
new file mode 100644
index 000000000000..7ff51b783000
--- /dev/null
+++ b/tools/perf/arch/sparc/include/syscall_table.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/bitsperlong.h>
+
+#if __BITS_PER_LONG == 64
+#include <asm/syscalls_64.h>
+#else
+#include <asm/syscalls_32.h>
+#endif
diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build
index 87d057491343..02a1ca780a20 100644
--- a/tools/perf/arch/x86/Build
+++ b/tools/perf/arch/x86/Build
@@ -2,7 +2,6 @@ perf-util-y += util/
 perf-test-y += tests/
 
 ifdef SHELLCHECK
-  SHELL_TESTS := entry/syscalls/syscalltbl.sh
   TEST_LOGS := $(SHELL_TESTS:%=%.shellcheck_log)
 else
   SHELL_TESTS :=
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index a6b6e0a9308a..a295a80ea078 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -1,28 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 HAVE_KVM_STAT_SUPPORT := 1
 PERF_HAVE_JITDUMP := 1
-
-###
-# Syscall table generation
-#
-
-generated := $(OUTPUT)arch/x86/include/generated
-out       := $(generated)/asm
-header    := $(out)/syscalls_64.c
-header_32 := $(out)/syscalls_32.c
-sys       := $(srctree)/tools/perf/arch/x86/entry/syscalls
-systbl    := $(sys)/syscalltbl.sh
-
-# Create output directory if not already present
-$(shell [ -d '$(out)' ] || mkdir -p '$(out)')
-
-$(header): $(sys)/syscall_64.tbl $(systbl)
-	$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
-
-$(header_32): $(sys)/syscall_32.tbl $(systbl)
-	$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_32.tbl 'x86' > $@
-
-clean::
-	$(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated)
-
-archheaders: $(header) $(header_32)
diff --git a/tools/perf/arch/x86/entry/syscalls/Kbuild b/tools/perf/arch/x86/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..84c6599b4ea6
--- /dev/null
+++ b/tools/perf/arch/x86/entry/syscalls/Kbuild
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
+syscall-y += syscalls_64.h
diff --git a/tools/perf/arch/x86/entry/syscalls/Makefile.syscalls b/tools/perf/arch/x86/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..db3d5d6d4e56
--- /dev/null
+++ b/tools/perf/arch/x86/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32 += i386
+syscall_abis_64 +=
+
+syscalltbl = $(srctree)/tools/perf/arch/x86/entry/syscalls/syscall_%.tbl
diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
deleted file mode 100755
index 2b71f99933a5..000000000000
--- a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-arch="$2"
-
-syscall_macro() {
-    nr="$1"
-    name="$2"
-
-    echo "	[$nr] = \"$name\","
-}
-
-emit() {
-    nr="$1"
-    entry="$2"
-
-    syscall_macro "$nr" "$entry"
-}
-
-echo "static const char *const syscalltbl_${arch}[] = {"
-
-sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
-grep '^[0-9]' "$in" | sort -n > $sorted_table
-
-max_nr=0
-# the params are: nr abi name entry compat
-# use _ for intentionally unused variables according to SC2034
-while read nr _ name _ _; do
-    if [ $nr -ge 512 ] ; then # discard compat sycalls
-        break
-    fi
-
-    emit "$nr" "$name"
-    max_nr=$nr
-done < $sorted_table
-
-rm -f $sorted_table
-
-echo "};"
-
-echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}"
diff --git a/tools/perf/arch/x86/include/syscall_table.h b/tools/perf/arch/x86/include/syscall_table.h
new file mode 100644
index 000000000000..7ff51b783000
--- /dev/null
+++ b/tools/perf/arch/x86/include/syscall_table.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/bitsperlong.h>
+
+#if __BITS_PER_LONG == 64
+#include <asm/syscalls_64.h>
+#else
+#include <asm/syscalls_32.h>
+#endif
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 848327378694..06d7c0205b3d 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -15,6 +15,6 @@ perf-util-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind.o
 perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 
 perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
-perf-util-$(CONFIG_AUXTRACE) += archinsn.o
+perf-util-y += archinsn.o
 perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
 perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c
index 366b44d0bb7e..00f645a0c18a 100644
--- a/tools/perf/arch/x86/util/iostat.c
+++ b/tools/perf/arch/x86/util/iostat.c
@@ -403,6 +403,10 @@ void iostat_prefix(struct evlist *evlist,
 	struct iio_root_port *rp = evlist->selected->priv;
 
 	if (rp) {
+		/*
+		 * TODO: This is the incorrect format in JSON mode.
+		 *       See prepare_timestamp()
+		 */
 		if (ts)
 			sprintf(prefix, "%6lu.%09lu%s%04x:%02x%s",
 				ts->tv_sec, ts->tv_nsec,
diff --git a/tools/perf/arch/xtensa/entry/syscalls/Kbuild b/tools/perf/arch/xtensa/entry/syscalls/Kbuild
new file mode 100644
index 000000000000..11707c481a24
--- /dev/null
+++ b/tools/perf/arch/xtensa/entry/syscalls/Kbuild
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+syscall-y += syscalls_32.h
diff --git a/tools/perf/arch/xtensa/entry/syscalls/Makefile.syscalls b/tools/perf/arch/xtensa/entry/syscalls/Makefile.syscalls
new file mode 100644
index 000000000000..d4aa2358460c
--- /dev/null
+++ b/tools/perf/arch/xtensa/entry/syscalls/Makefile.syscalls
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+syscall_abis_32  +=
+syscalltbl = $(srctree)/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000000..37effc1b134e
--- /dev/null
+++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
@@ -0,0 +1,439 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for xtensa
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The <abi> is always "common" for this file
+#
+0	common	spill				sys_ni_syscall
+1	common	xtensa				sys_ni_syscall
+2	common	available4			sys_ni_syscall
+3	common	available5			sys_ni_syscall
+4	common	available6			sys_ni_syscall
+5	common	available7			sys_ni_syscall
+6	common	available8			sys_ni_syscall
+7	common	available9			sys_ni_syscall
+# File Operations
+8	common	open				sys_open
+9	common	close				sys_close
+10	common	dup				sys_dup
+11	common	dup2				sys_dup2
+12	common	read				sys_read
+13	common	write				sys_write
+14	common	select				sys_select
+15	common	lseek				sys_lseek
+16	common	poll				sys_poll
+17	common	_llseek				sys_llseek
+18	common	epoll_wait			sys_epoll_wait
+19	common	epoll_ctl			sys_epoll_ctl
+20	common	epoll_create			sys_epoll_create
+21	common	creat				sys_creat
+22	common	truncate			sys_truncate
+23	common	ftruncate			sys_ftruncate
+24	common	readv				sys_readv
+25	common	writev				sys_writev
+26	common	fsync				sys_fsync
+27	common	fdatasync			sys_fdatasync
+28	common	truncate64			sys_truncate64
+29	common	ftruncate64			sys_ftruncate64
+30	common	pread64				sys_pread64
+31	common	pwrite64			sys_pwrite64
+32	common	link				sys_link
+33	common	rename				sys_rename
+34	common	symlink				sys_symlink
+35	common	readlink			sys_readlink
+36	common	mknod				sys_mknod
+37	common	pipe				sys_pipe
+38	common	unlink				sys_unlink
+39	common	rmdir				sys_rmdir
+40	common	mkdir				sys_mkdir
+41	common	chdir				sys_chdir
+42	common	fchdir				sys_fchdir
+43	common	getcwd				sys_getcwd
+44	common	chmod				sys_chmod
+45	common	chown				sys_chown
+46	common	stat				sys_newstat
+47	common	stat64				sys_stat64
+48	common	lchown				sys_lchown
+49	common	lstat				sys_newlstat
+50	common	lstat64				sys_lstat64
+51	common	available51			sys_ni_syscall
+52	common	fchmod				sys_fchmod
+53	common	fchown				sys_fchown
+54	common	fstat				sys_newfstat
+55	common	fstat64				sys_fstat64
+56	common	flock				sys_flock
+57	common	access				sys_access
+58	common	umask				sys_umask
+59	common	getdents			sys_getdents
+60	common	getdents64			sys_getdents64
+61	common	fcntl64				sys_fcntl64
+62	common	fallocate			sys_fallocate
+63	common	fadvise64_64			xtensa_fadvise64_64
+64	common	utime				sys_utime32
+65	common	utimes				sys_utimes_time32
+66	common	ioctl				sys_ioctl
+67	common	fcntl				sys_fcntl
+68	common	setxattr			sys_setxattr
+69	common	getxattr			sys_getxattr
+70	common	listxattr			sys_listxattr
+71	common	removexattr			sys_removexattr
+72	common	lsetxattr			sys_lsetxattr
+73	common	lgetxattr			sys_lgetxattr
+74	common	llistxattr			sys_llistxattr
+75	common	lremovexattr			sys_lremovexattr
+76	common	fsetxattr			sys_fsetxattr
+77	common	fgetxattr			sys_fgetxattr
+78	common	flistxattr			sys_flistxattr
+79	common	fremovexattr			sys_fremovexattr
+# File Map / Shared Memory Operations
+80	common	mmap2				sys_mmap_pgoff
+81	common	munmap				sys_munmap
+82	common	mprotect			sys_mprotect
+83	common	brk				sys_brk
+84	common	mlock				sys_mlock
+85	common	munlock				sys_munlock
+86	common	mlockall			sys_mlockall
+87	common	munlockall			sys_munlockall
+88	common	mremap				sys_mremap
+89	common	msync				sys_msync
+90	common	mincore				sys_mincore
+91	common	madvise				sys_madvise
+92	common	shmget				sys_shmget
+93	common	shmat				xtensa_shmat
+94	common	shmctl				sys_old_shmctl
+95	common	shmdt				sys_shmdt
+# Socket Operations
+96	common	socket				sys_socket
+97	common	setsockopt			sys_setsockopt
+98	common	getsockopt			sys_getsockopt
+99	common	shutdown			sys_shutdown
+100	common	bind				sys_bind
+101	common	connect				sys_connect
+102	common	listen				sys_listen
+103	common	accept				sys_accept
+104	common	getsockname			sys_getsockname
+105	common	getpeername			sys_getpeername
+106	common	sendmsg				sys_sendmsg
+107	common	recvmsg				sys_recvmsg
+108	common	send				sys_send
+109	common	recv				sys_recv
+110	common	sendto				sys_sendto
+111	common	recvfrom			sys_recvfrom
+112	common	socketpair			sys_socketpair
+113	common	sendfile			sys_sendfile
+114	common	sendfile64			sys_sendfile64
+115	common	sendmmsg			sys_sendmmsg
+# Process Operations
+116	common	clone				sys_clone
+117	common	execve				sys_execve
+118	common	exit				sys_exit
+119	common	exit_group			sys_exit_group
+120	common	getpid				sys_getpid
+121	common	wait4				sys_wait4
+122	common	waitid				sys_waitid
+123	common	kill				sys_kill
+124	common	tkill				sys_tkill
+125	common	tgkill				sys_tgkill
+126	common	set_tid_address			sys_set_tid_address
+127	common	gettid				sys_gettid
+128	common	setsid				sys_setsid
+129	common	getsid				sys_getsid
+130	common	prctl				sys_prctl
+131	common	personality			sys_personality
+132	common	getpriority			sys_getpriority
+133	common	setpriority			sys_setpriority
+134	common	setitimer			sys_setitimer
+135	common	getitimer			sys_getitimer
+136	common	setuid				sys_setuid
+137	common	getuid				sys_getuid
+138	common	setgid				sys_setgid
+139	common	getgid				sys_getgid
+140	common	geteuid				sys_geteuid
+141	common	getegid				sys_getegid
+142	common	setreuid			sys_setreuid
+143	common	setregid			sys_setregid
+144	common	setresuid			sys_setresuid
+145	common	getresuid			sys_getresuid
+146	common	setresgid			sys_setresgid
+147	common	getresgid			sys_getresgid
+148	common	setpgid				sys_setpgid
+149	common	getpgid				sys_getpgid
+150	common	getppid				sys_getppid
+151	common	getpgrp				sys_getpgrp
+# 152 was set_thread_area
+152	common	reserved152			sys_ni_syscall
+# 153 was get_thread_area
+153	common	reserved153			sys_ni_syscall
+154	common	times				sys_times
+155	common	acct				sys_acct
+156	common	sched_setaffinity		sys_sched_setaffinity
+157	common	sched_getaffinity		sys_sched_getaffinity
+158	common	capget				sys_capget
+159	common	capset				sys_capset
+160	common	ptrace				sys_ptrace
+161	common	semtimedop			sys_semtimedop_time32
+162	common	semget				sys_semget
+163	common	semop				sys_semop
+164	common	semctl				sys_old_semctl
+165	common	available165			sys_ni_syscall
+166	common	msgget				sys_msgget
+167	common	msgsnd				sys_msgsnd
+168	common	msgrcv				sys_msgrcv
+169	common	msgctl				sys_old_msgctl
+170	common	available170			sys_ni_syscall
+# File System
+171	common	umount2				sys_umount
+172	common	mount				sys_mount
+173	common	swapon				sys_swapon
+174	common	chroot				sys_chroot
+175	common	pivot_root			sys_pivot_root
+176	common	umount				sys_oldumount
+177	common	swapoff				sys_swapoff
+178	common	sync				sys_sync
+179	common	syncfs				sys_syncfs
+180	common	setfsuid			sys_setfsuid
+181	common	setfsgid			sys_setfsgid
+182	common	sysfs				sys_sysfs
+183	common	ustat				sys_ustat
+184	common	statfs				sys_statfs
+185	common	fstatfs				sys_fstatfs
+186	common	statfs64			sys_statfs64
+187	common	fstatfs64			sys_fstatfs64
+# System
+188	common	setrlimit			sys_setrlimit
+189	common	getrlimit			sys_getrlimit
+190	common	getrusage			sys_getrusage
+191	common	futex				sys_futex_time32
+192	common	gettimeofday			sys_gettimeofday
+193	common	settimeofday			sys_settimeofday
+194	common	adjtimex			sys_adjtimex_time32
+195	common	nanosleep			sys_nanosleep_time32
+196	common	getgroups			sys_getgroups
+197	common	setgroups			sys_setgroups
+198	common	sethostname			sys_sethostname
+199	common	setdomainname			sys_setdomainname
+200	common	syslog				sys_syslog
+201	common	vhangup				sys_vhangup
+202	common	uselib				sys_uselib
+203	common	reboot				sys_reboot
+204	common	quotactl			sys_quotactl
+# 205 was old nfsservctl
+205	common	nfsservctl			sys_ni_syscall
+206	common	_sysctl				sys_ni_syscall
+207	common	bdflush				sys_ni_syscall
+208	common	uname				sys_newuname
+209	common	sysinfo				sys_sysinfo
+210	common	init_module			sys_init_module
+211	common	delete_module			sys_delete_module
+212	common	sched_setparam			sys_sched_setparam
+213	common	sched_getparam			sys_sched_getparam
+214	common	sched_setscheduler		sys_sched_setscheduler
+215	common	sched_getscheduler		sys_sched_getscheduler
+216	common	sched_get_priority_max		sys_sched_get_priority_max
+217	common	sched_get_priority_min		sys_sched_get_priority_min
+218	common	sched_rr_get_interval		sys_sched_rr_get_interval_time32
+219	common	sched_yield			sys_sched_yield
+222	common	available222			sys_ni_syscall
+# Signal Handling
+223	common	restart_syscall			sys_restart_syscall
+224	common	sigaltstack			sys_sigaltstack
+225	common	rt_sigreturn			xtensa_rt_sigreturn
+226	common	rt_sigaction			sys_rt_sigaction
+227	common	rt_sigprocmask			sys_rt_sigprocmask
+228	common	rt_sigpending			sys_rt_sigpending
+229	common	rt_sigtimedwait			sys_rt_sigtimedwait_time32
+230	common	rt_sigqueueinfo			sys_rt_sigqueueinfo
+231	common	rt_sigsuspend			sys_rt_sigsuspend
+# Message
+232	common	mq_open				sys_mq_open
+233	common	mq_unlink			sys_mq_unlink
+234	common	mq_timedsend			sys_mq_timedsend_time32
+235	common	mq_timedreceive			sys_mq_timedreceive_time32
+236	common	mq_notify			sys_mq_notify
+237	common	mq_getsetattr			sys_mq_getsetattr
+238	common	available238			sys_ni_syscall
+239	common	io_setup			sys_io_setup
+# IO
+240	common	io_destroy			sys_io_destroy
+241	common	io_submit			sys_io_submit
+242	common	io_getevents			sys_io_getevents_time32
+243	common	io_cancel			sys_io_cancel
+244	common	clock_settime			sys_clock_settime32
+245	common	clock_gettime			sys_clock_gettime32
+246	common	clock_getres			sys_clock_getres_time32
+247	common	clock_nanosleep			sys_clock_nanosleep_time32
+# Timer
+248	common	timer_create			sys_timer_create
+249	common	timer_delete			sys_timer_delete
+250	common	timer_settime			sys_timer_settime32
+251	common	timer_gettime			sys_timer_gettime32
+252	common	timer_getoverrun		sys_timer_getoverrun
+# System
+253	common	reserved253			sys_ni_syscall
+254	common	lookup_dcookie			sys_ni_syscall
+255	common	available255			sys_ni_syscall
+256	common	add_key				sys_add_key
+257	common	request_key			sys_request_key
+258	common	keyctl				sys_keyctl
+259	common	available259			sys_ni_syscall
+260	common	readahead			sys_readahead
+261	common	remap_file_pages		sys_remap_file_pages
+262	common	migrate_pages			sys_migrate_pages
+263	common	mbind				sys_mbind
+264	common	get_mempolicy			sys_get_mempolicy
+265	common	set_mempolicy			sys_set_mempolicy
+266	common	unshare				sys_unshare
+267	common	move_pages			sys_move_pages
+268	common	splice				sys_splice
+269	common	tee				sys_tee
+270	common	vmsplice			sys_vmsplice
+271	common	available271			sys_ni_syscall
+272	common	pselect6			sys_pselect6_time32
+273	common	ppoll				sys_ppoll_time32
+274	common	epoll_pwait			sys_epoll_pwait
+275	common	epoll_create1			sys_epoll_create1
+276	common	inotify_init			sys_inotify_init
+277	common	inotify_add_watch		sys_inotify_add_watch
+278	common	inotify_rm_watch		sys_inotify_rm_watch
+279	common	inotify_init1			sys_inotify_init1
+280	common	getcpu				sys_getcpu
+281	common	kexec_load			sys_ni_syscall
+282	common	ioprio_set			sys_ioprio_set
+283	common	ioprio_get			sys_ioprio_get
+284	common	set_robust_list			sys_set_robust_list
+285	common	get_robust_list			sys_get_robust_list
+286	common	available286			sys_ni_syscall
+287	common	available287			sys_ni_syscall
+# Relative File Operations
+288	common	openat				sys_openat
+289	common	mkdirat				sys_mkdirat
+290	common	mknodat				sys_mknodat
+291	common	unlinkat			sys_unlinkat
+292	common	renameat			sys_renameat
+293	common	linkat				sys_linkat
+294	common	symlinkat			sys_symlinkat
+295	common	readlinkat			sys_readlinkat
+296	common	utimensat			sys_utimensat_time32
+297	common	fchownat			sys_fchownat
+298	common	futimesat			sys_futimesat_time32
+299	common	fstatat64			sys_fstatat64
+300	common	fchmodat			sys_fchmodat
+301	common	faccessat			sys_faccessat
+302	common	available302			sys_ni_syscall
+303	common	available303			sys_ni_syscall
+304	common	signalfd			sys_signalfd
+# 305 was timerfd
+306	common	eventfd				sys_eventfd
+307	common	recvmmsg			sys_recvmmsg_time32
+308	common	setns				sys_setns
+309	common	signalfd4			sys_signalfd4
+310	common	dup3				sys_dup3
+311	common	pipe2				sys_pipe2
+312	common	timerfd_create			sys_timerfd_create
+313	common	timerfd_settime			sys_timerfd_settime32
+314	common	timerfd_gettime			sys_timerfd_gettime32
+315	common	available315			sys_ni_syscall
+316	common	eventfd2			sys_eventfd2
+317	common	preadv				sys_preadv
+318	common	pwritev				sys_pwritev
+319	common	available319			sys_ni_syscall
+320	common	fanotify_init			sys_fanotify_init
+321	common	fanotify_mark			sys_fanotify_mark
+322	common	process_vm_readv		sys_process_vm_readv
+323	common	process_vm_writev		sys_process_vm_writev
+324	common	name_to_handle_at		sys_name_to_handle_at
+325	common	open_by_handle_at		sys_open_by_handle_at
+326	common	sync_file_range2		sys_sync_file_range2
+327	common	perf_event_open			sys_perf_event_open
+328	common	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo
+329	common	clock_adjtime			sys_clock_adjtime32
+330	common	prlimit64			sys_prlimit64
+331	common	kcmp				sys_kcmp
+332	common	finit_module			sys_finit_module
+333	common	accept4				sys_accept4
+334	common	sched_setattr			sys_sched_setattr
+335	common	sched_getattr			sys_sched_getattr
+336	common	renameat2			sys_renameat2
+337	common	seccomp				sys_seccomp
+338	common	getrandom			sys_getrandom
+339	common	memfd_create			sys_memfd_create
+340	common	bpf				sys_bpf
+341	common	execveat			sys_execveat
+342	common	userfaultfd			sys_userfaultfd
+343	common	membarrier			sys_membarrier
+344	common	mlock2				sys_mlock2
+345	common	copy_file_range			sys_copy_file_range
+346	common	preadv2				sys_preadv2
+347	common	pwritev2			sys_pwritev2
+348	common	pkey_mprotect			sys_pkey_mprotect
+349	common	pkey_alloc			sys_pkey_alloc
+350	common	pkey_free			sys_pkey_free
+351	common	statx				sys_statx
+352	common	rseq				sys_rseq
+# 353 through 402 are unassigned to sync up with generic numbers
+403	common	clock_gettime64			sys_clock_gettime
+404	common	clock_settime64			sys_clock_settime
+405	common	clock_adjtime64			sys_clock_adjtime
+406	common	clock_getres_time64		sys_clock_getres
+407	common	clock_nanosleep_time64		sys_clock_nanosleep
+408	common	timer_gettime64			sys_timer_gettime
+409	common	timer_settime64			sys_timer_settime
+410	common	timerfd_gettime64		sys_timerfd_gettime
+411	common	timerfd_settime64		sys_timerfd_settime
+412	common	utimensat_time64		sys_utimensat
+413	common	pselect6_time64			sys_pselect6
+414	common	ppoll_time64			sys_ppoll
+416	common	io_pgetevents_time64		sys_io_pgetevents
+417	common	recvmmsg_time64			sys_recvmmsg
+418	common	mq_timedsend_time64		sys_mq_timedsend
+419	common	mq_timedreceive_time64		sys_mq_timedreceive
+420	common	semtimedop_time64		sys_semtimedop
+421	common	rt_sigtimedwait_time64		sys_rt_sigtimedwait
+422	common	futex_time64			sys_futex
+423	common	sched_rr_get_interval_time64	sys_sched_rr_get_interval
+424	common	pidfd_send_signal		sys_pidfd_send_signal
+425	common	io_uring_setup			sys_io_uring_setup
+426	common	io_uring_enter			sys_io_uring_enter
+427	common	io_uring_register		sys_io_uring_register
+428	common	open_tree			sys_open_tree
+429	common	move_mount			sys_move_mount
+430	common	fsopen				sys_fsopen
+431	common	fsconfig			sys_fsconfig
+432	common	fsmount				sys_fsmount
+433	common	fspick				sys_fspick
+434	common	pidfd_open			sys_pidfd_open
+435	common	clone3				sys_clone3
+436	common	close_range			sys_close_range
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
+439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2
+442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_fd			sys_quotactl_fd
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448	common	process_mrelease		sys_process_mrelease
+449	common  futex_waitv                     sys_futex_waitv
+450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
+453	common	map_shadow_stack		sys_map_shadow_stack
+454	common	futex_wake			sys_futex_wake
+455	common	futex_wait			sys_futex_wait
+456	common	futex_requeue			sys_futex_requeue
+457	common	statmount			sys_statmount
+458	common	listmount			sys_listmount
+459	common	lsm_get_self_attr		sys_lsm_get_self_attr
+460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+461	common	lsm_list_modules		sys_lsm_list_modules
+462	common	mseal 				sys_mseal
+463	common	setxattrat			sys_setxattrat
+464	common	getxattrat			sys_getxattrat
+465	common	listxattrat			sys_listxattrat
+466	common	removexattrat			sys_removexattrat
diff --git a/tools/perf/arch/xtensa/include/syscall_table.h b/tools/perf/arch/xtensa/include/syscall_table.h
new file mode 100644
index 000000000000..4c942821662d
--- /dev/null
+++ b/tools/perf/arch/xtensa/include/syscall_table.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/syscalls_32.h>
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index ef5c4257844d..20fe4f72b4af 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -420,7 +420,12 @@ static int cmpworker(const void *p1, const void *p2)
 
 	struct worker *w1 = (struct worker *) p1;
 	struct worker *w2 = (struct worker *) p2;
-	return w1->tid > w2->tid;
+
+	if (w1->tid > w2->tid)
+		return 1;
+	if (w1->tid < w2->tid)
+		return -1;
+	return 0;
 }
 
 int bench_epoll_wait(int argc, const char **argv)
diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c
index a759eb2328be..f55c07e4be94 100644
--- a/tools/perf/bench/inject-buildid.c
+++ b/tools/perf/bench/inject-buildid.c
@@ -52,7 +52,7 @@ struct bench_dso {
 static int nr_dsos;
 static struct bench_dso *dsos;
 
-extern int cmd_inject(int argc, const char *argv[]);
+extern int main(int argc, const char **argv);
 
 static const struct option options[] = {
 	OPT_UINTEGER('i', "iterations", &iterations,
@@ -294,7 +294,7 @@ static int setup_injection(struct bench_data *data, bool build_id_all)
 
 	if (data->pid == 0) {
 		const char **inject_argv;
-		int inject_argc = 2;
+		int inject_argc = 3;
 
 		close(data->input_pipe[1]);
 		close(data->output_pipe[0]);
@@ -318,15 +318,16 @@ static int setup_injection(struct bench_data *data, bool build_id_all)
 		if (inject_argv == NULL)
 			exit(1);
 
-		inject_argv[0] = strdup("inject");
-		inject_argv[1] = strdup("-b");
+		inject_argv[0] = strdup("perf");
+		inject_argv[1] = strdup("inject");
+		inject_argv[2] = strdup("-b");
 		if (build_id_all)
-			inject_argv[2] = strdup("--buildid-all");
+			inject_argv[3] = strdup("--buildid-all");
 
 		/* signal that we're ready to go */
 		close(ready_pipe[1]);
 
-		cmd_inject(inject_argc, inject_argv);
+		main(inject_argc, inject_argv);
 
 		exit(0);
 	}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index bb87e6e7687d..836ae0122dab 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -7,6 +7,7 @@
  * a histogram of results, along various sorting keys.
  */
 #include "builtin.h"
+#include "perf.h"
 
 #include "util/color.h"
 #include <linux/list.h>
diff --git a/tools/perf/builtin-check.c b/tools/perf/builtin-check.c
index 2346536a5ee1..61a11a9b4e75 100644
--- a/tools/perf/builtin-check.c
+++ b/tools/perf/builtin-check.c
@@ -31,7 +31,6 @@ struct feature_status supported_features[] = {
 	FEATURE_STATUS("dwarf_getlocations", HAVE_LIBDW_SUPPORT),
 	FEATURE_STATUS("dwarf-unwind", HAVE_DWARF_UNWIND_SUPPORT),
 	FEATURE_STATUS("auxtrace", HAVE_AUXTRACE_SUPPORT),
-	FEATURE_STATUS("libaudit", HAVE_LIBAUDIT_SUPPORT),
 	FEATURE_STATUS("libbfd", HAVE_LIBBFD_SUPPORT),
 	FEATURE_STATUS("libcapstone", HAVE_LIBCAPSTONE_SUPPORT),
 	FEATURE_STATUS("libcrypto", HAVE_LIBCRYPTO_SUPPORT),
@@ -47,7 +46,6 @@ struct feature_status supported_features[] = {
 	FEATURE_STATUS("libunwind", HAVE_LIBUNWIND_SUPPORT),
 	FEATURE_STATUS("lzma", HAVE_LZMA_SUPPORT),
 	FEATURE_STATUS("numa_num_possible_cpus", HAVE_LIBNUMA_SUPPORT),
-	FEATURE_STATUS("syscall_table", HAVE_SYSCALL_TABLE_SUPPORT),
 	FEATURE_STATUS("zlib", HAVE_ZLIB_SUPPORT),
 	FEATURE_STATUS("zstd", HAVE_ZSTD_SUPPORT),
 
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index 2e8363778935..45b5312fbe83 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -154,6 +154,44 @@ static int parse_config_arg(char *arg, char **var, char **value)
 	return 0;
 }
 
+int perf_config__set_variable(const char *var, const char *value)
+{
+	char path[PATH_MAX];
+	char *user_config = mkpath(path, sizeof(path), "%s/.perfconfig", getenv("HOME"));
+	const char *config_filename;
+	struct perf_config_set *set;
+	int ret = -1;
+
+	if (use_system_config)
+		config_exclusive_filename = perf_etc_perfconfig();
+	else if (use_user_config)
+		config_exclusive_filename = user_config;
+
+	if (!config_exclusive_filename)
+		config_filename = user_config;
+	else
+		config_filename = config_exclusive_filename;
+
+	set = perf_config_set__new();
+	if (!set)
+		goto out_err;
+
+	if (perf_config_set__collect(set, config_filename, var, value) < 0) {
+		pr_err("Failed to add '%s=%s'\n", var, value);
+		goto out_err;
+	}
+
+	if (set_config(set, config_filename) < 0) {
+		pr_err("Failed to set the configs on %s\n", config_filename);
+		goto out_err;
+	}
+
+	ret = 0;
+out_err:
+	perf_config_set__delete(set);
+	return ret;
+}
+
 int cmd_config(int argc, const char **argv)
 {
 	int i, ret = -1;
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 82fb7773e03e..ae490d58af92 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -6,6 +6,7 @@
  * DSOs and symbol information, sort them and produce a diff.
  */
 #include "builtin.h"
+#include "perf.h"
 
 #include "util/debug.h"
 #include "util/event.h"
@@ -1019,12 +1020,12 @@ static int process_base_stream(struct data__file *data_base,
 			continue;
 
 		es_base = evsel_streams__entry(data_base->evlist_streams,
-					       evsel_base->core.idx);
+					       evsel_base);
 		if (!es_base)
 			return -1;
 
 		es_pair = evsel_streams__entry(data_pair->evlist_streams,
-					       evsel_pair->core.idx);
+					       evsel_pair);
 		if (!es_pair)
 			return -1;
 
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index a56cf8b0a7d4..cfd770ec7286 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -43,6 +43,8 @@
 static volatile sig_atomic_t workload_exec_errno;
 static volatile sig_atomic_t done;
 
+static struct stats latency_stats;  /* for tracepoints */
+
 static void sig_handler(int sig __maybe_unused)
 {
 	done = true;
@@ -726,9 +728,11 @@ out:
 	return (done && !workload_exec_errno) ? 0 : -1;
 }
 
-static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
-			   bool use_nsec)
+static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
+			   char *buf, size_t len, char *linebuf)
 {
+	int min_latency = ftrace->min_latency;
+	int max_latency = ftrace->max_latency;
 	char *p, *q;
 	char *unit;
 	double num;
@@ -774,16 +778,34 @@ static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf,
 		if (!unit || strncmp(unit, " us", 3))
 			goto next;
 
-		if (use_nsec)
+		if (ftrace->use_nsec)
 			num *= 1000;
 
-		i = log2(num);
-		if (i < 0)
-			i = 0;
+		i = 0;
+		if (num < min_latency)
+			goto do_inc;
+
+		num -= min_latency;
+
+		if (!ftrace->bucket_range) {
+			i = log2(num);
+			if (i < 0)
+				i = 0;
+		} else {
+			// Less than 1 unit (ms or ns), or, in the future,
+			// than the min latency desired.
+			if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
+				i = num / ftrace->bucket_range + 1;
+			if (num >= max_latency - min_latency)
+				i = NUM_BUCKET -1;
+		}
 		if (i >= NUM_BUCKET)
 			i = NUM_BUCKET - 1;
 
+		num += min_latency;
+do_inc:
 		buckets[i]++;
+		update_stats(&latency_stats, num);
 
 next:
 		/* empty the line buffer for the next output  */
@@ -794,8 +816,10 @@ next:
 	strcat(linebuf, p);
 }
 
-static void display_histogram(int buckets[], bool use_nsec)
+static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
 {
+	int min_latency = ftrace->min_latency;
+	bool use_nsec = ftrace->use_nsec;
 	int i;
 	int total = 0;
 	int bar_total = 46;  /* to fit in 80 column */
@@ -814,30 +838,74 @@ static void display_histogram(int buckets[], bool use_nsec)
 	       "  DURATION    ", "COUNT", bar_total, "GRAPH");
 
 	bar_len = buckets[0] * bar_total / total;
-	printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
-	       0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
+
+	printf("  %4d - %4d %s | %10d | %.*s%*s |\n",
+	       0, min_latency ?: 1, use_nsec ? "ns" : "us",
+	       buckets[0], bar_len, bar, bar_total - bar_len, "");
 
 	for (i = 1; i < NUM_BUCKET - 1; i++) {
-		int start = (1 << (i - 1));
-		int stop = 1 << i;
+		unsigned int start, stop;
 		const char *unit = use_nsec ? "ns" : "us";
 
-		if (start >= 1024) {
-			start >>= 10;
-			stop >>= 10;
-			unit = use_nsec ? "us" : "ms";
+		if (!ftrace->bucket_range) {
+			start = (1 << (i - 1));
+			stop  = 1 << i;
+
+			if (start >= 1024) {
+				start >>= 10;
+				stop >>= 10;
+				unit = use_nsec ? "us" : "ms";
+			}
+		} else {
+			start = (i - 1) * ftrace->bucket_range + min_latency;
+			stop  = i * ftrace->bucket_range + min_latency;
+
+			if (start >= ftrace->max_latency)
+				break;
+			if (stop > ftrace->max_latency)
+				stop = ftrace->max_latency;
+
+			if (start >= 1000) {
+				double dstart = start / 1000.0,
+				       dstop  = stop / 1000.0;
+				printf("  %4.2f - %-4.2f", dstart, dstop);
+				unit = use_nsec ? "us" : "ms";
+				goto print_bucket_info;
+			}
 		}
+
+		printf("  %4d - %4d", start, stop);
+print_bucket_info:
 		bar_len = buckets[i] * bar_total / total;
-		printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
-		       start, stop, unit, buckets[i], bar_len, bar,
+		printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar,
 		       bar_total - bar_len, "");
 	}
 
 	bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
-	printf("  %4d - %-4s %s | %10d | %.*s%*s |\n",
-	       1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
+	if (!ftrace->bucket_range) {
+		printf("  %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
+	} else {
+		unsigned int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range + min_latency;
+		if (upper_outlier > ftrace->max_latency)
+			upper_outlier = ftrace->max_latency;
+
+		if (upper_outlier >= 1000) {
+			double dstart = upper_outlier / 1000.0;
+
+			printf("  %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms");
+		} else {
+			printf("  %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
+		}
+	}
+	printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
 	       bar_len, bar, bar_total - bar_len, "");
 
+	printf("\n# statistics  (in %s)\n", ftrace->use_nsec ? "nsec" : "usec");
+	printf("  total time: %20.0f\n", latency_stats.mean * latency_stats.n);
+	printf("    avg time: %20.0f\n", latency_stats.mean);
+	printf("    max time: %20"PRIu64"\n", latency_stats.max);
+	printf("    min time: %20"PRIu64"\n", latency_stats.min);
+	printf("       count: %20.0f\n", latency_stats.n);
 }
 
 static int prepare_func_latency(struct perf_ftrace *ftrace)
@@ -876,6 +944,8 @@ static int prepare_func_latency(struct perf_ftrace *ftrace)
 	if (fd < 0)
 		pr_err("failed to open trace_pipe\n");
 
+	init_stats(&latency_stats);
+
 	put_tracing_file(trace_file);
 	return fd;
 }
@@ -905,7 +975,7 @@ static int stop_func_latency(struct perf_ftrace *ftrace)
 static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
 {
 	if (ftrace->target.use_bpf)
-		return perf_ftrace__latency_read_bpf(ftrace, buckets);
+		return perf_ftrace__latency_read_bpf(ftrace, buckets, &latency_stats);
 
 	return 0;
 }
@@ -951,7 +1021,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 			if (n < 0)
 				break;
 
-			make_histogram(buckets, buf, n, line, ftrace->use_nsec);
+			make_histogram(ftrace, buckets, buf, n, line);
 		}
 	}
 
@@ -968,12 +1038,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 		int n = read(trace_fd, buf, sizeof(buf) - 1);
 		if (n <= 0)
 			break;
-		make_histogram(buckets, buf, n, line, ftrace->use_nsec);
+		make_histogram(ftrace, buckets, buf, n, line);
 	}
 
 	read_func_latency(ftrace, buckets);
 
-	display_histogram(buckets, ftrace->use_nsec);
+	display_histogram(ftrace, buckets);
 
 out:
 	close(trace_fd);
@@ -996,6 +1066,7 @@ static int prepare_func_profile(struct perf_ftrace *ftrace)
 {
 	ftrace->tracer = "function_graph";
 	ftrace->graph_tail = 1;
+	ftrace->graph_verbose = 0;
 
 	ftrace->profile_hash = hashmap__new(profile_hash, profile_equal, NULL);
 	if (ftrace->profile_hash == NULL)
@@ -1558,6 +1629,12 @@ int cmd_ftrace(int argc, const char **argv)
 #endif
 	OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
 		    "Use nano-second histogram"),
+	OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range,
+		    "Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"),
+	OPT_UINTEGER(0, "min-latency", &ftrace.min_latency,
+		    "Minimum latency (1st bucket). Works only with --bucket-range."),
+	OPT_UINTEGER(0, "max-latency", &ftrace.max_latency,
+		    "Maximum latency (last bucket). Works only with --bucket-range and total buckets less than 22."),
 	OPT_PARENT(common_options),
 	};
 	const struct option profile_options[] = {
@@ -1576,6 +1653,9 @@ int cmd_ftrace(int argc, const char **argv)
 	OPT_CALLBACK('s', "sort", &profile_sort, "key",
 		     "Sort result by key: total (default), avg, max, count, name.",
 		     parse_sort_key),
+	OPT_CALLBACK(0, "graph-opts", &ftrace, "options",
+		     "Graph tracer options, available options: nosleep-time,noirqs,thresh=<n>,depth=<n>",
+		     parse_graph_tracer_opts),
 	OPT_PARENT(common_options),
 	};
 	const struct option *options = ftrace_options;
@@ -1653,6 +1733,29 @@ int cmd_ftrace(int argc, const char **argv)
 			ret = -EINVAL;
 			goto out_delete_filters;
 		}
+		if (!ftrace.bucket_range && ftrace.min_latency) {
+			pr_err("--min-latency works only with --bucket-range\n");
+			parse_options_usage(ftrace_usage, options,
+					    "min-latency", /*short_opt=*/false);
+			ret = -EINVAL;
+			goto out_delete_filters;
+		}
+		if (ftrace.bucket_range && !ftrace.min_latency) {
+			/* default min latency should be the bucket range */
+			ftrace.min_latency = ftrace.bucket_range;
+		}
+		if (!ftrace.bucket_range && ftrace.max_latency) {
+			pr_err("--max-latency works only with --bucket-range\n");
+			parse_options_usage(ftrace_usage, options,
+					    "max-latency", /*short_opt=*/false);
+			ret = -EINVAL;
+			goto out_delete_filters;
+		}
+		if (ftrace.bucket_range && !ftrace.max_latency) {
+			/* default max latency should depend on bucket range and num_buckets */
+			ftrace.max_latency = (NUM_BUCKET - 2) * ftrace.bucket_range +
+						ftrace.min_latency;
+		}
 		cmd_func = __cmd_latency;
 		break;
 	case PERF_FTRACE_PROFILE:
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 0854d3cd9f6a..7be6fb6df595 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -447,9 +447,7 @@ int cmd_help(int argc, const char **argv)
 #ifdef HAVE_LIBELF_SUPPORT
 		"probe",
 #endif
-#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
 		"trace",
-#endif
 	NULL };
 	const char *builtin_help_usage[] = {
 		"perf help [--all] [--man|--web|--info] [command]",
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index d6989195a061..11e49cafa3af 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2367,10 +2367,10 @@ int cmd_inject(int argc, const char **argv)
 	};
 	int ret;
 	const char *known_build_ids = NULL;
-	bool build_ids;
-	bool build_id_all;
-	bool mmap2_build_ids;
-	bool mmap2_build_id_all;
+	bool build_ids = false;
+	bool build_id_all = false;
+	bool mmap2_build_ids = false;
+	bool mmap2_build_id_all = false;
 
 	struct option options[] = {
 		OPT_BOOLEAN('b', "build-ids", &build_ids,
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 4d8d94146f8d..67fb1946ef13 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -761,6 +761,7 @@ static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
 	};
 	struct trace_seq seq;
 	char *str, *pos = NULL;
+	const struct tep_event *tp_format;
 
 	if (nr_gfps) {
 		struct gfp_flag key = {
@@ -772,8 +773,9 @@ static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
 	}
 
 	trace_seq_init(&seq);
-	tep_print_event(evsel->tp_format->tep,
-			&seq, &record, "%s", TEP_PRINT_INFO);
+	tp_format = evsel__tp_format(evsel);
+	if (tp_format)
+		tep_print_event(tp_format->tep, &seq, &record, "%s", TEP_PRINT_INFO);
 
 	str = strtok_r(seq.buffer, " ", &pos);
 	while (str) {
@@ -2012,13 +2014,13 @@ int cmd_kmem(int argc, const char **argv)
 
 	if (kmem_page) {
 		struct evsel *evsel = evlist__find_tracepoint_by_name(session->evlist, "kmem:mm_page_alloc");
+		const struct tep_event *tp_format = evsel ? evsel__tp_format(evsel) : NULL;
 
-		if (evsel == NULL) {
+		if (tp_format == NULL) {
 			pr_err(errmsg, "page", "page");
 			goto out_delete;
 		}
-
-		kmem_page_size = tep_get_page_size(evsel->tp_format->tep);
+		kmem_page_size = tep_get_page_size(tp_format->tep);
 		symbol_conf.use_callchain = true;
 	}
 
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 274568d712d1..67fd2b006b0b 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -615,67 +615,6 @@ static const char *get_filename_for_perf_kvm(void)
 
 #if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
 
-void exit_event_get_key(struct evsel *evsel,
-			struct perf_sample *sample,
-			struct event_key *key)
-{
-	key->info = 0;
-	key->key  = evsel__intval(evsel, sample, kvm_exit_reason);
-}
-
-bool kvm_exit_event(struct evsel *evsel)
-{
-	return evsel__name_is(evsel, kvm_exit_trace);
-}
-
-bool exit_event_begin(struct evsel *evsel,
-		      struct perf_sample *sample, struct event_key *key)
-{
-	if (kvm_exit_event(evsel)) {
-		exit_event_get_key(evsel, sample, key);
-		return true;
-	}
-
-	return false;
-}
-
-bool kvm_entry_event(struct evsel *evsel)
-{
-	return evsel__name_is(evsel, kvm_entry_trace);
-}
-
-bool exit_event_end(struct evsel *evsel,
-		    struct perf_sample *sample __maybe_unused,
-		    struct event_key *key __maybe_unused)
-{
-	return kvm_entry_event(evsel);
-}
-
-static const char *get_exit_reason(struct perf_kvm_stat *kvm,
-				   struct exit_reasons_table *tbl,
-				   u64 exit_code)
-{
-	while (tbl->reason != NULL) {
-		if (tbl->exit_code == exit_code)
-			return tbl->reason;
-		tbl++;
-	}
-
-	pr_err("unknown kvm exit code:%lld on %s\n",
-		(unsigned long long)exit_code, kvm->exit_reasons_isa);
-	return "UNKNOWN";
-}
-
-void exit_event_decode_key(struct perf_kvm_stat *kvm,
-			   struct event_key *key,
-			   char *decode)
-{
-	const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
-						  key->key);
-
-	scnprintf(decode, KVM_EVENT_NAME_LEN, "%s", exit_reason);
-}
-
 static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
 {
 	struct kvm_reg_events_ops *events_ops = kvm_reg_events_ops;
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index 8234410cba4c..c41a68d073de 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -6,6 +6,7 @@
  */
 
 #include "builtin.h"
+#include "perf.h"
 
 #include "util/data.h"
 #include "util/evlist.h"
@@ -1103,7 +1104,8 @@ static char *evsel__softirq_name(struct evsel *evsel, u64 num)
 	char *name = NULL;
 	bool found = false;
 	struct tep_print_flag_sym *sym = NULL;
-	struct tep_print_arg *args = evsel->tp_format->print_fmt.args;
+	const struct tep_event *tp_format = evsel__tp_format(evsel);
+	struct tep_print_arg *args = tp_format ? tp_format->print_fmt.args : NULL;
 
 	if ((args == NULL) || (args->next == NULL))
 		return NULL;
@@ -1846,7 +1848,7 @@ static void process_skipped_events(struct perf_kwork *kwork,
 	}
 }
 
-struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
+static struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
 				       struct kwork_class *class,
 				       struct kwork_work *key)
 {
@@ -2344,6 +2346,7 @@ int cmd_kwork(int argc, const char **argv)
 		.all_runtime         = 0,
 		.all_count           = 0,
 		.nr_skipped_events   = { 0 },
+		.add_work            = perf_kwork_add_work,
 	};
 	static const char default_report_sort_order[] = "runtime, max, count";
 	static const char default_latency_sort_order[] = "avg, max, count";
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 062e2b56a2ab..5d405cd8e696 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -46,15 +46,6 @@
 static struct perf_session *session;
 static struct target target;
 
-/* based on kernel/lockdep.c */
-#define LOCKHASH_BITS		12
-#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)
-
-static struct hlist_head *lockhash_table;
-
-#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
-#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
-
 static struct rb_root		thread_stats;
 
 static bool combine_locks;
@@ -67,24 +58,13 @@ static unsigned long bpf_map_entries = MAX_ENTRIES;
 static int max_stack_depth = CONTENTION_STACK_DEPTH;
 static int stack_skip = CONTENTION_STACK_SKIP;
 static int print_nr_entries = INT_MAX / 2;
-static LIST_HEAD(callstack_filters);
 static const char *output_name = NULL;
 static FILE *lock_output;
 
-struct callstack_filter {
-	struct list_head list;
-	char name[];
-};
-
 static struct lock_filter filters;
 
 static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
 
-static bool needs_callstack(void)
-{
-	return !list_empty(&callstack_filters);
-}
-
 static struct thread_stat *thread_stat_find(u32 tid)
 {
 	struct rb_node *node;
@@ -477,93 +457,6 @@ static struct lock_stat *pop_from_result(void)
 	return container_of(node, struct lock_stat, rb);
 }
 
-struct lock_stat *lock_stat_find(u64 addr)
-{
-	struct hlist_head *entry = lockhashentry(addr);
-	struct lock_stat *ret;
-
-	hlist_for_each_entry(ret, entry, hash_entry) {
-		if (ret->addr == addr)
-			return ret;
-	}
-	return NULL;
-}
-
-struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
-{
-	struct hlist_head *entry = lockhashentry(addr);
-	struct lock_stat *ret, *new;
-
-	hlist_for_each_entry(ret, entry, hash_entry) {
-		if (ret->addr == addr)
-			return ret;
-	}
-
-	new = zalloc(sizeof(struct lock_stat));
-	if (!new)
-		goto alloc_failed;
-
-	new->addr = addr;
-	new->name = strdup(name);
-	if (!new->name) {
-		free(new);
-		goto alloc_failed;
-	}
-
-	new->flags = flags;
-	new->wait_time_min = ULLONG_MAX;
-
-	hlist_add_head(&new->hash_entry, entry);
-	return new;
-
-alloc_failed:
-	pr_err("memory allocation failed\n");
-	return NULL;
-}
-
-bool match_callstack_filter(struct machine *machine, u64 *callstack)
-{
-	struct map *kmap;
-	struct symbol *sym;
-	u64 ip;
-	const char *arch = perf_env__arch(machine->env);
-
-	if (list_empty(&callstack_filters))
-		return true;
-
-	for (int i = 0; i < max_stack_depth; i++) {
-		struct callstack_filter *filter;
-
-		/*
-		 * In powerpc, the callchain saved by kernel always includes
-		 * first three entries as the NIP (next instruction pointer),
-		 * LR (link register), and the contents of LR save area in the
-		 * second stack frame. In certain scenarios its possible to have
-		 * invalid kernel instruction addresses in either LR or the second
-		 * stack frame's LR. In that case, kernel will store that address as
-		 * zero.
-		 *
-		 * The below check will continue to look into callstack,
-		 * incase first or second callstack index entry has 0
-		 * address for powerpc.
-		 */
-		if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
-						(i != 1 && i != 2))))
-			break;
-
-		ip = callstack[i];
-		sym = machine__find_kernel_symbol(machine, ip, &kmap);
-		if (sym == NULL)
-			continue;
-
-		list_for_each_entry(filter, &callstack_filters, list) {
-			if (strstr(sym->name, filter->name))
-				return true;
-		}
-	}
-	return false;
-}
-
 struct trace_lock_handler {
 	/* it's used on CONFIG_LOCKDEP */
 	int (*acquire_event)(struct evsel *evsel,
@@ -1165,7 +1058,7 @@ static int report_lock_contention_begin_event(struct evsel *evsel,
 		if (callstack == NULL)
 			return -ENOMEM;
 
-		if (!match_callstack_filter(machine, callstack)) {
+		if (!match_callstack_filter(machine, callstack, max_stack_depth)) {
 			free(callstack);
 			return 0;
 		}
@@ -1575,8 +1468,13 @@ static void sort_result(void)
 
 static const struct {
 	unsigned int flags;
-	const char *str;
-	const char *name;
+	/*
+	 * Name of the lock flags (access), with delimeter ':'.
+	 * For example, rwsem:R of rwsem:W.
+	 */
+	const char *flags_name;
+	/* Name of the lock (type), for example, rwlock or rwsem. */
+	const char *lock_name;
 } lock_type_table[] = {
 	{ 0,				"semaphore",	"semaphore" },
 	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
@@ -1591,45 +1489,32 @@ static const struct {
 	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
 	{ LCB_F_MUTEX,			"mutex",	"mutex" },
 	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
-	/* alias for get_type_flag() */
-	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex-spin",	"mutex" },
+	/* alias for optimistic spinning only */
+	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex:spin",	"mutex-spin" },
 };
 
-static const char *get_type_str(unsigned int flags)
+static const char *get_type_flags_name(unsigned int flags)
 {
-	flags &= LCB_F_MAX_FLAGS - 1;
+	flags &= LCB_F_TYPE_MASK;
 
 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
 		if (lock_type_table[i].flags == flags)
-			return lock_type_table[i].str;
+			return lock_type_table[i].flags_name;
 	}
 	return "unknown";
 }
 
-static const char *get_type_name(unsigned int flags)
+static const char *get_type_lock_name(unsigned int flags)
 {
-	flags &= LCB_F_MAX_FLAGS - 1;
+	flags &= LCB_F_TYPE_MASK;
 
 	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
 		if (lock_type_table[i].flags == flags)
-			return lock_type_table[i].name;
+			return lock_type_table[i].lock_name;
 	}
 	return "unknown";
 }
 
-static unsigned int get_type_flag(const char *str)
-{
-	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
-		if (!strcmp(lock_type_table[i].name, str))
-			return lock_type_table[i].flags;
-	}
-	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
-		if (!strcmp(lock_type_table[i].str, str))
-			return lock_type_table[i].flags;
-	}
-	return UINT_MAX;
-}
-
 static void lock_filter_finish(void)
 {
 	zfree(&filters.types);
@@ -1646,6 +1531,12 @@ static void lock_filter_finish(void)
 
 	zfree(&filters.cgrps);
 	filters.nr_cgrps = 0;
+
+	for (int i = 0; i < filters.nr_slabs; i++)
+		free(filters.slabs[i]);
+
+	zfree(&filters.slabs);
+	filters.nr_slabs = 0;
 }
 
 static void sort_contention_result(void)
@@ -1732,7 +1623,7 @@ static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat
 
 	switch (aggr_mode) {
 	case LOCK_AGGR_CALLER:
-		fprintf(lock_output, "  %10s   %s\n", get_type_str(st->flags), st->name);
+		fprintf(lock_output, "  %10s   %s\n", get_type_flags_name(st->flags), st->name);
 		break;
 	case LOCK_AGGR_TASK:
 		pid = st->addr;
@@ -1742,7 +1633,7 @@ static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat
 		break;
 	case LOCK_AGGR_ADDR:
 		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
-			st->name, get_type_name(st->flags));
+			st->name, get_type_lock_name(st->flags));
 		break;
 	case LOCK_AGGR_CGROUP:
 		fprintf(lock_output, "  %s\n", st->name);
@@ -1783,7 +1674,7 @@ static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *s
 
 	switch (aggr_mode) {
 	case LOCK_AGGR_CALLER:
-		fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name);
+		fprintf(lock_output, "%s%s %s", get_type_flags_name(st->flags), sep, st->name);
 		if (verbose <= 0)
 			fprintf(lock_output, "\n");
 		break;
@@ -1795,7 +1686,7 @@ static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *s
 		break;
 	case LOCK_AGGR_ADDR:
 		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
-			st->name, sep, get_type_name(st->flags));
+			st->name, sep, get_type_lock_name(st->flags));
 		break;
 	case LOCK_AGGR_CGROUP:
 		fprintf(lock_output, "%s\n",st->name);
@@ -2150,7 +2041,8 @@ static int __cmd_contention(int argc, const char **argv)
 				goto out_delete;
 		}
 
-		if (lock_contention_prepare(&con) < 0) {
+		err = lock_contention_prepare(&con);
+		if (err < 0) {
 			pr_err("lock contention BPF setup failed\n");
 			goto out_delete;
 		}
@@ -2171,10 +2063,14 @@ static int __cmd_contention(int argc, const char **argv)
 		}
 	}
 
-	if (setup_output_field(true, output_fields))
+	err = setup_output_field(true, output_fields);
+	if (err) {
+		pr_err("Failed to setup output field\n");
 		goto out_delete;
+	}
 
-	if (select_key(true))
+	err = select_key(true);
+	if (err)
 		goto out_delete;
 
 	if (symbol_conf.field_sep) {
@@ -2350,29 +2246,61 @@ static int parse_lock_type(const struct option *opt __maybe_unused, const char *
 			   int unset __maybe_unused)
 {
 	char *s, *tmp, *tok;
-	int ret = 0;
 
 	s = strdup(str);
 	if (s == NULL)
 		return -1;
 
 	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		unsigned int flags = get_type_flag(tok);
+		bool found = false;
 
-		if (flags == -1U) {
-			pr_err("Unknown lock flags: %s\n", tok);
-			ret = -1;
-			break;
+		/* `tok` is a flags name if it contains ':'. */
+		if (strchr(tok, ':')) {
+			for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
+				if (!strcmp(lock_type_table[i].flags_name, tok) &&
+				    add_lock_type(lock_type_table[i].flags)) {
+					found = true;
+					break;
+				}
+			}
+
+			if (!found) {
+				pr_err("Unknown lock flags name: %s\n", tok);
+				free(s);
+				return -1;
+			}
+
+			continue;
 		}
 
-		if (!add_lock_type(flags)) {
-			ret = -1;
-			break;
+		/*
+		 * Otherwise `tok` is a lock name.
+		 * Single lock name could contain multiple flags.
+		 * Replace alias `pcpu-sem` with actual name `percpu-rwsem.
+		 */
+		if (!strcmp(tok, "pcpu-sem"))
+			tok = (char *)"percpu-rwsem";
+		for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
+			if (!strcmp(lock_type_table[i].lock_name, tok)) {
+				if (add_lock_type(lock_type_table[i].flags)) {
+					found = true;
+				} else {
+					free(s);
+					return -1;
+				}
+			}
 		}
+
+		if (!found) {
+			pr_err("Unknown lock name: %s\n", tok);
+			free(s);
+			return -1;
+		}
+
 	}
 
 	free(s);
-	return ret;
+	return 0;
 }
 
 static bool add_lock_addr(unsigned long addr)
@@ -2412,6 +2340,27 @@ static bool add_lock_sym(char *name)
 	return true;
 }
 
+static bool add_lock_slab(char *name)
+{
+	char **tmp;
+	char *sym = strdup(name);
+
+	if (sym == NULL) {
+		pr_err("Memory allocation failure\n");
+		return false;
+	}
+
+	tmp = realloc(filters.slabs, (filters.nr_slabs + 1) * sizeof(*filters.slabs));
+	if (tmp == NULL) {
+		pr_err("Memory allocation failure\n");
+		return false;
+	}
+
+	tmp[filters.nr_slabs++] = sym;
+	filters.slabs = tmp;
+	return true;
+}
+
 static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
 			   int unset __maybe_unused)
 {
@@ -2435,6 +2384,14 @@ static int parse_lock_addr(const struct option *opt __maybe_unused, const char *
 			continue;
 		}
 
+		if (*tok == '&') {
+			if (!add_lock_slab(tok + 1)) {
+				ret = -1;
+				break;
+			}
+			continue;
+		}
+
 		/*
 		 * At this moment, we don't have kernel symbols.  Save the symbols
 		 * in a separate list and resolve them to addresses later.
@@ -2449,34 +2406,6 @@ static int parse_lock_addr(const struct option *opt __maybe_unused, const char *
 	return ret;
 }
 
-static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
-			   int unset __maybe_unused)
-{
-	char *s, *tmp, *tok;
-	int ret = 0;
-
-	s = strdup(str);
-	if (s == NULL)
-		return -1;
-
-	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
-		struct callstack_filter *entry;
-
-		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
-		if (entry == NULL) {
-			pr_err("Memory allocation failure\n");
-			free(s);
-			return -1;
-		}
-
-		strcpy(entry->name, tok);
-		list_add_tail(&entry->list, &callstack_filters);
-	}
-
-	free(s);
-	return ret;
-}
-
 static int parse_output(const struct option *opt __maybe_unused, const char *str,
 			int unset __maybe_unused)
 {
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 651188c1d825..99d5e1491a28 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -4,6 +4,7 @@
 #include <sys/stat.h>
 #include <unistd.h>
 #include "builtin.h"
+#include "perf.h"
 
 #include <subcmd/parse-options.h>
 #include "util/auxtrace.h"
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f83252472921..5db1aedf48df 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -860,7 +860,9 @@ static int record__auxtrace_init(struct record *rec)
 	if (err)
 		return err;
 
-	auxtrace_regroup_aux_output(rec->evlist);
+	err = auxtrace_parse_aux_action(rec->evlist);
+	if (err)
+		return err;
 
 	return auxtrace_parse_filters(rec->evlist);
 }
@@ -1748,10 +1750,8 @@ static void record__init_features(struct record *rec)
 	if (rec->no_buildid)
 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
 
-#ifdef HAVE_LIBTRACEEVENT
 	if (!have_tracepoints(&rec->evlist->core.entries))
 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
-#endif
 
 	if (!rec->opts.branch_stack)
 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 048c91960ba9..f5fbd670d619 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -348,11 +348,9 @@ static int process_read_event(const struct perf_tool *tool,
 	struct report *rep = container_of(tool, struct report, tool);
 
 	if (rep->show_threads) {
-		const char *name = evsel__name(evsel);
 		int err = perf_read_values_add_value(&rep->show_threads_values,
 					   event->read.pid, event->read.tid,
-					   evsel->core.idx,
-					   name,
+					   evsel,
 					   event->read.value);
 
 		if (err)
@@ -1422,7 +1420,7 @@ int cmd_report(int argc, const char **argv)
 	OPT_STRING(0, "addr2line", &addr2line_path, "path",
 		   "addr2line binary to use for line numbers"),
 	OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
-		    "Disable symbol demangling"),
+		    "Symbol demangling. Enabled by default, use --no-demangle to disable."),
 	OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
 		    "Enable kernel symbol demangling"),
 	OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 7049c60ebf77..26ece6e9bfd1 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "builtin.h"
+#include "perf.h"
 #include "perf-sys.h"
 
 #include "util/cpumap.h"
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 9e47905f75a6..33667b534634 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -85,15 +85,12 @@ static bool			system_wide;
 static bool			print_flags;
 static const char		*cpu_list;
 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
-static struct perf_stat_config	stat_config;
 static int			max_blocks;
 static bool			native_arch;
 static struct dlfilter		*dlfilter;
 static int			dlargc;
 static char			**dlargv;
 
-unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
-
 enum perf_output_field {
 	PERF_OUTPUT_COMM            = 1ULL << 0,
 	PERF_OUTPUT_TID             = 1ULL << 1,
@@ -224,6 +221,10 @@ enum {
 	OUTPUT_TYPE_MAX
 };
 
+// We need to refactor the evsel->priv use in in 'perf script' to allow for
+// using that area, that is being used only in some cases.
+#define OUTPUT_TYPE_UNSET -1
+
 /* default set to maintain compatibility with current format */
 static struct {
 	bool user_set;
@@ -397,6 +398,14 @@ static inline int output_type(unsigned int type)
 	return OUTPUT_TYPE_OTHER;
 }
 
+static inline int evsel__output_type(struct evsel *evsel)
+{
+	if (evsel->script_output_type == OUTPUT_TYPE_UNSET)
+		evsel->script_output_type = output_type(evsel->core.attr.type);
+
+	return evsel->script_output_type;
+}
+
 static bool output_set_by_user(void)
 {
 	int j;
@@ -421,13 +430,13 @@ static const char *output_field2str(enum perf_output_field field)
 	return str;
 }
 
-#define PRINT_FIELD(x)  (output[output_type(attr->type)].fields & PERF_OUTPUT_##x)
+#define PRINT_FIELD(x)  (output[evsel__output_type(evsel)].fields & PERF_OUTPUT_##x)
 
 static int evsel__do_check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg,
 				 enum perf_output_field field, bool allow_user_set)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
-	int type = output_type(attr->type);
+	int type = evsel__output_type(evsel);
 	const char *evname;
 
 	if (attr->sample_type & sample_type)
@@ -461,7 +470,6 @@ static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *
 
 static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
 {
-	struct perf_event_attr *attr = &evsel->core.attr;
 	bool allow_user_set;
 
 	if (evsel__is_dummy_event(evsel))
@@ -578,9 +586,9 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
 	return 0;
 }
 
-static void set_print_ip_opts(struct perf_event_attr *attr)
+static void evsel__set_print_ip_opts(struct evsel *evsel)
 {
-	unsigned int type = output_type(attr->type);
+	unsigned int type = evsel__output_type(evsel);
 
 	output[type].print_ip_opts = 0;
 	if (PRINT_FIELD(IP))
@@ -610,7 +618,7 @@ static struct evsel *find_first_output_type(struct evlist *evlist,
 	evlist__for_each_entry(evlist, evsel) {
 		if (evsel__is_dummy_event(evsel))
 			continue;
-		if (output_type(evsel->core.attr.type) == (int)type)
+		if (evsel__output_type(evsel) == (int)type)
 			return evsel;
 	}
 	return NULL;
@@ -652,7 +660,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
 		if (output[j].fields & PERF_OUTPUT_DSOFF)
 			output[j].fields |= PERF_OUTPUT_DSO;
 
-		set_print_ip_opts(&evsel->core.attr);
+		evsel__set_print_ip_opts(evsel);
 		tod |= output[j].fields & PERF_OUTPUT_TOD;
 	}
 
@@ -688,7 +696,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
 				output[j].fields |= PERF_OUTPUT_SYM;
 				output[j].fields |= PERF_OUTPUT_SYMOFFSET;
 				output[j].fields |= PERF_OUTPUT_DSO;
-				set_print_ip_opts(&evsel->core.attr);
+				evsel__set_print_ip_opts(evsel);
 				goto out;
 			}
 		}
@@ -792,7 +800,6 @@ static int perf_sample__fprintf_start(struct perf_script *script,
 				      struct evsel *evsel,
 				      u32 type, FILE *fp)
 {
-	struct perf_event_attr *attr = &evsel->core.attr;
 	unsigned long secs;
 	unsigned long long nsecs;
 	int printed = 0;
@@ -944,7 +951,7 @@ static int print_bstack_flags(FILE *fp, struct branch_entry *br)
 
 static int perf_sample__fprintf_brstack(struct perf_sample *sample,
 					struct thread *thread,
-					struct perf_event_attr *attr, FILE *fp)
+					struct evsel *evsel, FILE *fp)
 {
 	struct branch_stack *br = sample->branch_stack;
 	struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -983,7 +990,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
 
 static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
 					   struct thread *thread,
-					   struct perf_event_attr *attr, FILE *fp)
+					   struct evsel *evsel, FILE *fp)
 {
 	struct branch_stack *br = sample->branch_stack;
 	struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -1021,7 +1028,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
 
 static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
 					   struct thread *thread,
-					   struct perf_event_attr *attr, FILE *fp)
+					   struct evsel *evsel, FILE *fp)
 {
 	struct branch_stack *br = sample->branch_stack;
 	struct branch_entry *entries = perf_sample__branch_entries(sample);
@@ -1188,7 +1195,7 @@ out:
 	return ret;
 }
 
-static int any_dump_insn(struct perf_event_attr *attr __maybe_unused,
+static int any_dump_insn(struct evsel *evsel __maybe_unused,
 			 struct perf_insn *x, uint64_t ip,
 			 u8 *inbuf, int inlen, int *lenp,
 			 FILE *fp)
@@ -1216,15 +1223,14 @@ static int add_padding(FILE *fp, int printed, int padding)
 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 			    struct perf_insn *x, u8 *inbuf, int len,
 			    int insn, FILE *fp, int *total_cycles,
-			    struct perf_event_attr *attr,
-			    struct thread *thread,
 			    struct evsel *evsel,
+			    struct thread *thread,
 			    u64 br_cntr)
 {
 	int ilen = 0;
 	int printed = fprintf(fp, "\t%016" PRIx64 "\t", ip);
 
-	printed += add_padding(fp, any_dump_insn(attr, x, ip, inbuf, len, &ilen, fp), 30);
+	printed += add_padding(fp, any_dump_insn(evsel, x, ip, inbuf, len, &ilen, fp), 30);
 	printed += fprintf(fp, "\t");
 
 	if (PRINT_FIELD(BRSTACKINSNLEN))
@@ -1280,7 +1286,7 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 
 static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
 			   u8 cpumode, int cpu, struct symbol **lastsym,
-			   struct perf_event_attr *attr, FILE *fp)
+			   struct evsel *evsel, FILE *fp)
 {
 	struct addr_location al;
 	int off, printed = 0, ret = 0;
@@ -1356,10 +1362,10 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 			machine, thread, &x.is64bit, &x.cpumode, false);
 	if (len > 0) {
 		printed += ip__fprintf_sym(entries[nr - 1].from, thread,
-					   x.cpumode, x.cpu, &lastsym, attr, fp);
+					   x.cpumode, x.cpu, &lastsym, evsel, fp);
 		printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1],
 					    &x, buffer, len, 0, fp, &total_cycles,
-					    attr, thread, evsel, br_cntr);
+					    evsel, thread, br_cntr);
 		if (PRINT_FIELD(SRCCODE))
 			printed += print_srccode(thread, x.cpumode, entries[nr - 1].from);
 	}
@@ -1387,19 +1393,19 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 		for (off = 0; off < (unsigned)len; off += ilen) {
 			uint64_t ip = start + off;
 
-			printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
+			printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, evsel, fp);
 			if (ip == end) {
 				if (PRINT_FIELD(BRCNTR) && sample->branch_stack_cntr)
 					br_cntr = sample->branch_stack_cntr[i];
 				printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp,
-							    &total_cycles, attr, thread, evsel, br_cntr);
+							    &total_cycles, evsel, thread, br_cntr);
 				if (PRINT_FIELD(SRCCODE))
 					printed += print_srccode(thread, x.cpumode, ip);
 				break;
 			} else {
 				ilen = 0;
 				printed += fprintf(fp, "\t%016" PRIx64 "\t", ip);
-				printed += any_dump_insn(attr, &x, ip, buffer + off, len - off, &ilen, fp);
+				printed += any_dump_insn(evsel, &x, ip, buffer + off, len - off, &ilen, fp);
 				if (PRINT_FIELD(BRSTACKINSNLEN))
 					printed += fprintf(fp, "\tilen: %d", ilen);
 				printed += fprintf(fp, "\n");
@@ -1438,7 +1444,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 		end = start + 128;
 	}
 	len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
-	printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
+	printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, evsel, fp);
 	if (len <= 0) {
 		/* Print at least last IP if basic block did not work */
 		len = grab_bb(buffer, sample->ip, sample->ip,
@@ -1447,7 +1453,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 			goto out;
 		ilen = 0;
 		printed += fprintf(fp, "\t%016" PRIx64 "\t", sample->ip);
-		printed += any_dump_insn(attr, &x, sample->ip, buffer, len, &ilen, fp);
+		printed += any_dump_insn(evsel, &x, sample->ip, buffer, len, &ilen, fp);
 		if (PRINT_FIELD(BRSTACKINSNLEN))
 			printed += fprintf(fp, "\tilen: %d", ilen);
 		printed += fprintf(fp, "\n");
@@ -1458,7 +1464,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 	for (off = 0; off <= end - start; off += ilen) {
 		ilen = 0;
 		printed += fprintf(fp, "\t%016" PRIx64 "\t", start + off);
-		printed += any_dump_insn(attr, &x, start + off, buffer + off, len - off, &ilen, fp);
+		printed += any_dump_insn(evsel, &x, start + off, buffer + off, len - off, &ilen, fp);
 		if (PRINT_FIELD(BRSTACKINSNLEN))
 			printed += fprintf(fp, "\tilen: %d", ilen);
 		printed += fprintf(fp, "\n");
@@ -1482,13 +1488,13 @@ out:
 
 static int perf_sample__fprintf_addr(struct perf_sample *sample,
 				     struct thread *thread,
-				     struct perf_event_attr *attr, FILE *fp)
+				     struct evsel *evsel, FILE *fp)
 {
 	struct addr_location al;
 	int printed = fprintf(fp, "%16" PRIx64, sample->addr);
 
 	addr_location__init(&al);
-	if (!sample_addr_correlates_sym(attr))
+	if (!sample_addr_correlates_sym(&evsel->core.attr))
 		goto out;
 
 	thread__resolve(thread, &al, sample);
@@ -1515,11 +1521,10 @@ static const char *resolve_branch_sym(struct perf_sample *sample,
 				      struct addr_location *addr_al,
 				      u64 *ip)
 {
-	struct perf_event_attr *attr = &evsel->core.attr;
 	const char *name = NULL;
 
 	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
-		if (sample_addr_correlates_sym(attr)) {
+		if (sample_addr_correlates_sym(&evsel->core.attr)) {
 			if (!addr_al->thread)
 				thread__resolve(thread, addr_al, sample);
 			if (addr_al->sym)
@@ -1545,7 +1550,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 					   struct addr_location *addr_al,
 					   FILE *fp)
 {
-	struct perf_event_attr *attr = &evsel->core.attr;
 	size_t depth = thread_stack__depth(thread, sample->cpu);
 	const char *name = NULL;
 	static int spacing;
@@ -1589,19 +1593,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 	return len + dlen;
 }
 
-__weak void arch_fetch_insn(struct perf_sample *sample __maybe_unused,
-			    struct thread *thread __maybe_unused,
-			    struct machine *machine __maybe_unused)
-{
-}
-
-void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
-		       struct machine *machine)
-{
-	if (sample->insn_len == 0 && native_arch)
-		arch_fetch_insn(sample, thread, machine);
-}
-
 static int perf_sample__fprintf_insn(struct perf_sample *sample,
 				     struct evsel *evsel,
 				     struct perf_event_attr *attr,
@@ -1611,7 +1602,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
 {
 	int printed = 0;
 
-	script_fetch_insn(sample, thread, machine);
+	script_fetch_insn(sample, thread, machine, native_arch);
 
 	if (PRINT_FIELD(INSNLEN))
 		printed += fprintf(fp, " ilen: %d", sample->insn_len);
@@ -1630,7 +1621,7 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
 }
 
 static int perf_sample__fprintf_ipc(struct perf_sample *sample,
-				    struct perf_event_attr *attr, FILE *fp)
+				    struct evsel *evsel, FILE *fp)
 {
 	unsigned int ipc;
 
@@ -1651,7 +1642,7 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
 				    struct machine *machine, FILE *fp)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
-	unsigned int type = output_type(attr->type);
+	unsigned int type = evsel__output_type(evsel);
 	bool print_srcline_last = false;
 	int printed = 0;
 
@@ -1688,10 +1679,10 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
 	    ((evsel->core.attr.sample_type & PERF_SAMPLE_ADDR) &&
 	     !output[type].user_set)) {
 		printed += fprintf(fp, " => ");
-		printed += perf_sample__fprintf_addr(sample, thread, attr, fp);
+		printed += perf_sample__fprintf_addr(sample, thread, evsel, fp);
 	}
 
-	printed += perf_sample__fprintf_ipc(sample, attr, fp);
+	printed += perf_sample__fprintf_ipc(sample, evsel, fp);
 
 	if (print_srcline_last)
 		printed += map__fprintf_srcline(al->map, al->addr, "\n  ", fp);
@@ -1709,87 +1700,6 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
 	return printed;
 }
 
-static struct {
-	u32 flags;
-	const char *name;
-} sample_flags[] = {
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"},
-	{PERF_IP_FLAG_BRANCH, "jmp"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |	PERF_IP_FLAG_INTERRUPT, "hw int"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"},
-	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_BRANCH_MISS, "br miss"},
-	{0, NULL}
-};
-
-static const char *sample_flags_to_name(u32 flags)
-{
-	int i;
-
-	for (i = 0; sample_flags[i].name ; i++) {
-		if (sample_flags[i].flags == flags)
-			return sample_flags[i].name;
-	}
-
-	return NULL;
-}
-
-int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz)
-{
-	u32 xf = PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_INTR_DISABLE |
-		 PERF_IP_FLAG_INTR_TOGGLE;
-	const char *chars = PERF_IP_FLAG_CHARS;
-	const size_t n = strlen(PERF_IP_FLAG_CHARS);
-	const char *name = NULL;
-	size_t i, pos = 0;
-	char xs[16] = {0};
-
-	if (flags & xf)
-		snprintf(xs, sizeof(xs), "(%s%s%s)",
-			 flags & PERF_IP_FLAG_IN_TX ? "x" : "",
-			 flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "",
-			 flags & PERF_IP_FLAG_INTR_TOGGLE ? "t" : "");
-
-	name = sample_flags_to_name(flags & ~xf);
-	if (name)
-		return snprintf(str, sz, "%-15s%6s", name, xs);
-
-	if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
-		name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_BEGIN));
-		if (name)
-			return snprintf(str, sz, "tr strt %-7s%6s", name, xs);
-	}
-
-	if (flags & PERF_IP_FLAG_TRACE_END) {
-		name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_END));
-		if (name)
-			return snprintf(str, sz, "tr end  %-7s%6s", name, xs);
-	}
-
-	for (i = 0; i < n; i++, flags >>= 1) {
-		if ((flags & 1) && pos < sz)
-			str[pos++] = chars[i];
-	}
-	for (; i < 32; i++, flags >>= 1) {
-		if ((flags & 1) && pos < sz)
-			str[pos++] = '?';
-	}
-	if (pos < sz)
-		str[pos] = 0;
-
-	return pos;
-}
-
 static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
 {
 	char str[SAMPLE_FLAGS_BUF_SIZE];
@@ -2254,7 +2164,7 @@ static void process_event(struct perf_script *script,
 {
 	struct thread *thread = al->thread;
 	struct perf_event_attr *attr = &evsel->core.attr;
-	unsigned int type = output_type(attr->type);
+	unsigned int type = evsel__output_type(evsel);
 	struct evsel_script *es = evsel->priv;
 	FILE *fp = es->fp;
 	char str[PAGE_SIZE_NAME_LEN];
@@ -2289,15 +2199,20 @@ static void process_event(struct perf_script *script,
 	}
 #ifdef HAVE_LIBTRACEEVENT
 	if (PRINT_FIELD(TRACE) && sample->raw_data) {
-		event_format__fprintf(evsel->tp_format, sample->cpu,
-				      sample->raw_data, sample->raw_size, fp);
+		const struct tep_event *tp_format = evsel__tp_format(evsel);
+
+		if (tp_format) {
+			event_format__fprintf(tp_format, sample->cpu,
+					      sample->raw_data, sample->raw_size,
+					      fp);
+		}
 	}
 #endif
 	if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH))
 		perf_sample__fprintf_synth(sample, evsel, fp);
 
 	if (PRINT_FIELD(ADDR))
-		perf_sample__fprintf_addr(sample, thread, attr, fp);
+		perf_sample__fprintf_addr(sample, thread, evsel, fp);
 
 	if (PRINT_FIELD(DATA_SRC))
 		data_src__fprintf(sample->data_src, fp);
@@ -2347,11 +2262,11 @@ static void process_event(struct perf_script *script,
 		perf_sample__fprintf_uregs(sample, attr, arch, fp);
 
 	if (PRINT_FIELD(BRSTACK))
-		perf_sample__fprintf_brstack(sample, thread, attr, fp);
+		perf_sample__fprintf_brstack(sample, thread, evsel, fp);
 	else if (PRINT_FIELD(BRSTACKSYM))
-		perf_sample__fprintf_brstacksym(sample, thread, attr, fp);
+		perf_sample__fprintf_brstacksym(sample, thread, evsel, fp);
 	else if (PRINT_FIELD(BRSTACKOFF))
-		perf_sample__fprintf_brstackoff(sample, thread, attr, fp);
+		perf_sample__fprintf_brstackoff(sample, thread, evsel, fp);
 
 	if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		perf_sample__fprintf_bpf_output(sample, fp);
@@ -2366,7 +2281,7 @@ static void process_event(struct perf_script *script,
 	if (PRINT_FIELD(CODE_PAGE_SIZE))
 		fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str));
 
-	perf_sample__fprintf_ipc(sample, attr, fp);
+	perf_sample__fprintf_ipc(sample, evsel, fp);
 
 	fprintf(fp, "\n");
 
@@ -2599,14 +2514,14 @@ static int process_attr(const struct perf_tool *tool, union perf_event *event,
 	     sample_type & PERF_SAMPLE_BRANCH_STACK ||
 	     (sample_type & PERF_SAMPLE_REGS_USER &&
 	      sample_type & PERF_SAMPLE_STACK_USER))) {
-		int type = output_type(evsel->core.attr.type);
+		int type = evsel__output_type(evsel);
 
 		if (!(output[type].user_unset_fields & PERF_OUTPUT_IP))
 			output[type].fields |= PERF_OUTPUT_IP;
 		if (!(output[type].user_unset_fields & PERF_OUTPUT_SYM))
 			output[type].fields |= PERF_OUTPUT_SYM;
 	}
-	set_print_ip_opts(&evsel->core.attr);
+	evsel__set_print_ip_opts(evsel);
 	return 0;
 }
 
@@ -2959,79 +2874,18 @@ static int __cmd_script(struct perf_script *script)
 	return ret;
 }
 
-struct script_spec {
-	struct list_head	node;
-	struct scripting_ops	*ops;
-	char			spec[];
-};
-
-static LIST_HEAD(script_specs);
-
-static struct script_spec *script_spec__new(const char *spec,
-					    struct scripting_ops *ops)
-{
-	struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
-
-	if (s != NULL) {
-		strcpy(s->spec, spec);
-		s->ops = ops;
-	}
-
-	return s;
-}
-
-static void script_spec__add(struct script_spec *s)
-{
-	list_add_tail(&s->node, &script_specs);
-}
-
-static struct script_spec *script_spec__find(const char *spec)
+static int list_available_languages_cb(struct scripting_ops *ops, const char *spec)
 {
-	struct script_spec *s;
-
-	list_for_each_entry(s, &script_specs, node)
-		if (strcasecmp(s->spec, spec) == 0)
-			return s;
-	return NULL;
-}
-
-int script_spec_register(const char *spec, struct scripting_ops *ops)
-{
-	struct script_spec *s;
-
-	s = script_spec__find(spec);
-	if (s)
-		return -1;
-
-	s = script_spec__new(spec, ops);
-	if (!s)
-		return -1;
-	else
-		script_spec__add(s);
-
+	fprintf(stderr, "  %-42s [%s]\n", spec, ops->name);
 	return 0;
 }
 
-static struct scripting_ops *script_spec__lookup(const char *spec)
-{
-	struct script_spec *s = script_spec__find(spec);
-	if (!s)
-		return NULL;
-
-	return s->ops;
-}
-
 static void list_available_languages(void)
 {
-	struct script_spec *s;
-
 	fprintf(stderr, "\n");
 	fprintf(stderr, "Scripting language extensions (used in "
 		"perf script -s [spec:]script.[spec]):\n\n");
-
-	list_for_each_entry(s, &script_specs, node)
-		fprintf(stderr, "  %-42s [%s]\n", s->spec, s->ops->name);
-
+	script_spec__for_each(&list_available_languages_cb);
 	fprintf(stderr, "\n");
 }
 
@@ -3523,144 +3377,6 @@ static void free_dlarg(void)
 	free(dlargv);
 }
 
-/*
- * Some scripts specify the required events in their "xxx-record" file,
- * this function will check if the events in perf.data match those
- * mentioned in the "xxx-record".
- *
- * Fixme: All existing "xxx-record" are all in good formats "-e event ",
- * which is covered well now. And new parsing code should be added to
- * cover the future complex formats like event groups etc.
- */
-static int check_ev_match(char *dir_name, char *scriptname,
-			struct perf_session *session)
-{
-	char filename[MAXPATHLEN], evname[128];
-	char line[BUFSIZ], *p;
-	struct evsel *pos;
-	int match, len;
-	FILE *fp;
-
-	scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname);
-
-	fp = fopen(filename, "r");
-	if (!fp)
-		return -1;
-
-	while (fgets(line, sizeof(line), fp)) {
-		p = skip_spaces(line);
-		if (*p == '#')
-			continue;
-
-		while (strlen(p)) {
-			p = strstr(p, "-e");
-			if (!p)
-				break;
-
-			p += 2;
-			p = skip_spaces(p);
-			len = strcspn(p, " \t");
-			if (!len)
-				break;
-
-			snprintf(evname, len + 1, "%s", p);
-
-			match = 0;
-			evlist__for_each_entry(session->evlist, pos) {
-				if (evsel__name_is(pos, evname)) {
-					match = 1;
-					break;
-				}
-			}
-
-			if (!match) {
-				fclose(fp);
-				return -1;
-			}
-		}
-	}
-
-	fclose(fp);
-	return 0;
-}
-
-/*
- * Return -1 if none is found, otherwise the actual scripts number.
- *
- * Currently the only user of this function is the script browser, which
- * will list all statically runnable scripts, select one, execute it and
- * show the output in a perf browser.
- */
-int find_scripts(char **scripts_array, char **scripts_path_array, int num,
-		 int pathlen)
-{
-	struct dirent *script_dirent, *lang_dirent;
-	char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
-	DIR *scripts_dir, *lang_dir;
-	struct perf_session *session;
-	struct perf_data data = {
-		.path = input_name,
-		.mode = PERF_DATA_MODE_READ,
-	};
-	char *temp;
-	int i = 0;
-
-	session = perf_session__new(&data, NULL);
-	if (IS_ERR(session))
-		return PTR_ERR(session);
-
-	snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
-
-	scripts_dir = opendir(scripts_path);
-	if (!scripts_dir) {
-		perf_session__delete(session);
-		return -1;
-	}
-
-	for_each_lang(scripts_path, scripts_dir, lang_dirent) {
-		scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
-			  lang_dirent->d_name);
-#ifndef HAVE_LIBPERL_SUPPORT
-		if (strstr(lang_path, "perl"))
-			continue;
-#endif
-#ifndef HAVE_LIBPYTHON_SUPPORT
-		if (strstr(lang_path, "python"))
-			continue;
-#endif
-
-		lang_dir = opendir(lang_path);
-		if (!lang_dir)
-			continue;
-
-		for_each_script(lang_path, lang_dir, script_dirent) {
-			/* Skip those real time scripts: xxxtop.p[yl] */
-			if (strstr(script_dirent->d_name, "top."))
-				continue;
-			if (i >= num)
-				break;
-			snprintf(scripts_path_array[i], pathlen, "%s/%s",
-				lang_path,
-				script_dirent->d_name);
-			temp = strchr(script_dirent->d_name, '.');
-			snprintf(scripts_array[i],
-				(temp - script_dirent->d_name) + 1,
-				"%s", script_dirent->d_name);
-
-			if (check_ev_match(lang_path,
-					scripts_array[i], session))
-				continue;
-
-			i++;
-		}
-		closedir(lang_dir);
-	}
-
-	closedir(scripts_dir);
-	perf_session__delete(session);
-	return i;
-}
-
 static char *get_script_path(const char *script_root, const char *suffix)
 {
 	struct dirent *script_dirent, *lang_dirent;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index fdf5172646a5..77e327d4a9a7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -112,8 +112,6 @@ static struct target target = {
 	.uid	= UINT_MAX,
 };
 
-#define METRIC_ONLY_LEN 20
-
 static volatile sig_atomic_t	child_pid			= -1;
 static int			detailed_run			=  0;
 static bool			transaction_run;
@@ -151,21 +149,6 @@ static struct perf_stat		perf_stat;
 
 static volatile sig_atomic_t done = 0;
 
-static struct perf_stat_config stat_config = {
-	.aggr_mode		= AGGR_GLOBAL,
-	.aggr_level		= MAX_CACHE_LVL + 1,
-	.scale			= true,
-	.unit_width		= 4, /* strlen("unit") */
-	.run_count		= 1,
-	.metric_only_len	= METRIC_ONLY_LEN,
-	.walltime_nsecs_stats	= &walltime_nsecs_stats,
-	.ru_stats		= &ru_stats,
-	.big_num		= true,
-	.ctl_fd			= -1,
-	.ctl_fd_ack		= -1,
-	.iostat_run		= false,
-};
-
 /* Options set from the command line. */
 struct opt_aggr_mode {
 	bool node, socket, die, cluster, cache, core, thread, no_aggr;
@@ -1071,16 +1054,6 @@ static void sig_atexit(void)
 	kill(getpid(), signr);
 }
 
-void perf_stat__set_big_num(int set)
-{
-	stat_config.big_num = (set != 0);
-}
-
-void perf_stat__set_no_csv_summary(int set)
-{
-	stat_config.no_csv_summary = (set != 0);
-}
-
 static int stat__set_big_num(const struct option *opt __maybe_unused,
 			     const char *s __maybe_unused, int unset)
 {
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 724a79386321..4fd31d29b2ab 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -267,9 +267,9 @@ static void perf_top__show_details(struct perf_top *top)
 
 	if (top->evlist->enabled) {
 		if (top->zero)
-			symbol__annotate_zero_histogram(symbol, top->sym_evsel->core.idx);
+			symbol__annotate_zero_histogram(symbol, top->sym_evsel);
 		else
-			symbol__annotate_decay_histogram(symbol, top->sym_evsel->core.idx);
+			symbol__annotate_decay_histogram(symbol, top->sym_evsel);
 	}
 	if (more != 0)
 		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
@@ -809,7 +809,7 @@ static void perf_event__process_sample(const struct perf_tool *tool,
 		 * invalid --vmlinux ;-)
 		 */
 		if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
-		    __map__is_kernel(al.map) && map__has_symbols(al.map)) {
+		    __map__is_kernel(al.map) && !map__has_symbols(al.map)) {
 			if (symbol_conf.vmlinux_name) {
 				char serr[256];
 
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 6a1a128fe645..d466447ae928 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -389,7 +389,12 @@ static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel)
 	}
 
 	if (et->fmt == NULL) {
-		et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
+		const struct tep_event *tp_format = evsel__tp_format(evsel);
+
+		if (tp_format == NULL)
+			goto out_delete;
+
+		et->fmt = calloc(tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt));
 		if (et->fmt == NULL)
 			goto out_delete;
 	}
@@ -1108,7 +1113,6 @@ static bool syscall_arg__strtoul_btf_type(char *bf __maybe_unused, size_t size _
 	    .strtoul	= STUL_STRARRAY_FLAGS, \
 	    .parm	= &strarray__##array, }
 
-#include "trace/beauty/arch_errno_names.c"
 #include "trace/beauty/eventfd.c"
 #include "trace/beauty/futex_op.c"
 #include "trace/beauty/futex_val3.c"
@@ -2069,30 +2073,11 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 	const char *name = syscalltbl__name(trace->sctbl, id);
 	int err;
 
-#ifdef HAVE_SYSCALL_TABLE_SUPPORT
 	if (trace->syscalls.table == NULL) {
 		trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
 		if (trace->syscalls.table == NULL)
 			return -ENOMEM;
 	}
-#else
-	if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
-		// When using libaudit we don't know beforehand what is the max syscall id
-		struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
-
-		if (table == NULL)
-			return -ENOMEM;
-
-		// Need to memset from offset 0 and +1 members if brand new
-		if (trace->syscalls.table == NULL)
-			memset(table, 0, (id + 1) * sizeof(*sc));
-		else
-			memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
-
-		trace->syscalls.table	      = table;
-		trace->sctbl->syscalls.max_id = id;
-	}
-#endif
 	sc = trace->syscalls.table + id;
 	if (sc->nonexistent)
 		return -EEXIST;
@@ -2122,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 		return PTR_ERR(sc->tp_format);
 	}
 
+	/*
+	 * The tracepoint format contains __syscall_nr field, so it's one more
+	 * than the actual number of syscall arguments.
+	 */
 	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
-					RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
+					RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1))
 		return -ENOMEM;
 
 	sc->args = sc->tp_format->format.fields;
@@ -2154,8 +2143,12 @@ static int evsel__init_tp_arg_scnprintf(struct evsel *evsel, bool *use_btf)
 	struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
 
 	if (fmt != NULL) {
-		syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields, use_btf);
-		return 0;
+		const struct tep_event *tp_format = evsel__tp_format(evsel);
+
+		if (tp_format) {
+			syscall_arg_fmt__init_array(fmt, tp_format->format.fields, use_btf);
+			return 0;
+		}
 	}
 
 	return -ENOMEM;
@@ -2439,18 +2432,7 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 
 	err = -EINVAL;
 
-#ifdef HAVE_SYSCALL_TABLE_SUPPORT
 	if (id > trace->sctbl->syscalls.max_id) {
-#else
-	if (id >= trace->sctbl->syscalls.max_id) {
-		/*
-		 * With libaudit we don't know beforehand what is the max_id,
-		 * so we let trace__read_syscall_info() figure that out as we
-		 * go on reading syscalls.
-		 */
-		err = trace__read_syscall_info(trace, id);
-		if (err)
-#endif
 		goto out_cant_read;
 	}
 
@@ -2581,7 +2563,6 @@ static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
 
 static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size)
 {
-	void *augmented_args = NULL;
 	/*
 	 * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
 	 * and there we get all 6 syscall args plus the tracepoint common fields
@@ -2599,10 +2580,24 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam
 	int args_size = raw_augmented_args_size ?: sc->args_size;
 
 	*augmented_args_size = sample->raw_size - args_size;
-	if (*augmented_args_size > 0)
-		augmented_args = sample->raw_data + args_size;
+	if (*augmented_args_size > 0) {
+		static uintptr_t argbuf[1024]; /* assuming single-threaded */
 
-	return augmented_args;
+		if ((size_t)(*augmented_args_size) > sizeof(argbuf))
+			return NULL;
+
+		/*
+		 * The perf ring-buffer is 8-byte aligned but sample->raw_data
+		 * is not because it's preceded by u32 size.  Later, beautifier
+		 * will use the augmented args with stricter alignments like in
+		 * some struct.  To make sure it's aligned, let's copy the args
+		 * into a static buffer as it's single-threaded for now.
+		 */
+		memcpy(argbuf, sample->raw_data + args_size, *augmented_args_size);
+
+		return argbuf;
+	}
+	return NULL;
 }
 
 static void syscall__exit(struct syscall *sc)
@@ -3027,7 +3022,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
 {
 	char bf[2048];
 	size_t size = sizeof(bf);
-	struct tep_format_field *field = evsel->tp_format->format.fields;
+	const struct tep_event *tp_format = evsel__tp_format(evsel);
+	struct tep_format_field *field = tp_format ? tp_format->format.fields : NULL;
 	struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel);
 	size_t printed = 0, btf_printed;
 	unsigned long val;
@@ -3145,11 +3141,13 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
 
 	if (evsel__is_bpf_output(evsel)) {
 		bpf_output__fprintf(trace, sample);
-	} else if (evsel->tp_format) {
-		if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
-		    trace__fprintf_sys_enter(trace, evsel, sample)) {
+	} else {
+		const struct tep_event *tp_format = evsel__tp_format(evsel);
+
+		if (tp_format && (strncmp(tp_format->name, "sys_enter_", 10) ||
+				  trace__fprintf_sys_enter(trace, evsel, sample))) {
 			if (trace->libtraceevent_print) {
-				event_format__fprintf(evsel->tp_format, sample->cpu,
+				event_format__fprintf(tp_format, sample->cpu,
 						      sample->raw_data, sample->raw_size,
 						      trace->output);
 			} else {
@@ -4077,17 +4075,23 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
 static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg,
 								   char **type)
 {
-	struct tep_format_field *field;
 	struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
+	const struct tep_event *tp_format;
 
-	if (evsel->tp_format == NULL || fmt == NULL)
+	if (!fmt)
 		return NULL;
 
-	for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt)
+	tp_format = evsel__tp_format(evsel);
+	if (!tp_format)
+		return NULL;
+
+	for (const struct tep_format_field *field = tp_format->format.fields; field;
+	     field = field->next, ++fmt) {
 		if (strcmp(field->name, arg) == 0) {
 			*type = field->type;
 			return fmt;
 		}
+	}
 
 	return NULL;
 }
@@ -4843,13 +4847,18 @@ static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name)
 		const struct syscall_fmt *scfmt = syscall_fmt__find(name);
 
 		if (scfmt) {
-			int skip = 0;
+			const struct tep_event *tp_format = evsel__tp_format(evsel);
 
-			if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 ||
-			    strcmp(evsel->tp_format->format.fields->name, "nr") == 0)
-				++skip;
+			if (tp_format) {
+				int skip = 0;
 
-			memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt));
+				if (strcmp(tp_format->format.fields->name, "__syscall_nr") == 0 ||
+				    strcmp(tp_format->format.fields->name, "nr") == 0)
+					++skip;
+
+				memcpy(fmt + skip, scfmt->arg,
+				       (tp_format->format.nr_fields - skip) * sizeof(*fmt));
+			}
 		}
 	}
 }
@@ -4859,10 +4868,16 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf)
 	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (evsel->priv || !evsel->tp_format)
+		const struct tep_event *tp_format;
+
+		if (evsel->priv)
+			continue;
+
+		tp_format = evsel__tp_format(evsel);
+		if (!tp_format)
 			continue;
 
-		if (strcmp(evsel->tp_format->system, "syscalls")) {
+		if (strcmp(tp_format->system, "syscalls")) {
 			evsel__init_tp_arg_scnprintf(evsel, use_btf);
 			continue;
 		}
@@ -4870,20 +4885,24 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf)
 		if (evsel__init_syscall_tp(evsel))
 			return -1;
 
-		if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
+		if (!strncmp(tp_format->name, "sys_enter_", 10)) {
 			struct syscall_tp *sc = __evsel__syscall_tp(evsel);
 
 			if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
 				return -1;
 
-			evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1);
-		} else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
+			evsel__set_syscall_arg_fmt(evsel,
+						   tp_format->name + sizeof("sys_enter_") - 1);
+		} else if (!strncmp(tp_format->name, "sys_exit_", 9)) {
 			struct syscall_tp *sc = __evsel__syscall_tp(evsel);
 
-			if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
+			if (__tp_field__init_uint(&sc->ret, sizeof(u64),
+						  sc->id.offset + sizeof(u64),
+						  evsel->needs_swap))
 				return -1;
 
-			evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1);
+			evsel__set_syscall_arg_fmt(evsel,
+						   tp_format->name + sizeof("sys_exit_") - 1);
 		}
 	}
 
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 94f4b3769bf7..a07e93c53848 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -2,10 +2,6 @@
 #ifndef BUILTIN_H
 #define BUILTIN_H
 
-#include <stddef.h>
-#include <linux/compiler.h>
-#include <tools/config.h>
-
 struct feature_status {
 	const char *name;
 	const char *macro;
@@ -56,6 +52,4 @@ int cmd_ftrace(int argc, const char **argv);
 int cmd_daemon(int argc, const char **argv);
 int cmd_kwork(int argc, const char **argv);
 
-int find_scripts(char **scripts_array, char **scripts_path_array, int num,
-		 int pathlen);
 #endif
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index a05c1c105c51..a4499e5a6f9c 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -26,8 +26,6 @@ FILES=(
   "include/linux/hash.h"
   "include/linux/list-sort.h"
   "include/uapi/linux/hw_breakpoint.h"
-  "arch/x86/include/asm/disabled-features.h"
-  "arch/x86/include/asm/required-features.h"
   "arch/x86/include/asm/cpufeatures.h"
   "arch/x86/include/asm/inat_types.h"
   "arch/x86/include/asm/emulate_prefix.h"
@@ -71,6 +69,7 @@ FILES=(
   "include/uapi/asm-generic/ioctls.h"
   "include/uapi/asm-generic/mman-common.h"
   "include/uapi/asm-generic/unistd.h"
+  "scripts/syscall.tbl"
 )
 
 declare -a SYNC_CHECK_FILES
@@ -201,6 +200,14 @@ check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscall
 check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
 check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
 check_2 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl
+check_2 tools/perf/arch/arm/entry/syscalls/syscall.tbl arch/arm/tools/syscall.tbl
+check_2 tools/perf/arch/sh/entry/syscalls/syscall.tbl arch/sh/kernel/syscalls/syscall.tbl
+check_2 tools/perf/arch/sparc/entry/syscalls/syscall.tbl arch/sparc/kernel/syscalls/syscall.tbl
+check_2 tools/perf/arch/xtensa/entry/syscalls/syscall.tbl arch/xtensa/kernel/syscalls/syscall.tbl
+check_2 tools/perf/arch/alpha/entry/syscalls/syscall.tbl arch/alpha/entry/syscalls/syscall.tbl
+check_2 tools/perf/arch/parisc/entry/syscalls/syscall.tbl arch/parisc/entry/syscalls/syscall.tbl
+check_2 tools/perf/arch/arm64/entry/syscalls/syscall_32.tbl arch/arm64/entry/syscalls/syscall_32.tbl
+check_2 tools/perf/arch/arm64/entry/syscalls/syscall_64.tbl arch/arm64/entry/syscalls/syscall_64.tbl
 
 for i in "${BEAUTY_FILES[@]}"
 do
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index a2987f2cfe1a..f0617cc41f5f 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -84,7 +84,7 @@ static struct cmd_struct commands[] = {
 #endif
 	{ "kvm",	cmd_kvm,	0 },
 	{ "test",	cmd_test,	0 },
-#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))
+#if defined(HAVE_LIBTRACEEVENT)
 	{ "trace",	cmd_trace,	0 },
 #endif
 	{ "inject",	cmd_inject,	0 },
@@ -514,10 +514,6 @@ int main(int argc, const char **argv)
 		fprintf(stderr,
 			"trace command not available: missing libtraceevent devel package at build time.\n");
 		goto out;
-#elif !defined(HAVE_LIBAUDIT_SUPPORT) && !defined(HAVE_SYSCALL_TABLE_SUPPORT)
-		fprintf(stderr,
-			"trace command not available: missing audit-libs devel package at build time.\n");
-		goto out;
 #else
 		setup_path();
 		argv[0] = "trace";
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index c004dd4e65a3..3cb40965549f 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -3,7 +3,7 @@
 #define _PERF_PERF_H
 
 #ifndef MAX_NR_CPUS
-#define MAX_NR_CPUS			2048
+#define MAX_NR_CPUS			4096
 #endif
 
 enum perf_affinity {
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
index 4404b8e91690..7126fbf292e0 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
@@ -5,7 +5,7 @@
     },
     {
         "ArchStdEvent": "EXC_RETURN",
-        "PublicDescription": "Counts any architecturally executed exception return instructions. Eg: AArch64: ERET"
+        "PublicDescription": "Counts any architecturally executed exception return instructions. For example: AArch64: ERET"
     },
     {
         "ArchStdEvent": "EXC_UNDEF",
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json
index 428810f855b8..c5dcdcf43c58 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/general.json
@@ -5,6 +5,6 @@
     },
     {
         "ArchStdEvent": "CNT_CYCLES",
-        "PublicDescription": "Counts constant frequency cycles"
+        "PublicDescription": "Increments at a constant frequency equal to the rate of increment of the System Counter, CNTPCT_EL0."
     }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json
index da7c129f2569..799d106d5173 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l1d_cache.json
@@ -1,11 +1,11 @@
 [
     {
         "ArchStdEvent": "L1D_CACHE_REFILL",
-        "PublicDescription": "Counts level 1 data cache refills caused by speculatively executed load or store operations that missed in the level 1 data cache. This event only counts one event per cache line. This event does not count cache line allocations from preload instructions or from hardware cache prefetching."
+        "PublicDescription": "Counts level 1 data cache refills caused by speculatively executed load or store operations that missed in the level 1 data cache. This event only counts one event per cache line."
     },
     {
         "ArchStdEvent": "L1D_CACHE",
-        "PublicDescription": "Counts level 1 data cache accesses from any load/store operations. Atomic operations that resolve in the CPUs caches (near atomic operations) count as both a write access and read access. Each access to a cache line is counted including the multiple accesses caused by single instructions such as LDM or STM. Each access to other level 1 data or unified memory structures, for example refill buffers, write buffers, and write-back buffers, are also counted."
+        "PublicDescription": "Counts level 1 data cache accesses from any load/store operations. Atomic operations that resolve in the CPUs caches (near atomic operations) counts as both a write access and read access. Each access to a cache line is counted including the multiple accesses caused by single instructions such as LDM or STM. Each access to other level 1 data or unified memory structures, for example refill buffers, write buffers, and write-back buffers, are also counted."
     },
     {
         "ArchStdEvent": "L1D_CACHE_WB",
@@ -17,7 +17,7 @@
     },
     {
         "ArchStdEvent": "L1D_CACHE_RD",
-        "PublicDescription": "Counts level 1 data cache accesses from any load operation. Atomic load operations that resolve in the CPUs caches count as both a write access and read access."
+        "PublicDescription": "Counts level 1 data cache accesses from any load operation. Atomic load operations that resolve in the CPUs caches counts as both a write access and read access."
     },
     {
         "ArchStdEvent": "L1D_CACHE_WR",
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json
index 0e31d0daf88b..ed8291ab9737 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l2_cache.json
@@ -1,11 +1,11 @@
 [
     {
         "ArchStdEvent": "L2D_CACHE",
-        "PublicDescription": "Counts level 2 cache accesses. level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the first level caches or translation resolutions due to accesses. This event also counts write back of dirty data from level 1 data cache to the L2 cache."
+        "PublicDescription": "Counts accesses to the level 2 cache due to data accesses. Level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the first level data cache or translation resolutions due to accesses. This event also counts write back of dirty data from level 1 data cache to the L2 cache."
     },
     {
         "ArchStdEvent": "L2D_CACHE_REFILL",
-        "PublicDescription": "Counts cache line refills into the level 2 cache. level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+        "PublicDescription": "Counts cache line refills into the level 2 cache. Level 2 cache is a unified cache for data and instruction accesses. Accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
     },
     {
         "ArchStdEvent": "L2D_CACHE_WB",
@@ -13,23 +13,23 @@
     },
     {
         "ArchStdEvent": "L2D_CACHE_ALLOCATE",
-        "PublicDescription": "TBD"
+        "PublicDescription": "Counts level 2 cache line allocates that do not fetch data from outside the level 2 data or unified cache."
     },
     {
         "ArchStdEvent": "L2D_CACHE_RD",
-        "PublicDescription": "Counts level 2 cache accesses due to memory read operations. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+        "PublicDescription": "Counts level 2 data cache accesses due to memory read operations. Level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
     },
     {
         "ArchStdEvent": "L2D_CACHE_WR",
-        "PublicDescription": "Counts level 2 cache accesses due to memory write operations. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+        "PublicDescription": "Counts level 2 cache accesses due to memory write operations. Level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
     },
     {
         "ArchStdEvent": "L2D_CACHE_REFILL_RD",
-        "PublicDescription": "Counts refills for memory accesses due to memory read operation counted by L2D_CACHE_RD. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+        "PublicDescription": "Counts refills for memory accesses due to memory read operation counted by L2D_CACHE_RD. Level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
     },
     {
         "ArchStdEvent": "L2D_CACHE_REFILL_WR",
-        "PublicDescription": "Counts refills for memory accesses due to memory write operation counted by L2D_CACHE_WR. level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 caches or translation resolutions due to accesses."
+        "PublicDescription": "Counts refills for memory accesses due to memory write operation counted by L2D_CACHE_WR. Level 2 cache is a unified cache for data and instruction accesses, accesses are for misses in the level 1 data cache or translation resolutions due to accesses."
     },
     {
         "ArchStdEvent": "L2D_CACHE_WB_VICTIM",
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json
index 45bfba532df7..4a2e72fc5ada 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/l3_cache.json
@@ -9,11 +9,11 @@
     },
     {
         "ArchStdEvent": "L3D_CACHE",
-        "PublicDescription": "Counts level 3 cache accesses. level 3 cache is a unified cache for data and instruction accesses. Accesses are for misses in the lower level caches or translation resolutions due to accesses."
+        "PublicDescription": "Counts level 3 cache accesses. Level 3 cache is a unified cache for data and instruction accesses. Accesses are for misses in the lower level caches or translation resolutions due to accesses."
     },
     {
         "ArchStdEvent": "L3D_CACHE_RD",
-        "PublicDescription": "TBD"
+        "PublicDescription": "Counts level 3 cache accesses caused by any memory read operation. Level 3 cache is a unified cache for data and instruction accesses. Accesses are for misses in the lower level caches or translation resolutions due to accesses."
     },
     {
         "ArchStdEvent": "L3D_CACHE_LMISS_RD",
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json
index bb712d57d58a..fd5a2e0099b8 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/ll_cache.json
@@ -1,10 +1,10 @@
 [
     {
         "ArchStdEvent": "LL_CACHE_RD",
-        "PublicDescription": "Counts read transactions that were returned from outside the core cluster. This event counts when the system register CPUECTLR.EXTLLC bit is set. This event counts read transactions returned from outside the core if those transactions are either hit in the system level cache or missed in the SLC and are returned from any other external sources."
+        "PublicDescription": "Counts read transactions that were returned from outside the core cluster. This event counts for external last level cache  when the system register CPUECTLR.EXTLLC bit is set, otherwise it counts for the L3 cache. This event counts read transactions returned from outside the core if those transactions are either hit in the system level cache or missed in the SLC and are returned from any other external sources."
     },
     {
         "ArchStdEvent": "LL_CACHE_MISS_RD",
-        "PublicDescription": "Counts read transactions that were returned from outside the core cluster but missed in the system level cache. This event counts when the system register CPUECTLR.EXTLLC bit is set. This event counts read transactions returned from outside the core if those transactions are missed in the System level Cache. The data source of the transaction is indicated by a field in the CHI transaction returning to the CPU. This event does not count reads caused by cache maintenance operations."
+        "PublicDescription": "Counts read transactions that were returned from outside the core cluster but missed in the system level cache. This event counts for external last level cache when the system register CPUECTLR.EXTLLC bit is set, otherwise it counts for L3 cache. This event counts read transactions returned from outside the core if those transactions are missed in the System level Cache. The data source of the transaction is indicated by a field in the CHI transaction returning to the CPU. This event does not count reads caused by cache maintenance operations."
     }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
index 106a97f8b2e7..bb3491012a8f 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
@@ -33,7 +33,7 @@
     },
     {
         "ArchStdEvent": "MEM_ACCESS_CHECKED",
-        "PublicDescription": "Counts the number of memory read and write accesses in a cycle that are tag checked by the Memory Tagging Extension (MTE)."
+        "PublicDescription": "Counts the number of memory read and write accesses counted by MEM_ACCESS that are tag checked by the Memory Tagging Extension (MTE). This event is implemented as the sum of MEM_ACCESS_CHECKED_RD and MEM_ACCESS_CHECKED_WR"
     },
     {
         "ArchStdEvent": "MEM_ACCESS_CHECKED_RD",
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json
index 5f449270b448..97d352f94323 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/metrics.json
@@ -5,7 +5,7 @@
     },
     {
         "MetricName": "backend_stalled_cycles",
-        "MetricExpr": "((STALL_BACKEND / CPU_CYCLES) * 100)",
+        "MetricExpr": "STALL_BACKEND / CPU_CYCLES * 100",
         "BriefDescription": "This metric is the percentage of cycles that were stalled due to resource constraints in the backend unit of the processor.",
         "MetricGroup": "Cycle_Accounting",
         "ScaleUnit": "1percent of cycles"
@@ -16,45 +16,45 @@
     },
     {
         "MetricName": "branch_misprediction_ratio",
-        "MetricExpr": "(BR_MIS_PRED_RETIRED / BR_RETIRED)",
+        "MetricExpr": "BR_MIS_PRED_RETIRED / BR_RETIRED",
         "BriefDescription": "This metric measures the ratio of branches mispredicted to the total number of branches architecturally executed. This gives an indication of the effectiveness of the branch prediction unit.",
         "MetricGroup": "Miss_Ratio;Branch_Effectiveness",
-        "ScaleUnit": "1per branch"
+        "ScaleUnit": "100percent of branches"
     },
     {
         "MetricName": "branch_mpki",
-        "MetricExpr": "((BR_MIS_PRED_RETIRED / INST_RETIRED) * 1000)",
+        "MetricExpr": "BR_MIS_PRED_RETIRED / INST_RETIRED * 1000",
         "BriefDescription": "This metric measures the number of branch mispredictions per thousand instructions executed.",
         "MetricGroup": "MPKI;Branch_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "branch_percentage",
-        "MetricExpr": "(((BR_IMMED_SPEC + BR_INDIRECT_SPEC) / INST_SPEC) * 100)",
+        "MetricExpr": "(BR_IMMED_SPEC + BR_INDIRECT_SPEC) / INST_SPEC * 100",
         "BriefDescription": "This metric measures branch operations as a percentage of operations speculatively executed.",
         "MetricGroup": "Operation_Mix",
         "ScaleUnit": "1percent of operations"
     },
     {
         "MetricName": "crypto_percentage",
-        "MetricExpr": "((CRYPTO_SPEC / INST_SPEC) * 100)",
+        "MetricExpr": "CRYPTO_SPEC / INST_SPEC * 100",
         "BriefDescription": "This metric measures crypto operations as a percentage of operations speculatively executed.",
         "MetricGroup": "Operation_Mix",
         "ScaleUnit": "1percent of operations"
     },
     {
         "MetricName": "dtlb_mpki",
-        "MetricExpr": "((DTLB_WALK / INST_RETIRED) * 1000)",
+        "MetricExpr": "DTLB_WALK / INST_RETIRED * 1000",
         "BriefDescription": "This metric measures the number of data TLB Walks per thousand instructions executed.",
         "MetricGroup": "MPKI;DTLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "dtlb_walk_ratio",
-        "MetricExpr": "(DTLB_WALK / L1D_TLB)",
+        "MetricExpr": "DTLB_WALK / L1D_TLB",
         "BriefDescription": "This metric measures the ratio of data TLB Walks to the total number of data TLB accesses. This gives an indication of the effectiveness of the data TLB accesses.",
         "MetricGroup": "Miss_Ratio;DTLB_Effectiveness",
-        "ScaleUnit": "1per TLB access"
+        "ScaleUnit": "100percent of TLB accesses"
     },
     {
         "ArchStdEvent": "frontend_bound",
@@ -62,147 +62,147 @@
     },
     {
         "MetricName": "frontend_stalled_cycles",
-        "MetricExpr": "((STALL_FRONTEND / CPU_CYCLES) * 100)",
+        "MetricExpr": "STALL_FRONTEND / CPU_CYCLES * 100",
         "BriefDescription": "This metric is the percentage of cycles that were stalled due to resource constraints in the frontend unit of the processor.",
         "MetricGroup": "Cycle_Accounting",
         "ScaleUnit": "1percent of cycles"
     },
     {
         "MetricName": "integer_dp_percentage",
-        "MetricExpr": "((DP_SPEC / INST_SPEC) * 100)",
+        "MetricExpr": "DP_SPEC / INST_SPEC * 100",
         "BriefDescription": "This metric measures scalar integer operations as a percentage of operations speculatively executed.",
         "MetricGroup": "Operation_Mix",
         "ScaleUnit": "1percent of operations"
     },
     {
         "MetricName": "ipc",
-        "MetricExpr": "(INST_RETIRED / CPU_CYCLES)",
+        "MetricExpr": "INST_RETIRED / CPU_CYCLES",
         "BriefDescription": "This metric measures the number of instructions retired per cycle.",
         "MetricGroup": "General",
         "ScaleUnit": "1per cycle"
     },
     {
         "MetricName": "itlb_mpki",
-        "MetricExpr": "((ITLB_WALK / INST_RETIRED) * 1000)",
+        "MetricExpr": "ITLB_WALK / INST_RETIRED * 1000",
         "BriefDescription": "This metric measures the number of instruction TLB Walks per thousand instructions executed.",
         "MetricGroup": "MPKI;ITLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "itlb_walk_ratio",
-        "MetricExpr": "(ITLB_WALK / L1I_TLB)",
+        "MetricExpr": "ITLB_WALK / L1I_TLB",
         "BriefDescription": "This metric measures the ratio of instruction TLB Walks to the total number of instruction TLB accesses. This gives an indication of the effectiveness of the instruction TLB accesses.",
         "MetricGroup": "Miss_Ratio;ITLB_Effectiveness",
-        "ScaleUnit": "1per TLB access"
+        "ScaleUnit": "100percent of TLB accesses"
     },
     {
         "MetricName": "l1d_cache_miss_ratio",
-        "MetricExpr": "(L1D_CACHE_REFILL / L1D_CACHE)",
+        "MetricExpr": "L1D_CACHE_REFILL / L1D_CACHE",
         "BriefDescription": "This metric measures the ratio of level 1 data cache accesses missed to the total number of level 1 data cache accesses. This gives an indication of the effectiveness of the level 1 data cache.",
         "MetricGroup": "Miss_Ratio;L1D_Cache_Effectiveness",
-        "ScaleUnit": "1per cache access"
+        "ScaleUnit": "100percent of cache accesses"
     },
     {
         "MetricName": "l1d_cache_mpki",
-        "MetricExpr": "((L1D_CACHE_REFILL / INST_RETIRED) * 1000)",
+        "MetricExpr": "L1D_CACHE_REFILL / INST_RETIRED * 1000",
         "BriefDescription": "This metric measures the number of level 1 data cache accesses missed per thousand instructions executed.",
         "MetricGroup": "MPKI;L1D_Cache_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "l1d_tlb_miss_ratio",
-        "MetricExpr": "(L1D_TLB_REFILL / L1D_TLB)",
+        "MetricExpr": "L1D_TLB_REFILL / L1D_TLB",
         "BriefDescription": "This metric measures the ratio of level 1 data TLB accesses missed to the total number of level 1 data TLB accesses. This gives an indication of the effectiveness of the level 1 data TLB.",
         "MetricGroup": "Miss_Ratio;DTLB_Effectiveness",
-        "ScaleUnit": "1per TLB access"
+        "ScaleUnit": "100percent of TLB accesses"
     },
     {
         "MetricName": "l1d_tlb_mpki",
-        "MetricExpr": "((L1D_TLB_REFILL / INST_RETIRED) * 1000)",
-        "BriefDescription": "This metric measures the number of level 1 instruction TLB accesses missed per thousand instructions executed.",
+        "MetricExpr": "L1D_TLB_REFILL / INST_RETIRED * 1000",
+        "BriefDescription": "This metric measures the number of level 1 data TLB accesses missed per thousand instructions executed.",
         "MetricGroup": "MPKI;DTLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "l1i_cache_miss_ratio",
-        "MetricExpr": "(L1I_CACHE_REFILL / L1I_CACHE)",
+        "MetricExpr": "L1I_CACHE_REFILL / L1I_CACHE",
         "BriefDescription": "This metric measures the ratio of level 1 instruction cache accesses missed to the total number of level 1 instruction cache accesses. This gives an indication of the effectiveness of the level 1 instruction cache.",
         "MetricGroup": "Miss_Ratio;L1I_Cache_Effectiveness",
-        "ScaleUnit": "1per cache access"
+        "ScaleUnit": "100percent of cache accesses"
     },
     {
         "MetricName": "l1i_cache_mpki",
-        "MetricExpr": "((L1I_CACHE_REFILL / INST_RETIRED) * 1000)",
+        "MetricExpr": "L1I_CACHE_REFILL / INST_RETIRED * 1000",
         "BriefDescription": "This metric measures the number of level 1 instruction cache accesses missed per thousand instructions executed.",
         "MetricGroup": "MPKI;L1I_Cache_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "l1i_tlb_miss_ratio",
-        "MetricExpr": "(L1I_TLB_REFILL / L1I_TLB)",
+        "MetricExpr": "L1I_TLB_REFILL / L1I_TLB",
         "BriefDescription": "This metric measures the ratio of level 1 instruction TLB accesses missed to the total number of level 1 instruction TLB accesses. This gives an indication of the effectiveness of the level 1 instruction TLB.",
         "MetricGroup": "Miss_Ratio;ITLB_Effectiveness",
-        "ScaleUnit": "1per TLB access"
+        "ScaleUnit": "100percent of TLB accesses"
     },
     {
         "MetricName": "l1i_tlb_mpki",
-        "MetricExpr": "((L1I_TLB_REFILL / INST_RETIRED) * 1000)",
+        "MetricExpr": "L1I_TLB_REFILL / INST_RETIRED * 1000",
         "BriefDescription": "This metric measures the number of level 1 instruction TLB accesses missed per thousand instructions executed.",
         "MetricGroup": "MPKI;ITLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "l2_cache_miss_ratio",
-        "MetricExpr": "(L2D_CACHE_REFILL / L2D_CACHE)",
+        "MetricExpr": "L2D_CACHE_REFILL / L2D_CACHE",
         "BriefDescription": "This metric measures the ratio of level 2 cache accesses missed to the total number of level 2 cache accesses. This gives an indication of the effectiveness of the level 2 cache, which is a unified cache that stores both data and instruction. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.",
         "MetricGroup": "Miss_Ratio;L2_Cache_Effectiveness",
-        "ScaleUnit": "1per cache access"
+        "ScaleUnit": "100percent of cache accesses"
     },
     {
         "MetricName": "l2_cache_mpki",
-        "MetricExpr": "((L2D_CACHE_REFILL / INST_RETIRED) * 1000)",
+        "MetricExpr": "L2D_CACHE_REFILL / INST_RETIRED * 1000",
         "BriefDescription": "This metric measures the number of level 2 unified cache accesses missed per thousand instructions executed. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a unified cache.",
         "MetricGroup": "MPKI;L2_Cache_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "l2_tlb_miss_ratio",
-        "MetricExpr": "(L2D_TLB_REFILL / L2D_TLB)",
+        "MetricExpr": "L2D_TLB_REFILL / L2D_TLB",
         "BriefDescription": "This metric measures the ratio of level 2 unified TLB accesses missed to the total number of level 2 unified TLB accesses. This gives an indication of the effectiveness of the level 2 TLB.",
         "MetricGroup": "Miss_Ratio;ITLB_Effectiveness;DTLB_Effectiveness",
-        "ScaleUnit": "1per TLB access"
+        "ScaleUnit": "100percent of TLB accesses"
     },
     {
         "MetricName": "l2_tlb_mpki",
-        "MetricExpr": "((L2D_TLB_REFILL / INST_RETIRED) * 1000)",
+        "MetricExpr": "L2D_TLB_REFILL / INST_RETIRED * 1000",
         "BriefDescription": "This metric measures the number of level 2 unified TLB accesses missed per thousand instructions executed.",
         "MetricGroup": "MPKI;ITLB_Effectiveness;DTLB_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "ll_cache_read_hit_ratio",
-        "MetricExpr": "((LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD)",
+        "MetricExpr": "(LL_CACHE_RD - LL_CACHE_MISS_RD) / LL_CACHE_RD",
         "BriefDescription": "This metric measures the ratio of last level cache read accesses hit in the cache to the total number of last level cache accesses. This gives an indication of the effectiveness of the last level cache for read traffic. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a system level cache.",
         "MetricGroup": "LL_Cache_Effectiveness",
-        "ScaleUnit": "1per cache access"
+        "ScaleUnit": "100percent of cache accesses"
     },
     {
         "MetricName": "ll_cache_read_miss_ratio",
-        "MetricExpr": "(LL_CACHE_MISS_RD / LL_CACHE_RD)",
+        "MetricExpr": "LL_CACHE_MISS_RD / LL_CACHE_RD",
         "BriefDescription": "This metric measures the ratio of last level cache read accesses missed to the total number of last level cache accesses. This gives an indication of the effectiveness of the last level cache for read traffic. Note that cache accesses in this cache are either data memory access or instruction fetch as this is a system level cache.",
         "MetricGroup": "Miss_Ratio;LL_Cache_Effectiveness",
-        "ScaleUnit": "1per cache access"
+        "ScaleUnit": "100percent of cache accesses"
     },
     {
         "MetricName": "ll_cache_read_mpki",
-        "MetricExpr": "((LL_CACHE_MISS_RD / INST_RETIRED) * 1000)",
+        "MetricExpr": "LL_CACHE_MISS_RD / INST_RETIRED * 1000",
         "BriefDescription": "This metric measures the number of last level cache read accesses missed per thousand instructions executed.",
         "MetricGroup": "MPKI;LL_Cache_Effectiveness",
         "ScaleUnit": "1MPKI"
     },
     {
         "MetricName": "load_percentage",
-        "MetricExpr": "((LD_SPEC / INST_SPEC) * 100)",
+        "MetricExpr": "LD_SPEC / INST_SPEC * 100",
         "BriefDescription": "This metric measures load operations as a percentage of operations speculatively executed.",
         "MetricGroup": "Operation_Mix",
         "ScaleUnit": "1percent of operations"
@@ -213,21 +213,21 @@
     },
     {
         "MetricName": "scalar_fp_percentage",
-        "MetricExpr": "((VFP_SPEC / INST_SPEC) * 100)",
+        "MetricExpr": "VFP_SPEC / INST_SPEC * 100",
         "BriefDescription": "This metric measures scalar floating point operations as a percentage of operations speculatively executed.",
         "MetricGroup": "Operation_Mix",
         "ScaleUnit": "1percent of operations"
     },
     {
         "MetricName": "simd_percentage",
-        "MetricExpr": "((ASE_SPEC / INST_SPEC) * 100)",
+        "MetricExpr": "ASE_SPEC / INST_SPEC * 100",
         "BriefDescription": "This metric measures advanced SIMD operations as a percentage of total operations speculatively executed.",
         "MetricGroup": "Operation_Mix",
         "ScaleUnit": "1percent of operations"
     },
     {
         "MetricName": "store_percentage",
-        "MetricExpr": "((ST_SPEC / INST_SPEC) * 100)",
+        "MetricExpr": "ST_SPEC / INST_SPEC * 100",
         "BriefDescription": "This metric measures store operations as a percentage of operations speculatively executed.",
         "MetricGroup": "Operation_Mix",
         "ScaleUnit": "1percent of operations"
@@ -300,5 +300,12 @@
         "MetricGroup": "Operation_Mix",
         "MetricName": "branch_indirect_spec_rate",
         "ScaleUnit": "100%"
+    },
+    {
+        "MetricName": "sve_all_percentage",
+        "MetricExpr": "SVE_INST_SPEC / INST_SPEC * 100",
+        "BriefDescription": "This metric measures scalable vector operations, including loads and stores, as a percentage of operations speculatively executed.",
+        "MetricGroup": "Operation_Mix",
+        "ScaleUnit": "1percent of operations"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json
index f297b049b62f..337e6a916f2b 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/retired.json
@@ -9,7 +9,7 @@
     },
     {
         "ArchStdEvent": "CID_WRITE_RETIRED",
-        "PublicDescription": "Counts architecturally executed writes to the CONTEXTIDR register, which usually contain the kernel PID and can be output with hardware trace."
+        "PublicDescription": "Counts architecturally executed writes to the CONTEXTIDR_EL1 register, which usually contain the kernel PID and can be output with hardware trace."
     },
     {
         "ArchStdEvent": "TTBR_WRITE_RETIRED",
@@ -17,7 +17,7 @@
     },
     {
         "ArchStdEvent": "BR_RETIRED",
-        "PublicDescription": "Counts architecturally executed branches, whether the branch is taken or not. Instructions that explicitly write to the PC are also counted."
+        "PublicDescription": "Counts architecturally executed branches, whether the branch is taken or not. Instructions that explicitly write to the PC are also counted. Note that exception generating instructions, exception return instructions and context synchronization instructions are not counted."
     },
     {
         "ArchStdEvent": "BR_MIS_PRED_RETIRED",
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json
index 1af961f8a6c8..a7ea0d4c4ea4 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spec_operation.json
@@ -5,7 +5,7 @@
     },
     {
         "ArchStdEvent": "BR_PRED",
-        "PublicDescription": "Counts branches speculatively executed and were predicted right."
+        "PublicDescription": "Counts all speculatively executed branches."
     },
     {
         "ArchStdEvent": "INST_SPEC",
@@ -29,7 +29,7 @@
     },
     {
         "ArchStdEvent": "LDREX_SPEC",
-        "PublicDescription": "Counts Load-Exclusive operations that have been speculatively executed. Eg: LDREX, LDX"
+        "PublicDescription": "Counts Load-Exclusive operations that have been speculatively executed. For example: LDREX, LDX"
     },
     {
         "ArchStdEvent": "STREX_PASS_SPEC",
@@ -73,15 +73,15 @@
     },
     {
         "ArchStdEvent": "BR_IMMED_SPEC",
-        "PublicDescription": "Counts immediate branch operations which are speculatively executed."
+        "PublicDescription": "Counts direct branch operations which are speculatively executed."
     },
     {
         "ArchStdEvent": "BR_RETURN_SPEC",
-        "PublicDescription": "Counts procedure return operations (RET) which are speculatively executed."
+        "PublicDescription": "Counts procedure return operations (RET, RETAA and RETAB) which are speculatively executed."
     },
     {
         "ArchStdEvent": "BR_INDIRECT_SPEC",
-        "PublicDescription": "Counts indirect branch operations including procedure returns, which are speculatively executed. This includes operations that force a software change of the PC, other than exception-generating operations.  Eg: BR Xn, RET"
+        "PublicDescription": "Counts indirect branch operations including procedure returns, which are speculatively executed. This includes operations that force a software change of the PC, other than exception-generating operations and direct branch instructions. Some examples of the instructions counted by this event include BR Xn, RET, etc..."
     },
     {
         "ArchStdEvent": "ISB_SPEC",
@@ -97,11 +97,11 @@
     },
     {
         "ArchStdEvent": "RC_LD_SPEC",
-        "PublicDescription": "Counts any load acquire operations that are speculatively executed. Eg: LDAR, LDARH, LDARB"
+        "PublicDescription": "Counts any load acquire operations that are speculatively executed. For example: LDAR, LDARH, LDARB"
     },
     {
         "ArchStdEvent": "RC_ST_SPEC",
-        "PublicDescription": "Counts any store release operations that are speculatively executed. Eg: STLR, STLRH, STLRB'"
+        "PublicDescription": "Counts any store release operations that are speculatively executed. For example: STLR, STLRH, STLRB"
     },
     {
         "ArchStdEvent": "ASE_INST_SPEC",
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json
index bbbebc805034..1fcba19dfb7d 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/stall.json
@@ -1,7 +1,7 @@
 [
     {
         "ArchStdEvent": "STALL_FRONTEND",
-        "PublicDescription": "Counts cycles when frontend could not send any micro-operations to the rename stage because of frontend resource stalls caused by fetch memory latency or branch prediction flow stalls. All the frontend slots were empty during the cycle when this event counts."
+        "PublicDescription": "Counts cycles when frontend could not send any micro-operations to the rename stage because of frontend resource stalls caused by fetch memory latency or branch prediction flow stalls. STALL_FRONTEND_SLOTS counts SLOTS during the cycle when this event counts."
     },
     {
         "ArchStdEvent": "STALL_BACKEND",
@@ -9,11 +9,11 @@
     },
     {
         "ArchStdEvent": "STALL",
-        "PublicDescription": "Counts cycles when no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)."
+        "PublicDescription": "Counts cycles when no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall). This event is the sum of STALL_FRONTEND and STALL_BACKEND"
     },
     {
         "ArchStdEvent": "STALL_SLOT_BACKEND",
-        "PublicDescription": "Counts slots per cycle in which no operations are sent from the rename unit to the backend due to backend resource constraints."
+        "PublicDescription": "Counts slots per cycle in which no operations are sent from the rename unit to the backend due to backend resource constraints. STALL_BACKEND counts during the cycle when STALL_SLOT_BACKEND counts at least 1."
     },
     {
         "ArchStdEvent": "STALL_SLOT_FRONTEND",
@@ -21,7 +21,7 @@
     },
     {
         "ArchStdEvent": "STALL_SLOT",
-        "PublicDescription": "Counts slots per cycle in which no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall)."
+        "PublicDescription": "Counts slots per cycle in which no operations are sent to the rename unit from the frontend or from the rename unit to the backend for any reason (either frontend or backend stall). STALL_SLOT is the sum of STALL_SLOT_FRONTEND and STALL_SLOT_BACKEND."
     },
     {
         "ArchStdEvent": "STALL_BACKEND_MEM",
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json
index b550af1831f5..5704f1e83af9 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/tlb.json
@@ -25,11 +25,11 @@
     },
     {
         "ArchStdEvent": "DTLB_WALK",
-        "PublicDescription": "Counts data memory translation table walks caused by a miss in the L2 TLB driven by a memory access. Note that partial translations that also cause a table walk are counted. This event does not count table walks caused by TLB maintenance operations."
+        "PublicDescription": "Counts number of demand data translation table walks caused by a miss in the L2 TLB and performing at least one memory access. Translation table walks are counted even if the translation ended up taking a translation fault for reasons different than EPD, E0PD and NFD. Note that partial translations that cause a translation table walk are also counted. Also note that this event counts walks triggered by software preloads, but not walks triggered by hardware prefetchers, and that this event does not count walks triggered by TLB maintenance operations."
     },
     {
         "ArchStdEvent": "ITLB_WALK",
-        "PublicDescription": "Counts instruction memory translation table walks caused by a miss in the L2 TLB driven by a memory access. Partial translations that also cause a table walk are counted. This event does not count table walks caused by TLB maintenance operations."
+        "PublicDescription": "Counts number of instruction translation table walks caused by a miss in the L2 TLB and performing at least one memory access. Translation table walks are counted even if the translation ended up taking a translation fault for reasons different than EPD, E0PD and NFD. Note that partial translations that cause a translation table walk are also counted. Also note that this event does not count walks triggered by TLB maintenance operations."
     },
     {
         "ArchStdEvent": "L1D_TLB_REFILL_RD",
diff --git a/tools/perf/pmu-events/arch/arm64/common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json
index 492083b99256..dddecc946575 100644
--- a/tools/perf/pmu-events/arch/arm64/common-and-microarch.json
+++ b/tools/perf/pmu-events/arch/arm64/common-and-microarch.json
@@ -534,6 +534,11 @@
         "BriefDescription": "SVE operations speculatively executed"
     },
     {
+        "EventCode": "0x8007",
+        "EventName": "ASE_SVE_INST_SPEC",
+        "BriefDescription": "Operation speculatively executed, Advanced SIMD or SVE."
+    },
+    {
         "PublicDescription": "Microarchitectural operation, Operations speculatively executed.",
         "EventCode": "0x8008",
         "EventName": "UOP_SPEC",
@@ -552,48 +557,393 @@
         "BriefDescription": "Floating-point Operations speculatively executed."
     },
     {
+        "EventCode": "0x8011",
+        "EventName": "ASE_FP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD."
+    },
+    {
+        "EventCode": "0x8012",
+        "EventName": "SVE_FP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE."
+    },
+    {
+        "EventCode": "0x8013",
+        "EventName": "ASE_SVE_FP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE."
+    },
+    {
         "PublicDescription": "Floating-point half-precision operations speculatively executed",
         "EventCode": "0x8014",
         "EventName": "FP_HP_SPEC",
         "BriefDescription": "Floating-point half-precision operations speculatively executed"
     },
     {
+        "EventCode": "0x8015",
+        "EventName": "ASE_FP_HP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD half precision."
+    },
+    {
+        "EventCode": "0x8016",
+        "EventName": "SVE_FP_HP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE half precision."
+    },
+    {
+        "EventCode": "0x8017",
+        "EventName": "ASE_SVE_FP_HP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE half precision."
+    },
+    {
         "PublicDescription": "Floating-point single-precision operations speculatively executed",
         "EventCode": "0x8018",
         "EventName": "FP_SP_SPEC",
         "BriefDescription": "Floating-point single-precision operations speculatively executed"
     },
     {
+        "EventCode": "0x8019",
+        "EventName": "ASE_FP_SP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD single precision."
+    },
+    {
+        "EventCode": "0x801A",
+        "EventName": "SVE_FP_SP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE single precision."
+    },
+    {
+        "EventCode": "0x801B",
+        "EventName": "ASE_SVE_FP_SP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE single precision."
+    },
+    {
         "PublicDescription": "Floating-point double-precision operations speculatively executed",
         "EventCode": "0x801C",
         "EventName": "FP_DP_SPEC",
         "BriefDescription": "Floating-point double-precision operations speculatively executed"
     },
     {
+        "EventCode": "0x801D",
+        "EventName": "ASE_FP_DP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD double precision."
+    },
+    {
+        "EventCode": "0x801E",
+        "EventName": "SVE_FP_DP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE double precision."
+    },
+    {
+        "EventCode": "0x801F",
+        "EventName": "ASE_SVE_FP_DP_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE double precision."
+    },
+    {
+        "EventCode": "0x8020",
+        "EventName": "FP_DIV_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, divide."
+    },
+    {
+        "EventCode": "0x8021",
+        "EventName": "ASE_FP_DIV_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD divide."
+    },
+    {
+        "EventCode": "0x8022",
+        "EventName": "SVE_FP_DIV_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE divide."
+    },
+    {
+        "EventCode": "0x8023",
+        "EventName": "ASE_SVE_FP_DIV_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE divide."
+    },
+    {
+        "EventCode": "0x8024",
+        "EventName": "FP_SQRT_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, square root."
+    },
+    {
+        "EventCode": "0x8025",
+        "EventName": "ASE_FP_SQRT_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD square root."
+    },
+    {
+        "EventCode": "0x8026",
+        "EventName": "SVE_FP_SQRT_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE square root."
+    },
+    {
+        "EventCode": "0x8027",
+        "EventName": "ASE_SVE_FP_SQRT_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE square-root."
+    },
+    {
         "PublicDescription": "Floating-point FMA Operations speculatively executed.",
         "EventCode": "0x8028",
         "EventName": "FP_FMA_SPEC",
         "BriefDescription": "Floating-point FMA Operations speculatively executed."
     },
     {
+        "EventCode": "0x8029",
+        "EventName": "ASE_FP_FMA_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD FMA."
+    },
+    {
+        "EventCode": "0x802A",
+        "EventName": "SVE_FP_FMA_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE FMA."
+    },
+    {
+        "EventCode": "0x802B",
+        "EventName": "ASE_SVE_FP_FMA_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE FMA."
+    },
+    {
+        "EventCode": "0x802C",
+        "EventName": "FP_MUL_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, multiply."
+    },
+    {
+        "EventCode": "0x802D",
+        "EventName": "ASE_FP_MUL_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD multiply."
+    },
+    {
+        "EventCode": "0x802E",
+        "EventName": "SVE_FP_MUL_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE multiply."
+    },
+    {
+        "EventCode": "0x802F",
+        "EventName": "ASE_SVE_FP_MUL_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE multiply."
+    },
+    {
+        "EventCode": "0x8030",
+        "EventName": "FP_ADDSUB_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, add or subtract."
+    },
+    {
+        "EventCode": "0x8031",
+        "EventName": "ASE_FP_ADDSUB_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD add or subtract."
+    },
+    {
+        "EventCode": "0x8032",
+        "EventName": "SVE_FP_ADDSUB_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE add or subtract."
+    },
+    {
+        "EventCode": "0x8033",
+        "EventName": "ASE_SVE_FP_ADDSUB_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE add or subtract."
+    },
+    {
         "PublicDescription": "Floating-point reciprocal estimate Operations speculatively executed.",
         "EventCode": "0x8034",
         "EventName": "FP_RECPE_SPEC",
         "BriefDescription": "Floating-point reciprocal estimate Operations speculatively executed."
     },
     {
+        "EventCode": "0x8035",
+        "EventName": "ASE_FP_RECPE_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD reciprocal estimate."
+    },
+    {
+        "EventCode": "0x8036",
+        "EventName": "SVE_FP_RECPE_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE reciprocal estimate."
+    },
+    {
+        "EventCode": "0x8037",
+        "EventName": "ASE_SVE_FP_RECPE_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE reciprocal estimate."
+    },
+    {
         "PublicDescription": "floating-point convert Operations speculatively executed.",
         "EventCode": "0x8038",
         "EventName": "FP_CVT_SPEC",
         "BriefDescription": "floating-point convert Operations speculatively executed."
     },
     {
+        "EventCode": "0x8039",
+        "EventName": "ASE_FP_CVT_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD convert."
+    },
+    {
+        "EventCode": "0x803A",
+        "EventName": "SVE_FP_CVT_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE convert."
+    },
+    {
+        "EventCode": "0x803B",
+        "EventName": "ASE_SVE_FP_CVT_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE convert."
+    },
+    {
+        "EventCode": "0x803C",
+        "EventName": "SVE_FP_AREDUCE_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE accumulating reduction."
+    },
+    {
+        "EventCode": "0x803D",
+        "EventName": "ASE_FP_PREDUCE_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD pairwise add step."
+    },
+    {
+        "EventCode": "0x803E",
+        "EventName": "SVE_FP_VREDUCE_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, SVE vector reduction."
+    },
+    {
+        "EventCode": "0x803F",
+        "EventName": "ASE_SVE_FP_VREDUCE_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE vector reduction."
+    },
+    {
+        "EventCode": "0x8040",
+        "EventName": "INT_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed."
+    },
+    {
+        "EventCode": "0x8041",
+        "EventName": "ASE_INT_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, Advanced SIMD."
+    },
+    {
+        "EventCode": "0x8042",
+        "EventName": "SVE_INT_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, SVE."
+    },
+    {
         "PublicDescription": "Advanced SIMD and SVE integer Operations speculatively executed.",
         "EventCode": "0x8043",
         "EventName": "ASE_SVE_INT_SPEC",
         "BriefDescription": "Advanced SIMD and SVE integer Operations speculatively executed."
     },
     {
+        "EventCode": "0x8044",
+        "EventName": "INT_DIV_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, divide."
+    },
+    {
+        "EventCode": "0x8045",
+        "EventName": "INT_DIV64_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, 64-bit divide."
+    },
+    {
+        "EventCode": "0x8046",
+        "EventName": "SVE_INT_DIV_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, SVE divide."
+    },
+    {
+        "EventCode": "0x8047",
+        "EventName": "SVE_INT_DIV64_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, SVE 64-bit divide."
+    },
+    {
+        "EventCode": "0x8048",
+        "EventName": "INT_MUL_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, multiply."
+    },
+    {
+        "EventCode": "0x8049",
+        "EventName": "ASE_INT_MUL_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, Advanced SIMD multiply."
+    },
+    {
+        "EventCode": "0x804A",
+        "EventName": "SVE_INT_MUL_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, SVE multiply."
+    },
+    {
+        "EventCode": "0x804B",
+        "EventName": "ASE_SVE_INT_MUL_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, Advanced SIMD or SVE multiply."
+    },
+    {
+        "EventCode": "0x804C",
+        "EventName": "INT_MUL64_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, 64\u00d764 multiply."
+    },
+    {
+        "EventCode": "0x804D",
+        "EventName": "SVE_INT_MUL64_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, SVE 64\u00d764 multiply."
+    },
+    {
+        "EventCode": "0x804E",
+        "EventName": "INT_MULH64_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, 64\u00d764 multiply returning high part."
+    },
+    {
+        "EventCode": "0x804F",
+        "EventName": "SVE_INT_MULH64_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, SVE 64\u00d764 multiply high part."
+    },
+    {
+        "EventCode": "0x8058",
+        "EventName": "NONFP_SPEC",
+        "BriefDescription": "Non-floating-point Operation speculatively executed."
+    },
+    {
+        "EventCode": "0x8059",
+        "EventName": "ASE_NONFP_SPEC",
+        "BriefDescription": "Non-floating-point Operation speculatively executed, Advanced SIMD."
+    },
+    {
+        "EventCode": "0x805A",
+        "EventName": "SVE_NONFP_SPEC",
+        "BriefDescription": "Non-floating-point Operation speculatively executed, SVE."
+    },
+    {
+        "EventCode": "0x805B",
+        "EventName": "ASE_SVE_NONFP_SPEC",
+        "BriefDescription": "Non-floating-point Operation speculatively executed, Advanced SIMD or SVE."
+    },
+    {
+        "EventCode": "0x805D",
+        "EventName": "ASE_INT_VREDUCE_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, Advanced SIMD reduction."
+    },
+    {
+        "EventCode": "0x805E",
+        "EventName": "SVE_INT_VREDUCE_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, SVE reduction."
+    },
+    {
+        "EventCode": "0x805F",
+        "EventName": "ASE_SVE_INT_VREDUCE_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, Advanced SIMD or SVE reduction."
+    },
+    {
+        "EventCode": "0x8060",
+        "EventName": "SVE_PERM_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE permute."
+    },
+    {
+        "EventCode": "0x8065",
+        "EventName": "SVE_XPIPE_Z2R_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE vector to scalar cross-pipe."
+    },
+    {
+        "EventCode": "0x8066",
+        "EventName": "SVE_XPIPE_R2Z_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE scalar to vector cross-pipe."
+    },
+    {
+        "EventCode": "0x8068",
+        "EventName": "SVE_PGEN_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE predicate generating."
+    },
+    {
+        "EventCode": "0x8069",
+        "EventName": "SVE_PGEN_FLG_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE predicate flag setting."
+    },
+    {
+        "EventCode": "0x806D",
+        "EventName": "SVE_PPERM_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE predicate permute."
+    },
+    {
         "PublicDescription": "SVE predicated Operations speculatively executed.",
         "EventCode": "0x8074",
         "EventName": "SVE_PRED_SPEC",
@@ -630,6 +980,16 @@
         "BriefDescription": "SVE MOVPRFX Operations speculatively executed."
     },
     {
+        "EventCode": "0x807D",
+        "EventName": "SVE_MOVPRFX_Z_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE MOVPRFX zeroing predication."
+    },
+    {
+        "EventCode": "0x807E",
+        "EventName": "SVE_MOVPRFX_M_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE MOVPRFX merging predication."
+    },
+    {
         "PublicDescription": "SVE MOVPRFX unfused Operations speculatively executed.",
         "EventCode": "0x807F",
         "EventName": "SVE_MOVPRFX_U_SPEC",
@@ -696,6 +1056,16 @@
         "BriefDescription": "SVE contiguous prefetch element Operations speculatively executed."
     },
     {
+        "EventCode": "0x80A1",
+        "EventName": "SVE_LDNT_CONTIG_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE non-temporal contiguous load element."
+    },
+    {
+        "EventCode": "0x80A2",
+        "EventName": "SVE_STNT_CONTIG_SPEC",
+        "BriefDescription": "Operation speculatively executed, SVE non-temporal contiguous store element."
+    },
+    {
         "PublicDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed.",
         "EventCode": "0x80A5",
         "EventName": "ASE_SVE_LD_MULTI_SPEC",
@@ -786,6 +1156,16 @@
         "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed."
     },
     {
+        "EventCode": "0x80C8",
+        "EventName": "INT_SCALE_OPS_SPEC",
+        "BriefDescription": "Scalable integer element arithmetic operations Speculatively executed."
+    },
+    {
+        "EventCode": "0x80C9",
+        "EventName": "INT_FIXED_OPS_SPEC",
+        "BriefDescription": "Non-scalable integer element arithmetic operations Speculatively executed."
+    },
+    {
         "PublicDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed",
         "EventCode": "0x80E3",
         "EventName": "ASE_SVE_INT8_SPEC",
@@ -808,5 +1188,340 @@
         "EventCode": "0x80EF",
         "EventName": "ASE_SVE_INT64_SPEC",
         "BriefDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed"
+    },
+    {
+        "EventCode": "0x80F3",
+        "EventName": "ASE_SVE_FP_DOT_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE dot-product."
+    },
+    {
+        "EventCode": "0x80F7",
+        "EventName": "ASE_SVE_FP_MMLA_SPEC",
+        "BriefDescription": "Floating-point Operation speculatively executed, Advanced SIMD or SVE matrix multiply."
+    },
+    {
+        "EventCode": "0x80FB",
+        "EventName": "ASE_SVE_INT_DOT_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, Advanced SIMD or SVE dot-product."
+    },
+    {
+        "EventCode": "0x80FF",
+        "EventName": "ASE_SVE_INT_MMLA_SPEC",
+        "BriefDescription": "Integer Operation speculatively executed, Advanced SIMD or SVE matrix multiply."
+    },
+    {
+        "EventCode": "0x8128",
+        "EventName": "DTLB_WALK_PERCYC",
+        "BriefDescription": "Data translation table walks in progress."
+    },
+    {
+        "EventCode": "0x8129",
+        "EventName": "ITLB_WALK_PERCYC",
+        "BriefDescription": "Instruction translation table walks in progress."
+    },
+    {
+        "EventCode": "0x8136",
+        "EventName": "DTLB_STEP",
+        "BriefDescription": "Data TLB translation table walk, step."
+    },
+    {
+        "EventCode": "0x8137",
+        "EventName": "ITLB_STEP",
+        "BriefDescription": "Instruction TLB translation table walk, step."
+    },
+    {
+        "EventCode": "0x8138",
+        "EventName": "DTLB_WALK_LARGE",
+        "BriefDescription": "Data TLB large page translation table walk."
+    },
+    {
+        "EventCode": "0x8139",
+        "EventName": "ITLB_WALK_LARGE",
+        "BriefDescription": "Instruction TLB large page translation table walk."
+    },
+    {
+        "EventCode": "0x813A",
+        "EventName": "DTLB_WALK_SMALL",
+        "BriefDescription": "Data TLB small page translation table walk."
+    },
+    {
+        "EventCode": "0x813B",
+        "EventName": "ITLB_WALK_SMALL",
+        "BriefDescription": "Instruction TLB small page translation table walk."
+    },
+    {
+        "EventCode": "0x8144",
+        "EventName": "L1D_CACHE_MISS",
+        "BriefDescription": "Level 1 data cache demand access miss."
+    },
+    {
+        "EventCode": "0x8145",
+        "EventName": "L1I_CACHE_HWPRF",
+        "BriefDescription": "Level 1 instruction cache hardware prefetch."
+    },
+    {
+        "EventCode": "0x814C",
+        "EventName": "L2D_CACHE_MISS",
+        "BriefDescription": "Level 2 data cache demand access miss."
+    },
+    {
+        "EventCode": "0x8154",
+        "EventName": "L1D_CACHE_HWPRF",
+        "BriefDescription": "Level 1 data cache hardware prefetch."
+    },
+    {
+        "EventCode": "0x8155",
+        "EventName": "L2D_CACHE_HWPRF",
+        "BriefDescription": "Level 2 data cache hardware prefetch."
+    },
+    {
+        "EventCode": "0x8158",
+        "EventName": "STALL_FRONTEND_MEMBOUND",
+        "BriefDescription": "Frontend stall cycles, memory bound."
+    },
+    {
+        "EventCode": "0x8159",
+        "EventName": "STALL_FRONTEND_L1I",
+        "BriefDescription": "Frontend stall cycles, level 1 instruction cache."
+    },
+    {
+        "EventCode": "0x815A",
+        "EventName": "STALL_FRONTEND_L2I",
+        "BriefDescription": "Frontend stall cycles, level 2 instruction cache."
+    },
+    {
+        "EventCode": "0x815B",
+        "EventName": "STALL_FRONTEND_MEM",
+        "BriefDescription": "Frontend stall cycles, last level PE cache or memory."
+    },
+    {
+        "EventCode": "0x815C",
+        "EventName": "STALL_FRONTEND_TLB",
+        "BriefDescription": "Frontend stall cycles, TLB."
+    },
+    {
+        "EventCode": "0x8160",
+        "EventName": "STALL_FRONTEND_CPUBOUND",
+        "BriefDescription": "Frontend stall cycles, processor bound."
+    },
+    {
+        "EventCode": "0x8161",
+        "EventName": "STALL_FRONTEND_FLOW",
+        "BriefDescription": "Frontend stall cycles, flow control."
+    },
+    {
+        "EventCode": "0x8162",
+        "EventName": "STALL_FRONTEND_FLUSH",
+        "BriefDescription": "Frontend stall cycles, flush recovery."
+    },
+    {
+        "EventCode": "0x8163",
+        "EventName": "STALL_FRONTEND_RENAME",
+        "BriefDescription": "Frontend stall cycles, rename full."
+    },
+    {
+        "EventCode": "0x8164",
+        "EventName": "STALL_BACKEND_MEMBOUND",
+        "BriefDescription": "Backend stall cycles, memory bound."
+    },
+    {
+        "EventCode": "0x8165",
+        "EventName": "STALL_BACKEND_L1D",
+        "BriefDescription": "Backend stall cycles, level 1 data cache."
+    },
+    {
+        "EventCode": "0x8166",
+        "EventName": "STALL_BACKEND_L2D",
+        "BriefDescription": "Backend stall cycles, level 2 data cache."
+    },
+    {
+        "EventCode": "0x8167",
+        "EventName": "STALL_BACKEND_TLB",
+        "BriefDescription": "Backend stall cycles, TLB."
+    },
+    {
+        "EventCode": "0x8168",
+        "EventName": "STALL_BACKEND_ST",
+        "BriefDescription": "Backend stall cycles, store."
+    },
+    {
+        "EventCode": "0x816A",
+        "EventName": "STALL_BACKEND_CPUBOUND",
+        "BriefDescription": "Backend stall cycles, processor bound."
+    },
+    {
+        "EventCode": "0x816B",
+        "EventName": "STALL_BACKEND_BUSY",
+        "BriefDescription": "Backend stall cycles, backend busy."
+    },
+    {
+        "EventCode": "0x816C",
+        "EventName": "STALL_BACKEND_ILOCK",
+        "BriefDescription": "Backend stall cycles, input dependency."
+    },
+    {
+        "EventCode": "0x816D",
+        "EventName": "STALL_BACKEND_RENAME",
+        "BriefDescription": "Backend stall cycles, rename full."
+    },
+    {
+        "EventCode": "0x816E",
+        "EventName": "STALL_BACKEND_ATOMIC",
+        "BriefDescription": "Backend stall cycles, atomic operation."
+    },
+    {
+        "EventCode": "0x816F",
+        "EventName": "STALL_BACKEND_MEMCPYSET",
+        "BriefDescription": "Backend stall cycles, Memory Copy or Set operation."
+    },
+    {
+        "EventCode": "0x8186",
+        "EventName": "UOP_RETIRED",
+        "BriefDescription": "Micro-operation architecturally executed."
+    },
+    {
+        "EventCode": "0x8188",
+        "EventName": "DTLB_WALK_BLOCK",
+        "BriefDescription": "Data TLB block translation table walk."
+    },
+    {
+        "EventCode": "0x8189",
+        "EventName": "ITLB_WALK_BLOCK",
+        "BriefDescription": "Instruction TLB block translation table walk."
+    },
+    {
+        "EventCode": "0x818A",
+        "EventName": "DTLB_WALK_PAGE",
+        "BriefDescription": "Data TLB page translation table walk."
+    },
+    {
+        "EventCode": "0x818B",
+        "EventName": "ITLB_WALK_PAGE",
+        "BriefDescription": "Instruction TLB page translation table walk."
+    },
+    {
+        "EventCode": "0x81B8",
+        "EventName": "L1I_CACHE_REFILL_HWPRF",
+        "BriefDescription": "Level 1 instruction cache refill, hardware prefetch."
+    },
+    {
+        "EventCode": "0x81BC",
+        "EventName": "L1D_CACHE_REFILL_HWPRF",
+        "BriefDescription": "Level 1 data cache refill, hardware prefetch."
+    },
+    {
+        "EventCode": "0x81BD",
+        "EventName": "L2D_CACHE_REFILL_HWPRF",
+        "BriefDescription": "Level 2 data cache refill, hardware prefetch."
+    },
+    {
+        "EventCode": "0x81C0",
+        "EventName": "L1I_CACHE_HIT_RD",
+        "BriefDescription": "Level 1 instruction cache demand fetch hit."
+    },
+    {
+        "EventCode": "0x81C4",
+        "EventName": "L1D_CACHE_HIT_RD",
+        "BriefDescription": "Level 1 data cache demand access hit, read."
+    },
+    {
+        "EventCode": "0x81C5",
+        "EventName": "L2D_CACHE_HIT_RD",
+        "BriefDescription": "Level 2 data cache demand access hit, read."
+    },
+    {
+        "EventCode": "0x81C8",
+        "EventName": "L1D_CACHE_HIT_WR",
+        "BriefDescription": "Level 1 data cache demand access hit, write."
+    },
+    {
+        "EventCode": "0x81C9",
+        "EventName": "L2D_CACHE_HIT_WR",
+        "BriefDescription": "Level 2 data cache demand access hit, write."
+    },
+    {
+        "EventCode": "0x8200",
+        "EventName": "L1I_CACHE_HIT",
+        "BriefDescription": "Level 1 instruction cache hit."
+    },
+    {
+        "EventCode": "0x8204",
+        "EventName": "L1D_CACHE_HIT",
+        "BriefDescription": "Level 1 data cache hit."
+    },
+    {
+        "EventCode": "0x8205",
+        "EventName": "L2D_CACHE_HIT",
+        "BriefDescription": "Level 2 data cache hit."
+    },
+    {
+        "EventCode": "0x8240",
+        "EventName": "L1I_LFB_HIT_RD",
+        "BriefDescription": "Level 1 instruction cache demand fetch line-fill buffer hit."
+    },
+    {
+        "EventCode": "0x8244",
+        "EventName": "L1D_LFB_HIT_RD",
+        "BriefDescription": "Level 1 data cache demand access line-fill buffer hit, read."
+    },
+    {
+        "EventCode": "0x8245",
+        "EventName": "L2D_LFB_HIT_RD",
+        "BriefDescription": "Level 2 data cache demand access line-fill buffer hit, read."
+    },
+    {
+        "EventCode": "0x8248",
+        "EventName": "L1D_LFB_HIT_WR",
+        "BriefDescription": "Level 1 data cache demand access line-fill buffer hit, write."
+    },
+    {
+        "EventCode": "0x8249",
+        "EventName": "L2D_LFB_HIT_WR",
+        "BriefDescription": "Level 2 data cache demand access line-fill buffer hit, write."
+    },
+    {
+        "EventCode": "0x8280",
+        "EventName": "L1I_CACHE_PRF",
+        "BriefDescription": "Level 1 instruction cache, preload or prefetch hit."
+    },
+    {
+        "EventCode": "0x8284",
+        "EventName": "L1D_CACHE_PRF",
+        "BriefDescription": "Level 1 data cache, preload or prefetch hit."
+    },
+    {
+        "EventCode": "0x8285",
+        "EventName": "L2D_CACHE_PRF",
+        "BriefDescription": "Level 2 data cache, preload or prefetch hit."
+    },
+    {
+        "EventCode": "0x8288",
+        "EventName": "L1I_CACHE_REFILL_PRF",
+        "BriefDescription": "Level 1 instruction cache refill, preload or prefetch hit."
+    },
+    {
+        "EventCode": "0x828C",
+        "EventName": "L1D_CACHE_REFILL_PRF",
+        "BriefDescription": "Level 1 data cache refill, preload or prefetch hit."
+    },
+    {
+        "EventCode": "0x828D",
+        "EventName": "L2D_CACHE_REFILL_PRF",
+        "BriefDescription": "Level 2 data cache refill, preload or prefetch hit."
+    },
+    {
+        "EventCode": "0x8320",
+        "EventName": "L1D_CACHE_REFILL_PERCYC",
+        "BriefDescription": "Level 1 data or unified cache refills in progress."
+    },
+    {
+        "EventCode": "0x8321",
+        "EventName": "L2D_CACHE_REFILL_PERCYC",
+        "BriefDescription": "Level 2 data or unified cache refills in progress."
+    },
+    {
+        "EventCode": "0x8324",
+        "EventName": "L1I_CACHE_REFILL_PERCYC",
+        "BriefDescription": "Level 1 instruction or unified cache refills in progress."
     }
 ]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/core-imp-def.json
new file mode 100644
index 000000000000..52f5ca1482fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/core-imp-def.json
@@ -0,0 +1,6 @@
+[
+    {
+        "ArchStdEvent": "L1I_CACHE_PRF",
+        "BriefDescription": "This event counts fetch counted by either Level 1 instruction hardware prefetch or Level 1 instruction software prefetch."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/cycle_accounting.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/cycle_accounting.json
new file mode 100644
index 000000000000..24ff5d8dbb98
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/cycle_accounting.json
@@ -0,0 +1,122 @@
+[
+    {
+        "EventCode": "0x0182",
+        "EventName": "LD_COMP_WAIT_L1_MISS",
+        "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access."
+    },
+    {
+        "EventCode": "0x0183",
+        "EventName": "LD_COMP_WAIT_L1_MISS_EX",
+        "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access."
+    },
+    {
+        "EventCode": "0x0184",
+        "EventName": "LD_COMP_WAIT",
+        "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access."
+    },
+    {
+        "EventCode": "0x0185",
+        "EventName": "LD_COMP_WAIT_EX",
+        "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access."
+    },
+    {
+        "EventCode": "0x0186",
+        "EventName": "LD_COMP_WAIT_PFP_BUSY",
+        "BriefDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port."
+    },
+    {
+        "EventCode": "0x0187",
+        "EventName": "LD_COMP_WAIT_PFP_BUSY_EX",
+        "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation."
+    },
+    {
+        "EventCode": "0x0188",
+        "EventName": "LD_COMP_WAIT_PFP_BUSY_SWPF",
+        "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction."
+    },
+    {
+        "EventCode": "0x0189",
+        "EventName": "EU_COMP_WAIT",
+        "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction."
+    },
+    {
+        "EventCode": "0x018A",
+        "EventName": "FL_COMP_WAIT",
+        "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction."
+    },
+    {
+        "EventCode": "0x018B",
+        "EventName": "BR_COMP_WAIT",
+        "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction."
+    },
+    {
+        "EventCode": "0x018C",
+        "EventName": "ROB_EMPTY",
+        "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty."
+    },
+    {
+        "EventCode": "0x018D",
+        "EventName": "ROB_EMPTY_STQ_BUSY",
+        "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full."
+    },
+    {
+        "EventCode": "0x018E",
+        "EventName": "WFE_WFI_CYCLE",
+        "BriefDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction."
+    },
+    {
+        "EventCode": "0x018F",
+        "EventName": "RETENTION_CYCLE",
+        "BriefDescription": "This event counts every cycle that the instruction unit is halted by the RETENTION state."
+    },
+    {
+        "EventCode": "0x0190",
+        "EventName": "_0INST_COMMIT",
+        "BriefDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only."
+    },
+    {
+        "EventCode": "0x0191",
+        "EventName": "_1INST_COMMIT",
+        "BriefDescription": "This event counts every cycle that one instruction is committed."
+    },
+    {
+        "EventCode": "0x0192",
+        "EventName": "_2INST_COMMIT",
+        "BriefDescription": "This event counts every cycle that two instructions are committed."
+    },
+    {
+        "EventCode": "0x0193",
+        "EventName": "_3INST_COMMIT",
+        "BriefDescription": "This event counts every cycle that three instructions are committed."
+    },
+    {
+        "EventCode": "0x0194",
+        "EventName": "_4INST_COMMIT",
+        "BriefDescription": "This event counts every cycle that four instructions are committed."
+    },
+    {
+        "EventCode": "0x0195",
+        "EventName": "_5INST_COMMIT",
+        "BriefDescription": "This event counts every cycle that five instructions are committed."
+    },
+    {
+        "EventCode": "0x0198",
+        "EventName": "UOP_ONLY_COMMIT",
+        "BriefDescription": "This event counts every cycle that only any micro-operations are committed."
+    },
+    {
+        "EventCode": "0x0199",
+        "EventName": "SINGLE_MOVPRFX_COMMIT",
+        "BriefDescription": "This event counts every cycle that only the MOVPRFX instruction is committed."
+    },
+    {
+        "EventCode": "0x019C",
+        "EventName": "LD_COMP_WAIT_L2_MISS",
+        "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache miss."
+    },
+    {
+        "EventCode": "0x019D",
+        "EventName": "LD_COMP_WAIT_L2_MISS_EX",
+        "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache miss."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/energy.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/energy.json
new file mode 100644
index 000000000000..b55173f71e42
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/energy.json
@@ -0,0 +1,17 @@
+[
+    {
+        "EventCode": "0x01F0",
+        "EventName": "EA_CORE",
+        "BriefDescription": "This event counts energy consumption of core."
+    },
+    {
+        "EventCode": "0x03F0",
+        "EventName": "EA_L3",
+        "BriefDescription": "This event counts energy consumption of L3 cache."
+    },
+    {
+        "EventCode": "0x03F1",
+        "EventName": "EA_LDO_LOSS",
+        "BriefDescription": "This event counts energy consumption of LDO loss."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/exception.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/exception.json
new file mode 100644
index 000000000000..f231712fe261
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/exception.json
@@ -0,0 +1,42 @@
+[
+    {
+        "ArchStdEvent": "EXC_TAKEN",
+        "BriefDescription": "This event counts each exception taken."
+    },
+    {
+        "ArchStdEvent": "EXC_RETURN",
+        "BriefDescription": "This event counts each executed exception return instruction."
+    },
+    {
+        "ArchStdEvent": "EXC_UNDEF",
+        "BriefDescription": "This event counts only other synchronous exceptions that are taken locally."
+    },
+    {
+        "ArchStdEvent": "EXC_SVC",
+        "BriefDescription": "This event counts only Supervisor Call exceptions that are taken locally."
+    },
+    {
+        "ArchStdEvent": "EXC_PABORT",
+        "BriefDescription": "This event counts only Instruction Abort exceptions that are taken locally."
+    },
+    {
+        "ArchStdEvent": "EXC_DABORT",
+        "BriefDescription": "This event counts only Data Abort or SError interrupt exceptions that are taken locally."
+    },
+    {
+        "ArchStdEvent": "EXC_IRQ",
+        "BriefDescription": "This event counts only IRQ exceptions that are taken locally, including Virtual IRQ exceptions."
+    },
+    {
+        "ArchStdEvent": "EXC_FIQ",
+        "BriefDescription": "This event counts only FIQ exceptions that are taken locally, including Virtual FIQ exceptions."
+    },
+    {
+        "ArchStdEvent": "EXC_SMC",
+        "BriefDescription": "This event counts only Secure Monitor Call exceptions. The counter does not increment on SMC instructions trapped as a Hyp Trap exception."
+    },
+    {
+        "ArchStdEvent": "EXC_HVC",
+        "BriefDescription": "This event counts for both Hypervisor Call exceptions taken locally in the hypervisor and those taken as an exception from Non-secure EL1."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/fp_operation.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/fp_operation.json
new file mode 100644
index 000000000000..a3c368959199
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/fp_operation.json
@@ -0,0 +1,209 @@
+[
+    {
+        "EventCode": "0x0105",
+        "EventName": "FP_MV_SPEC",
+        "BriefDescription": "This event counts architecturally executed floating-point move operations."
+    },
+    {
+        "EventCode": "0x0112",
+        "EventName": "FP_LD_SPEC",
+        "BriefDescription": "This event counts architecturally executed NOSIMD load operations that using SIMD&FP registers."
+    },
+    {
+        "EventCode": "0x0113",
+        "EventName": "FP_ST_SPEC",
+        "BriefDescription": "This event counts architecturally executed NOSIMD store operations that using SIMD&FP registers."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point operations."
+    },
+    {
+        "ArchStdEvent": "FP_HP_SPEC",
+        "BriefDescription": "This event counts architecturally executed half-precision floating-point operation."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_HP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD half-precision floating-point operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_HP_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE half-precision floating-point operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_HP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE half-precision floating-point operations."
+    },
+    {
+        "ArchStdEvent": "FP_SP_SPEC",
+        "BriefDescription": "This event counts architecturally executed single-precision floating-point operation."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_SP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD single-precision floating-point operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_SP_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE single-precision floating-point operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_SP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE single-precision floating-point operations."
+    },
+    {
+        "ArchStdEvent": "FP_DP_SPEC",
+        "BriefDescription": "This event counts architecturally executed double-precision floating-point operation."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_DP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD double-precision floating-point operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_DP_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE double-precision floating-point operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_DP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE double-precision floating-point operations."
+    },
+    {
+        "ArchStdEvent": "FP_DIV_SPEC",
+        "BriefDescription": "This event counts architecturally executed floating-point divide operation."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_DIV_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point divide operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_DIV_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point divide operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_DIV_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point divide operations."
+    },
+    {
+        "ArchStdEvent": "FP_SQRT_SPEC",
+        "BriefDescription": "This event counts architecturally executed floating-point square root operation."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_SQRT_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point square root operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_SQRT_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point square root operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_SQRT_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point square root operations."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_FMA_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point FMA operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_FMA_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point FMA operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_FMA_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point FMA operations."
+    },
+    {
+        "ArchStdEvent": "FP_MUL_SPEC",
+        "BriefDescription": "This event counts architecturally executed floating-point multiply operations."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_MUL_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point multiply operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_MUL_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point multiply operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_MUL_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point multiply operations."
+    },
+    {
+        "ArchStdEvent": "FP_ADDSUB_SPEC",
+        "BriefDescription": "This event counts architecturally executed floating-point add or subtract operations."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_ADDSUB_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point add or subtract operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_ADDSUB_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point add or subtract operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_ADDSUB_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point add or subtract operations."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_RECPE_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point reciprocal estimate operations."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_RECPE_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point reciprocal estimate operations."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_RECPE_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point reciprocal estimate operations."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_CVT_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point convert operation."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_CVT_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point convert operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_CVT_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point convert operations."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_AREDUCE_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point accumulating reduction operations."
+    },
+    {
+        "ArchStdEvent": "ASE_FP_PREDUCE_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD floating-point pairwise add step operations."
+    },
+    {
+        "ArchStdEvent": "SVE_FP_VREDUCE_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE floating-point vector reduction operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_VREDUCE_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE floating-point vector reduction operations."
+    },
+    {
+        "ArchStdEvent": "FP_SCALE_OPS_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE arithmetic operations. See FP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by (128 / CSIZE) and by twice that amount for operations that would also be counted by SVE_FP_FMA_SPEC."
+    },
+    {
+        "ArchStdEvent": "FP_FIXED_OPS_SPEC",
+        "BriefDescription": "This event counts architecturally executed v8SIMD&FP arithmetic operations. See FP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. The event counter is incremented by the specified number of elements for Advanced SIMD operations or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_DOT_SPEC",
+        "BriefDescription": "This event counts architecturally executed microarchitectural Advanced SIMD or SVE floating-point dot-product operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_FP_MMLA_SPEC",
+        "BriefDescription": "This event counts architecturally executed microarchitectural Advanced SIMD or SVE floating-point matrix multiply operation."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/gcycle.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/gcycle.json
new file mode 100644
index 000000000000..b4ceddc0d25e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/gcycle.json
@@ -0,0 +1,97 @@
+[
+    {
+        "EventCode": "0x0880",
+        "EventName": "GCYCLES",
+        "BriefDescription": "This event counts the number of cycles at 100MHz."
+    },
+    {
+        "EventCode": "0x0890",
+        "EventName": "FL0_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 0."
+    },
+    {
+        "EventCode": "0x0891",
+        "EventName": "FL1_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 1."
+    },
+    {
+        "EventCode": "0x0892",
+        "EventName": "FL2_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 2."
+    },
+    {
+        "EventCode": "0x0893",
+        "EventName": "FL3_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 3."
+    },
+    {
+        "EventCode": "0x0894",
+        "EventName": "FL4_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 4."
+    },
+    {
+        "EventCode": "0x0895",
+        "EventName": "FL5_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 5."
+    },
+    {
+        "EventCode": "0x0896",
+        "EventName": "FL6_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 6."
+    },
+    {
+        "EventCode": "0x0897",
+        "EventName": "FL7_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 7."
+    },
+    {
+        "EventCode": "0x0898",
+        "EventName": "FL8_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 8."
+    },
+    {
+        "EventCode": "0x0899",
+        "EventName": "FL9_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 9."
+    },
+    {
+        "EventCode": "0x089A",
+        "EventName": "FL10_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 10."
+    },
+    {
+        "EventCode": "0x089B",
+        "EventName": "FL11_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 11."
+    },
+    {
+        "EventCode": "0x089C",
+        "EventName": "FL12_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 12."
+    },
+    {
+        "EventCode": "0x089D",
+        "EventName": "FL13_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 13."
+    },
+    {
+        "EventCode": "0x089E",
+        "EventName": "FL14_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 14."
+    },
+    {
+        "EventCode": "0x089F",
+        "EventName": "FL15_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the Frequency Level 15."
+    },
+    {
+        "EventCode": "0x08A0",
+        "EventName": "RETENTION_GCYCLES",
+        "BriefDescription": "This event counts the number of cycles where the measured core is staying in the RETENTION state."
+    },
+    {
+        "EventCode": "0x08A1",
+        "EventName": "RETENTION_COUNT",
+        "BriefDescription": "This event counts the number of changes from the normal state to the RETENTION state."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/general.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/general.json
new file mode 100644
index 000000000000..32f0fbfc4de4
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/general.json
@@ -0,0 +1,10 @@
+[
+    {
+        "ArchStdEvent": "CPU_CYCLES",
+        "BriefDescription": "This event counts every cycle."
+    },
+    {
+        "ArchStdEvent": "CNT_CYCLES",
+        "BriefDescription": "This event counts the constant frequency cycles counter increments at a constant frequency equal to the rate of increment of the System counter."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/hwpf.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/hwpf.json
new file mode 100644
index 000000000000..a784a032f353
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/hwpf.json
@@ -0,0 +1,52 @@
+[
+    {
+        "EventCode": "0x0230",
+        "EventName": "L1HWPF_STREAM_PF",
+        "BriefDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher."
+    },
+    {
+        "EventCode": "0x0231",
+        "EventName": "L1HWPF_STRIDE_PF",
+        "BriefDescription": "This event counts stride prefetch requests to L1D cache generated by hardware prefetcher."
+    },
+    {
+        "EventCode": "0x0232",
+        "EventName": "L1HWPF_PFTGT_PF",
+        "BriefDescription": "This event counts LDS prefetch requests to L1D cache generated by hardware prefetcher."
+    },
+    {
+        "EventCode": "0x0234",
+        "EventName": "L2HWPF_STREAM_PF",
+        "BriefDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefetcher."
+    },
+    {
+        "EventCode": "0x0235",
+        "EventName": "L2HWPF_STRIDE_PF",
+        "BriefDescription": "This event counts stride prefetch requests to L2 cache generated by hardware prefetcher."
+    },
+    {
+        "EventCode": "0x0237",
+        "EventName": "L2HWPF_OTHER",
+        "BriefDescription": "This event counts prefetch requests to L2 cache generated by the other causes."
+    },
+    {
+        "EventCode": "0x0238",
+        "EventName": "L3HWPF_STREAM_PF",
+        "BriefDescription": "This event counts streaming prefetch requests to L3 cache generated by hardware prefetcher."
+    },
+    {
+        "EventCode": "0x0239",
+        "EventName": "L3HWPF_STRIDE_PF",
+        "BriefDescription": "This event counts stride prefetch requests to L3 cache generated by hardware prefetcher."
+    },
+    {
+        "EventCode": "0x023B",
+        "EventName": "L3HWPF_OTHER",
+        "BriefDescription": "This event counts prefetch requests to L3 cache generated by the other causes."
+    },
+    {
+        "EventCode": "0x023C",
+        "EventName": "L1IHWPF_NEXTLINE_PF",
+        "BriefDescription": "This event counts next line's prefetch requests to L1I cache generated by hardware prefetcher."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1d_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1d_cache.json
new file mode 100644
index 000000000000..b0818a2fedb0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1d_cache.json
@@ -0,0 +1,113 @@
+[
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL",
+        "BriefDescription": "This event counts operations that cause a refill of the L1D cache. See L1D_CACHE_REFILL of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE",
+        "BriefDescription": "This event counts operations that cause a cache access to the L1D cache. See L1D_CACHE of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WB",
+        "BriefDescription": "This event counts every write-back of data from the L1D cache. See L1D_CACHE_WB of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_LMISS_RD",
+        "BriefDescription": "This event counts operations that cause a refill of the L1D cache that incurs additional latency."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_RD",
+        "BriefDescription": "This event counts L1D CACHE caused by read access."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_WR",
+        "BriefDescription": "This event counts L1D CACHE caused by write access."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+        "BriefDescription": "This event counts L1D_CACHE_REFILL caused by read access."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_WR",
+        "BriefDescription": "This event counts L1D_CACHE_REFILL caused by write access."
+    },
+    {
+        "EventCode": "0x0200",
+        "EventName": "L1D_CACHE_DM",
+        "BriefDescription": "This event counts L1D_CACHE caused by demand access."
+    },
+    {
+        "EventCode": "0x0201",
+        "EventName": "L1D_CACHE_DM_RD",
+        "BriefDescription": "This event counts L1D_CACHE caused by demand read access."
+    },
+    {
+        "EventCode": "0x0202",
+        "EventName": "L1D_CACHE_DM_WR",
+        "BriefDescription": "This event counts L1D_CACHE caused by demand write access."
+    },
+    {
+        "EventCode": "0x0208",
+        "EventName": "L1D_CACHE_REFILL_DM",
+        "BriefDescription": "This event counts L1D_CACHE_REFILL caused by demand access."
+    },
+    {
+        "EventCode": "0x0209",
+        "EventName": "L1D_CACHE_REFILL_DM_RD",
+        "BriefDescription": "This event counts L1D_CACHE_REFILL caused by demand read access."
+    },
+    {
+        "EventCode": "0x020A",
+        "EventName": "L1D_CACHE_REFILL_DM_WR",
+        "BriefDescription": "This event counts L1D_CACHE_REFILL caused by demand write access."
+    },
+    {
+        "EventCode": "0x020D",
+        "EventName": "L1D_CACHE_BTC",
+        "BriefDescription": "This event counts demand access that hits cache line with shared status and requests exclusive access in the Level 1 data cache, causing a coherence access to outside of the Level 1 caches of this PE."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_MISS",
+        "BriefDescription": "This event counts demand access that misses in the Level 1 data cache, causing an access to outside of the Level 1 caches of this PE."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_HWPRF",
+        "BriefDescription": "This event counts access counted by L1D_CACHE that is due to a hardware prefetch."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_HWPRF",
+        "BriefDescription": "This event counts hardware prefetch counted by L1D_CACHE_HWPRF that causes a refill of the Level 1 data cache from outside of the Level 1 data cache."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_HIT_RD",
+        "BriefDescription": "This event counts demand read counted by L1D_CACHE_RD that hits in the Level 1 data cache."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_HIT_WR",
+        "BriefDescription": "This event counts demand write counted by L1D_CACHE_WR that hits in the Level 1 data cache."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_HIT",
+        "BriefDescription": "This event counts access counted by L1D_CACHE that hits in the Level 1 data cache."
+    },
+    {
+        "ArchStdEvent": "L1D_LFB_HIT_RD",
+        "BriefDescription": "This event counts demand access counted by L1D_CACHE_HIT_RD that hits a cache line that is in the process of being loaded into the Level 1 data cache."
+    },
+    {
+        "ArchStdEvent": "L1D_LFB_HIT_WR",
+        "BriefDescription": "This event counts demand access counted by L1D_CACHE_HIT_WR that hits a cache line that is in the process of being loaded into the Level 1 data cache."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_PRF",
+        "BriefDescription": "This event counts fetch counted by either Level 1 data hardware prefetch or Level 1 data software prefetch."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_PRF",
+        "BriefDescription": "This event counts hardware prefetch counted by L1D_CACHE_PRF that causes a refill of the Level 1 data cache from outside of the Level 1 data cache."
+    },
+    {
+        "ArchStdEvent": "L1D_CACHE_REFILL_PERCYC",
+        "BriefDescription": "The counter counts by the number of cache refills counted by L1D_CACHE_REFILL in progress on each Processor cycle."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1i_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1i_cache.json
new file mode 100644
index 000000000000..8680d8ec461d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l1i_cache.json
@@ -0,0 +1,52 @@
+[
+    {
+        "ArchStdEvent": "L1I_CACHE_REFILL",
+        "BriefDescription": "This event counts operations that cause a refill of the L1I cache. See L1I_CACHE_REFILL of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE",
+        "BriefDescription": "This event counts operations that cause a cache access to the L1I cache. See L1I_CACHE of ARMv9 Reference Manual for more information."
+    },
+    {
+        "EventCode": "0x0207",
+        "EventName": "L1I_CACHE_DM_RD",
+        "BriefDescription": "This event counts L1I_CACHE caused by demand read access."
+    },
+    {
+        "EventCode": "0x020F",
+        "EventName": "L1I_CACHE_REFILL_DM_RD",
+        "BriefDescription": "This event counts L1I_CACHE_REFILL caused by demand read access."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_LMISS",
+        "BriefDescription": "This event counts operations that cause a refill of the L1I cache that incurs additional latency."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_HWPRF",
+        "BriefDescription": "This event counts access counted by L1I_CACHE that is due to a hardware prefetch."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_REFILL_HWPRF",
+        "BriefDescription": "This event counts hardware prefetch counted by L1I_CACHE_HWPRF that causes a refill of the Level 1 instruction cache from outside of the Level 1 instruction cache."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_HIT_RD",
+        "BriefDescription": "This event counts demand fetch counted by L1I_CACHE_DM_RD that hits in the Level 1 instruction cache."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_HIT",
+        "BriefDescription": "This event counts access counted by L1I_CACHE that hits in the Level 1 instruction cache."
+    },
+    {
+        "ArchStdEvent": "L1I_LFB_HIT_RD",
+        "BriefDescription": "This event counts demand access counted by L1I_CACHE_HIT_RD that hits a cache line that is in the process of being loaded into the Level 1 instruction cache."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_REFILL_PRF",
+        "BriefDescription": "This event counts hardware prefetch counted by L1I_CACHE_PRF that causes a refill of the Level 1 instruction cache from outside of the Level 1 instruction cache."
+    },
+    {
+        "ArchStdEvent": "L1I_CACHE_REFILL_PERCYC",
+        "BriefDescription": "The counter counts by the number of cache refills counted by L1I_CACHE_REFILL in progress on each Processor cycle."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l2_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l2_cache.json
new file mode 100644
index 000000000000..9e092752e6db
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l2_cache.json
@@ -0,0 +1,160 @@
+[
+    {
+        "ArchStdEvent": "L2D_CACHE",
+        "BriefDescription": "This event counts operations that cause a cache access to the L2 cache. See L2D_CACHE of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL",
+        "BriefDescription": "This event counts operations that cause a refill of the L2 cache. See L2D_CACHE_REFILL of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB",
+        "BriefDescription": "This event counts every write-back of data from the L2 cache caused by L2 replace, non-temporal-store and DC ZVA."
+    },
+    {
+        "ArchStdEvent": "L2I_TLB_REFILL",
+        "BriefDescription": "This event counts operations that cause a TLB refill of the L2I TLB. See L2I_TLB_REFILL of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L2I_TLB",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2I TLB. See L2I_TLB of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_RD",
+        "BriefDescription": "This event counts L2D CACHE caused by read access."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WR",
+        "BriefDescription": "This event counts L2D CACHE caused by write access."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_RD",
+        "BriefDescription": "This event counts L2D CACHE_REFILL caused by read access."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_WR",
+        "BriefDescription": "This event counts L2D CACHE_REFILL caused by write access."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_WB_VICTIM",
+        "BriefDescription": "This event counts every write-back of data from the L2 cache caused by L2 replace."
+    },
+    {
+        "EventCode": "0x0300",
+        "EventName": "L2D_CACHE_DM",
+        "BriefDescription": "This event counts L2D_CACHE caused by demand access."
+    },
+    {
+        "EventCode": "0x0301",
+        "EventName": "L2D_CACHE_DM_RD",
+        "BriefDescription": "This event counts L2D_CACHE caused by demand read access."
+    },
+    {
+        "EventCode": "0x0302",
+        "EventName": "L2D_CACHE_DM_WR",
+        "BriefDescription": "This event counts L2D_CACHE caused by demand write access."
+    },
+    {
+        "EventCode": "0x0305",
+        "EventName": "L2D_CACHE_HWPRF_ADJACENT",
+        "BriefDescription": "This event counts L2D_CACHE caused by hardware adjacent prefetch access."
+    },
+    {
+        "EventCode": "0x0308",
+        "EventName": "L2D_CACHE_REFILL_DM",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand access."
+    },
+    {
+        "EventCode": "0x0309",
+        "EventName": "L2D_CACHE_REFILL_DM_RD",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand read access."
+    },
+    {
+        "EventCode": "0x030A",
+        "EventName": "L2D_CACHE_REFILL_DM_WR",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand write access."
+    },
+    {
+        "EventCode": "0x030B",
+        "EventName": "L2D_CACHE_REFILL_DM_WR_EXCL",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand write exclusive access."
+    },
+    {
+        "EventCode": "0x030C",
+        "EventName": "L2D_CACHE_REFILL_DM_WR_ATOM",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand write atomic access."
+    },
+    {
+        "EventCode": "0x030D",
+        "EventName": "L2D_CACHE_BTC",
+        "BriefDescription": "This event counts demand access that hits cache line with shared status and requests exclusive access in the Level 1 data and Level 2 caches, causing a coherence access to outside of the Level 1 and Level 2 caches of this PE."
+    },
+    {
+        "EventCode": "0x03B0",
+        "EventName": "L2D_CACHE_WB_VICTIM_CLEAN",
+        "BriefDescription": "This event counts every write-back of data from the L2 cache caused by L2 replace where the data is clean. In this case, the data will usually be written to L3 cache."
+    },
+    {
+        "EventCode": "0x03B1",
+        "EventName": "L2D_CACHE_WB_NT",
+        "BriefDescription": "This event counts every write-back of data from the L2 cache caused by non-temporal-store."
+    },
+    {
+        "EventCode": "0x03B2",
+        "EventName": "L2D_CACHE_WB_DCZVA",
+        "BriefDescription": "This event counts every write-back of data from the L2 cache caused by DC ZVA."
+    },
+    {
+        "EventCode": "0x03B3",
+        "EventName": "L2D_CACHE_FB",
+        "BriefDescription": "This event counts every flush-back (drop) of data from the L2 cache."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_LMISS_RD",
+        "BriefDescription": "This event counts operations that cause a refill of the L2D cache that incurs additional latency."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_MISS",
+        "BriefDescription": "This event counts demand access that misses in the Level 1 data and Level 2 caches, causing an access to outside of the Level 1 and Level 2 caches of this PE."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_HWPRF",
+        "BriefDescription": "This event counts access counted by L2D_CACHE that is due to a hardware prefetch."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_HWPRF",
+        "BriefDescription": "This event counts hardware prefetch counted by L2D_CACHE_HWPRF that causes a refill of the Level 2 cache, or any Level 1 data and instruction cache of this PE, from outside of those caches."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_HIT_RD",
+        "BriefDescription": "This event counts demand read counted by L2D_CACHE_RD that hits in the Level 2 data cache."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_HIT_WR",
+        "BriefDescription": "This event counts demand write counted by L2D_CACHE_WR that hits in the Level 2 data cache."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_HIT",
+        "BriefDescription": "This event counts access counted by L2D_CACHE that hits in the Level 2 data cache."
+    },
+    {
+        "ArchStdEvent": "L2D_LFB_HIT_RD",
+        "BriefDescription": "This event counts demand access counted by L2D_CACHE_HIT_RD that hits a recently fetched line in the Level 2 cache."
+    },
+    {
+        "ArchStdEvent": "L2D_LFB_HIT_WR",
+        "BriefDescription": "This event counts demand access counted by L2D_CACHE_HIT_WR that hits a recently fetched line in the Level 2 cache."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_PRF",
+        "BriefDescription": "This event counts fetch counted by either Level 2 data hardware prefetch or Level 2 data software prefetch."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_PRF",
+        "BriefDescription": "This event counts hardware prefetch counted by L2D_CACHE_PRF that causes a refill of the Level 2 data cache from outside of the Level 1 data cache."
+    },
+    {
+        "ArchStdEvent": "L2D_CACHE_REFILL_PERCYC",
+        "BriefDescription": "The counter counts by the number of cache refills counted by L2D_CACHE_REFILL in progress on each Processor cycle."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l3_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l3_cache.json
new file mode 100644
index 000000000000..3f3e0d22ac68
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/l3_cache.json
@@ -0,0 +1,159 @@
+[
+    {
+        "ArchStdEvent": "L3D_CACHE",
+        "BriefDescription": "This event counts operations that cause a cache access to the L3 cache, as defined by the sum of L2D_CACHE_REFILL_L3D_CACHE and L2D_CACHE_WB_VICTIM_CLEAN events."
+    },
+    {
+        "ArchStdEvent": "L3D_CACHE_RD",
+        "BriefDescription": "This event counts access counted by L3D_CACHE that is a Memory-read operation, as defined by the L2D_CACHE_REFILL_L3D_CACHE events."
+    },
+    {
+        "EventCode": "0x0390",
+        "EventName": "L2D_CACHE_REFILL_L3D_CACHE",
+        "BriefDescription": "This event counts operations that cause a cache access to the L3 cache."
+    },
+    {
+        "EventCode": "0x0391",
+        "EventName": "L2D_CACHE_REFILL_L3D_CACHE_DM",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_CACHE caused by demand access."
+    },
+    {
+        "EventCode": "0x0392",
+        "EventName": "L2D_CACHE_REFILL_L3D_CACHE_DM_RD",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_CACHE caused by demand read access."
+    },
+    {
+        "EventCode": "0x0393",
+        "EventName": "L2D_CACHE_REFILL_L3D_CACHE_DM_WR",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_CACHE caused by demand write access."
+    },
+    {
+        "EventCode": "0x0394",
+        "EventName": "L2D_CACHE_REFILL_L3D_CACHE_PRF",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_CACHE caused by prefetch access."
+    },
+    {
+        "EventCode": "0x0395",
+        "EventName": "L2D_CACHE_REFILL_L3D_CACHE_HWPRF",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_CACHE caused by hardware prefetch access."
+    },
+    {
+        "EventCode": "0x0396",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS",
+        "BriefDescription": "This event counts operations that cause a miss of the L3 cache."
+    },
+    {
+        "EventCode": "0x0397",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS caused by demand access."
+    },
+    {
+        "EventCode": "0x0398",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_RD",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS caused by demand read access."
+    },
+    {
+        "EventCode": "0x0399",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_DM_WR",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS caused by demand write access."
+    },
+    {
+        "EventCode": "0x039A",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_PRF",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS caused by prefetch access."
+    },
+    {
+        "EventCode": "0x039B",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_HWPRF",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS caused by hardware prefetch access."
+    },
+    {
+        "EventCode": "0x039C",
+        "EventName": "L2D_CACHE_REFILL_L3D_HIT",
+        "BriefDescription": "This event counts operations that cause a hit of the L3 cache."
+    },
+    {
+        "EventCode": "0x039D",
+        "EventName": "L2D_CACHE_REFILL_L3D_HIT_DM",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_HIT caused by demand access."
+    },
+    {
+        "EventCode": "0x039E",
+        "EventName": "L2D_CACHE_REFILL_L3D_HIT_DM_RD",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_HIT caused by demand read access."
+    },
+    {
+        "EventCode": "0x039F",
+        "EventName": "L2D_CACHE_REFILL_L3D_HIT_DM_WR",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_HIT caused by demand write access."
+    },
+    {
+        "EventCode": "0x03A0",
+        "EventName": "L2D_CACHE_REFILL_L3D_HIT_PRF",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_HIT caused by prefetch access."
+    },
+    {
+        "EventCode": "0x03A1",
+        "EventName": "L2D_CACHE_REFILL_L3D_HIT_HWPRF",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_HIT caused by hardware prefetch access."
+    },
+    {
+        "EventCode": "0x03A2",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT",
+        "BriefDescription": "This event counts the number of L3 cache misses where the requests hit the PFTGT buffer."
+    },
+    {
+        "EventCode": "0x03A3",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT_DM",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT caused by demand access."
+    },
+    {
+        "EventCode": "0x03A4",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT_DM_RD",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT caused by demand read access."
+    },
+    {
+        "EventCode": "0x03A5",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT_DM_WR",
+        "BriefDescription": "This event counts L2D_CACHE_REFILL_L3D_MISS_PFTGT_HIT caused by demand write access."
+    },
+    {
+        "EventCode": "0x03A6",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_L_MEM",
+        "BriefDescription": "This event counts the number of L3 cache misses where the requests access the memory in the same socket as the requests."
+    },
+    {
+        "EventCode": "0x03A7",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_FR_MEM",
+        "BriefDescription": "This event counts the number of L3 cache misses where the requests access the memory in the different socket from the requests."
+    },
+    {
+        "EventCode": "0x03A8",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_L_L2",
+        "BriefDescription": "This event counts the number of L3 cache misses where the requests access the different L2 cache from the requests in the same Numa nodes as the requests."
+    },
+    {
+        "EventCode": "0x03A9",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_NR_L2",
+        "BriefDescription": "This event counts the number of L3 cache misses where the requests access L2 cache in the different Numa nodes from the requests in the same socket as the requests."
+    },
+    {
+        "EventCode": "0x03AA",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_NR_L3",
+        "BriefDescription": "This event counts the number of L3 cache misses where the requests access L3 cache in the different Numa nodes from the requests in the same socket as the requests."
+    },
+    {
+        "EventCode": "0x03AB",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_FR_L2",
+        "BriefDescription": "This event counts the number of L3 cache misses where the requests access L2 cache in the different socket from the requests."
+    },
+    {
+        "EventCode": "0x03AC",
+        "EventName": "L2D_CACHE_REFILL_L3D_MISS_FR_L3",
+        "BriefDescription": "This event counts the number of L3 cache misses where the requests access L3 cache in the different socket from the requests."
+    },
+    {
+        "ArchStdEvent": "L3D_CACHE_LMISS_RD",
+        "BriefDescription": "This event counts access counted by L3D_CACHE that is not completed by the L3D cache, and  a Memory-read operation, as defined by the L2D_CACHE_REFILL_L3D_MISS events."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/ll_cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/ll_cache.json
new file mode 100644
index 000000000000..a441b84729ab
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/ll_cache.json
@@ -0,0 +1,10 @@
+[
+    {
+        "ArchStdEvent": "LL_CACHE_RD",
+        "BriefDescription": "This event counts access counted by L3D_CACHE that is a Memory-read operation, as defined by the L2D_CACHE_REFILL_L3D_CACHE events."
+    },
+    {
+        "ArchStdEvent": "LL_CACHE_MISS_RD",
+        "BriefDescription": "This event counts access counted by L3D_CACHE that is not completed by the L3D cache, and a Memory-read operation, as defined by the L2D_CACHE_REFILL_L3D_MISS events."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/memory.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/memory.json
new file mode 100644
index 000000000000..4ef125e3a253
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/memory.json
@@ -0,0 +1,10 @@
+[
+    {
+        "ArchStdEvent": "MEM_ACCESS",
+        "BriefDescription": "This event counts architecturally executed memory-reading instructions and memory-writing instructions, as defined by the LDST_SPEC events."
+    },
+    {
+        "ArchStdEvent": "MEM_ACCESS_RD",
+        "BriefDescription": "This event counts architecturally executed memory-reading instructions, as defined by the LD_SPEC events."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pipeline.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pipeline.json
new file mode 100644
index 000000000000..3cc3105f4a5e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pipeline.json
@@ -0,0 +1,208 @@
+[
+    {
+        "EventCode": "0x01A0",
+        "EventName": "EAGA_VAL",
+        "BriefDescription": "This event counts valid cycles of EAGA pipeline."
+    },
+    {
+        "EventCode": "0x01A1",
+        "EventName": "EAGB_VAL",
+        "BriefDescription": "This event counts valid cycles of EAGB pipeline."
+    },
+    {
+        "EventCode": "0x01A3",
+        "EventName": "PRX_VAL",
+        "BriefDescription": "This event counts valid cycles of PRX pipeline."
+    },
+    {
+        "EventCode": "0x01A4",
+        "EventName": "EXA_VAL",
+        "BriefDescription": "This event counts valid cycles of EXA pipeline."
+    },
+    {
+        "EventCode": "0x01A5",
+        "EventName": "EXB_VAL",
+        "BriefDescription": "This event counts valid cycles of EXB pipeline."
+    },
+    {
+        "EventCode": "0x01A6",
+        "EventName": "EXC_VAL",
+        "BriefDescription": "This event counts valid cycles of EXC pipeline."
+    },
+    {
+        "EventCode": "0x01A7",
+        "EventName": "EXD_VAL",
+        "BriefDescription": "This event counts valid cycles of EXD pipeline."
+    },
+    {
+        "EventCode": "0x01A8",
+        "EventName": "FLA_VAL",
+        "BriefDescription": "This event counts valid cycles of FLA pipeline."
+    },
+    {
+        "EventCode": "0x01A9",
+        "EventName": "FLB_VAL",
+        "BriefDescription": "This event counts valid cycles of FLB pipeline."
+    },
+    {
+        "EventCode": "0x01AA",
+        "EventName": "STEA_VAL",
+        "BriefDescription": "This event counts valid cycles of STEA pipeline."
+    },
+    {
+        "EventCode": "0x01AB",
+        "EventName": "STEB_VAL",
+        "BriefDescription": "This event counts valid cycles of STEB pipeline."
+    },
+    {
+        "EventCode": "0x01AC",
+        "EventName": "STFL_VAL",
+        "BriefDescription": "This event counts valid cycles of STFL pipeline."
+    },
+    {
+        "EventCode": "0x01AD",
+        "EventName": "STPX_VAL",
+        "BriefDescription": "This event counts valid cycles of STPX pipeline."
+    },
+    {
+        "EventCode": "0x01B0",
+        "EventName": "FLA_VAL_PRD_CNT",
+        "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 32 when all bits are 1."
+    },
+    {
+        "EventCode": "0x01B1",
+        "EventName": "FLB_VAL_PRD_CNT",
+        "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 32 when all bits are 1."
+    },
+    {
+        "EventCode": "0x01B2",
+        "EventName": "FLA_VAL_FOR_PRD",
+        "BriefDescription": "This event counts valid cycles of FLA pipeline."
+    },
+    {
+        "EventCode": "0x01B3",
+        "EventName": "FLB_VAL_FOR_PRD",
+        "BriefDescription": "This event counts valid cycles of FLB pipeline."
+    },
+    {
+        "EventCode": "0x0240",
+        "EventName": "L1_PIPE0_VAL",
+        "BriefDescription": "This event counts valid cycles of L1D cache pipeline#0."
+    },
+    {
+        "EventCode": "0x0241",
+        "EventName": "L1_PIPE1_VAL",
+        "BriefDescription": "This event counts valid cycles of L1D cache pipeline#1."
+    },
+    {
+        "EventCode": "0x0242",
+        "EventName": "L1_PIPE2_VAL",
+        "BriefDescription": "This event counts valid cycles of L1D cache pipeline#2."
+    },
+    {
+        "EventCode": "0x0250",
+        "EventName": "L1_PIPE0_COMP",
+        "BriefDescription": "This event counts completed requests in L1D cache pipeline#0."
+    },
+    {
+        "EventCode": "0x0251",
+        "EventName": "L1_PIPE1_COMP",
+        "BriefDescription": "This event counts completed requests in L1D cache pipeline#1."
+    },
+    {
+        "EventCode": "0x025A",
+        "EventName": "L1_PIPE_ABORT_STLD_INTLK",
+        "BriefDescription": "This event counts aborted requests in L1D pipelines that due to store-load interlock."
+    },
+    {
+        "EventCode": "0x026C",
+        "EventName": "L1I_PIPE_COMP",
+        "BriefDescription": "This event counts completed requests in L1I cache pipeline."
+    },
+    {
+        "EventCode": "0x026D",
+        "EventName": "L1I_PIPE_VAL",
+        "BriefDescription": "This event counts valid cycles of L1I cache pipeline."
+    },
+    {
+        "EventCode": "0x0278",
+        "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_SCE",
+        "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1."
+    },
+    {
+        "EventCode": "0x0279",
+        "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_SCE",
+        "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1."
+    },
+    {
+        "EventCode": "0x02A0",
+        "EventName": "L1_PIPE0_VAL_IU_NOT_SEC0",
+        "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0."
+    },
+    {
+        "EventCode": "0x02A1",
+        "EventName": "L1_PIPE1_VAL_IU_NOT_SEC0",
+        "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0."
+    },
+    {
+        "EventCode": "0x02B0",
+        "EventName": "L1_PIPE_COMP_GATHER_2FLOW",
+        "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined."
+    },
+    {
+        "EventCode": "0x02B1",
+        "EventName": "L1_PIPE_COMP_GATHER_1FLOW",
+        "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined."
+    },
+    {
+        "EventCode": "0x02B2",
+        "EventName": "L1_PIPE_COMP_GATHER_0FLOW",
+        "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0."
+    },
+    {
+        "EventCode": "0x02B3",
+        "EventName": "L1_PIPE_COMP_SCATTER_1FLOW",
+        "BriefDescription": "This event counts the number of flows of the scatter instructions."
+    },
+    {
+        "EventCode": "0x02B8",
+        "EventName": "L1_PIPE0_COMP_PRD_CNT",
+        "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 64 when all bits are 1."
+    },
+    {
+        "EventCode": "0x02B9",
+        "EventName": "L1_PIPE1_COMP_PRD_CNT",
+        "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 64 when all bits are 1."
+    },
+    {
+        "EventCode": "0x0330",
+        "EventName": "L2_PIPE_VAL",
+        "BriefDescription": "This event counts valid cycles of L2 cache pipeline."
+    },
+    {
+        "EventCode": "0x0350",
+        "EventName": "L2_PIPE_COMP_ALL",
+        "BriefDescription": "This event counts completed requests in L2 cache pipeline."
+    },
+    {
+        "EventCode": "0x0370",
+        "EventName": "L2_PIPE_COMP_PF_L2MIB_MCH",
+        "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access."
+    },
+    {
+        "ArchStdEvent": "STALL_FRONTEND_TLB",
+        "BriefDescription": "This event counts every cycle counted by STALL_FRONTEND_MEMBOUND when there is a demand instruction miss in the instruction TLB."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_TLB",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND_MEMBOUND when there is a demand data miss in the data TLB."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_ST",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND_MEMBOUND when the backend is stalled waiting for a store."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_ILOCK",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND when operations are available from the frontend but at least one is not ready to be sent to the backend because of an input dependency."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pmu.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pmu.json
new file mode 100644
index 000000000000..65bd6cdd0dd5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/pmu.json
@@ -0,0 +1,10 @@
+[
+    {
+        "ArchStdEvent": "PMU_OVFS",
+        "BriefDescription": "This event counts the event generated each time one of the condition occurs described in Arm Architecture Reference Manual for A-profile architecture. This event is only for output to the trace unit."
+    },
+    {
+        "ArchStdEvent": "PMU_HOVFS",
+        "BriefDescription": "This event counts the event generated each time an event is counted by an event counter <n> and all of the condition occur described in Arm Architecture Reference Manual for A-profile architecture. This event is only for output to the trace unit."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/retired.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/retired.json
new file mode 100644
index 000000000000..de56aafec2dc
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/retired.json
@@ -0,0 +1,30 @@
+[
+    {
+        "ArchStdEvent": "SW_INCR",
+        "BriefDescription": "This event counts on writes to the PMSWINC register."
+    },
+    {
+        "ArchStdEvent": "INST_RETIRED",
+        "BriefDescription": "This event counts every architecturally executed instruction."
+    },
+    {
+        "ArchStdEvent": "CID_WRITE_RETIRED",
+        "BriefDescription": "This event counts every write to CONTEXTIDR."
+    },
+    {
+        "ArchStdEvent": "BR_RETIRED",
+        "BriefDescription": "This event counts architecturally executed branch instruction."
+    },
+    {
+        "ArchStdEvent": "BR_MIS_PRED_RETIRED",
+        "BriefDescription": "This event counts architecturally executed branch instruction which was mispredicted."
+    },
+    {
+        "ArchStdEvent": "OP_RETIRED",
+        "BriefDescription": "This event counts every architecturally executed micro-operation."
+    },
+    {
+        "ArchStdEvent": "UOP_RETIRED",
+        "BriefDescription": "This event counts micro-operation that would be executed in a Simple sequential execution of the program."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/spec_operation.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/spec_operation.json
new file mode 100644
index 000000000000..4841b43e2871
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/spec_operation.json
@@ -0,0 +1,171 @@
+[
+    {
+        "ArchStdEvent": "BR_MIS_PRED",
+        "BriefDescription": "This event counts each correction to the predicted program flow that occurs because of a misprediction from, or no prediction from, the branch prediction resources and that relates to instructions that the branch prediction resources are capable of predicting."
+    },
+    {
+        "ArchStdEvent": "BR_PRED",
+        "BriefDescription": "This event counts every branch or other change in the program flow that the branch prediction resources are capable of predicting."
+    },
+    {
+        "ArchStdEvent": "INST_SPEC",
+        "BriefDescription": "This event counts every architecturally executed instruction."
+    },
+    {
+        "ArchStdEvent": "OP_SPEC",
+        "BriefDescription": "This event counts every speculatively executed micro-operation."
+    },
+    {
+        "ArchStdEvent": "LDREX_SPEC",
+        "BriefDescription": "This event counts architecturally executed load-exclusive instructions."
+    },
+    {
+        "ArchStdEvent": "STREX_SPEC",
+        "BriefDescription": "This event counts architecturally executed store-exclusive instructions."
+    },
+    {
+        "ArchStdEvent": "LD_SPEC",
+        "BriefDescription": "This event counts architecturally executed memory-reading instructions, as defined by the LD_RETIRED event."
+    },
+    {
+        "ArchStdEvent": "ST_SPEC",
+        "BriefDescription": "This event counts architecturally executed memory-writing instructions, as defined by the ST_RETIRED event. This event counts DCZVA as a store operation."
+    },
+    {
+        "ArchStdEvent": "LDST_SPEC",
+        "BriefDescription": "This event counts architecturally executed memory-reading instructions and memory-writing instructions, as defined by the LD_RETIRED and ST_RETIRED events."
+    },
+    {
+        "ArchStdEvent": "DP_SPEC",
+        "BriefDescription": "This event counts architecturally executed integer data-processing instructions. See DP_SPEC of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "ASE_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD data-processing instructions."
+    },
+    {
+        "ArchStdEvent": "VFP_SPEC",
+        "BriefDescription": "This event counts architecturally executed floating-point data-processing instructions."
+    },
+    {
+        "ArchStdEvent": "PC_WRITE_SPEC",
+        "BriefDescription": "This event counts only software changes of the PC that defined by the instruction architecturally executed, condition code check pass, software change of the PC event."
+    },
+    {
+        "ArchStdEvent": "CRYPTO_SPEC",
+        "BriefDescription": "This event counts architecturally executed cryptographic instructions, except PMULL and VMULL."
+    },
+    {
+        "ArchStdEvent": "BR_IMMED_SPEC",
+        "BriefDescription": "This event counts architecturally executed immediate branch instructions."
+    },
+    {
+        "ArchStdEvent": "BR_RETURN_SPEC",
+        "BriefDescription": "This event counts architecturally executed procedure return operations that defined by the BR_RETURN_RETIRED event."
+    },
+    {
+        "ArchStdEvent": "BR_INDIRECT_SPEC",
+        "BriefDescription": "This event counts architecturally executed indirect branch instructions that includes software change of the PC other than exception-generating instructions and immediate branch instructions."
+    },
+    {
+        "ArchStdEvent": "ISB_SPEC",
+        "BriefDescription": "This event counts architecturally executed Instruction Synchronization Barrier instructions."
+    },
+    {
+        "ArchStdEvent": "DSB_SPEC",
+        "BriefDescription": "This event counts architecturally executed Data Synchronization Barrier instructions."
+    },
+    {
+        "ArchStdEvent": "DMB_SPEC",
+        "BriefDescription": "This event counts architecturally executed Data Memory Barrier instructions, excluding the implied barrier operations of load/store operations with release consistency semantics."
+    },
+    {
+        "ArchStdEvent": "CSDB_SPEC",
+        "BriefDescription": "This event counts speculatively executed control speculation barrier instructions."
+    },
+    {
+        "EventCode": "0x0108",
+        "EventName": "PRD_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that using predicate register."
+    },
+    {
+        "EventCode": "0x0109",
+        "EventName": "IEL_SPEC",
+        "BriefDescription": "This event counts architecturally executed inter-element manipulation operations."
+    },
+    {
+        "EventCode": "0x010A",
+        "EventName": "IREG_SPEC",
+        "BriefDescription": "This event counts architecturally executed inter-register manipulation operations."
+    },
+    {
+        "EventCode": "0x011A",
+        "EventName": "BC_LD_SPEC",
+        "BriefDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations."
+    },
+    {
+        "EventCode": "0x011B",
+        "EventName": "DCZVA_SPEC",
+        "BriefDescription": "This event counts architecturally executed zero blocking operations due to the DC ZVA instruction."
+    },
+    {
+        "EventCode": "0x0121",
+        "EventName": "EFFECTIVE_INST_SPEC",
+        "BriefDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction."
+    },
+    {
+        "EventCode": "0x0123",
+        "EventName": "PRE_INDEX_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that uses pre-index as its addressing mode."
+    },
+    {
+        "EventCode": "0x0124",
+        "EventName": "POST_INDEX_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that uses post-index as its addressing mode."
+    },
+    {
+        "EventCode": "0x0139",
+        "EventName": "UOP_SPLIT",
+        "BriefDescription": "This event counts the occurrence count of the micro-operation split."
+    },
+    {
+        "ArchStdEvent": "ASE_INST_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD operations."
+    },
+    {
+        "ArchStdEvent": "INT_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations due to scalar, Advanced SIMD, and SVE instructions listed in Integer instructions section of ARMv9 Reference Manual."
+    },
+    {
+        "ArchStdEvent": "INT_DIV_SPEC",
+        "BriefDescription": "This event counts architecturally executed integer divide operation."
+    },
+    {
+        "ArchStdEvent": "INT_DIV64_SPEC",
+        "BriefDescription": "This event counts architecturally executed 64-bit integer divide operation."
+    },
+    {
+        "ArchStdEvent": "INT_MUL_SPEC",
+        "BriefDescription": "This event counts architecturally executed integer multiply operation."
+    },
+    {
+        "ArchStdEvent": "INT_MUL64_SPEC",
+        "BriefDescription": "This event counts architecturally executed integer 64-bit x 64-bit multiply operation."
+    },
+    {
+        "ArchStdEvent": "INT_MULH64_SPEC",
+        "BriefDescription": "This event counts architecturally executed integer 64-bit x 64-bit multiply returning high part operation."
+    },
+    {
+        "ArchStdEvent": "NONFP_SPEC",
+        "BriefDescription": "This event counts architecturally executed non-floating-point operations."
+    },
+    {
+        "ArchStdEvent": "INT_SCALE_OPS_SPEC",
+        "BriefDescription": "This event counts each integer ALU operation counted by SVE_INT_SPEC. See ALU operation counts section of ARMv9 Reference Manual for information on the counter increment for different types of instruction."
+    },
+    {
+        "ArchStdEvent": "INT_FIXED_OPS_SPEC",
+        "BriefDescription": "This event counts each integer ALU operation counted by INT_SPEC that is not counted by SVE_INT_SPEC. See ALU operation counts section of ARMv9 Reference Manual for information on the counter increment for different types of instruction."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/stall.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/stall.json
new file mode 100644
index 000000000000..5fb81e2a0a07
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/stall.json
@@ -0,0 +1,94 @@
+[
+    {
+        "ArchStdEvent": "STALL_FRONTEND",
+        "BriefDescription": "This event counts every cycle counted by the CPU_CYCLES event on that no operation was issued because there are no operations available to issue for this PE from the frontend."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND",
+        "BriefDescription": "This event counts every cycle counted by the CPU_CYCLES event on that no operation was issued because the backend is unable to accept any operations."
+    },
+    {
+        "ArchStdEvent": "STALL",
+        "BriefDescription": "This event counts every cycle that no instruction was dispatched from decode unit."
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT_BACKEND",
+        "BriefDescription": "This event counts every cycle that no instruction was dispatched from decode unit due to the backend."
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT_FRONTEND",
+        "BriefDescription": "This event counts every cycle that no instruction was dispatched from decode unit due to the frontend."
+    },
+    {
+        "ArchStdEvent": "STALL_SLOT",
+        "BriefDescription": "This event counts every cycle that no instruction or operation Slot was dispatched from decode unit."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_MEM",
+        "BriefDescription": "This event counts every cycle that no instruction was dispatched from decode unit due to memory stall."
+    },
+    {
+        "ArchStdEvent": "STALL_FRONTEND_MEMBOUND",
+        "BriefDescription": "This event counts every cycle counted by STALL_FRONTEND when no instructions are delivered from the memory system."
+    },
+    {
+        "ArchStdEvent": "STALL_FRONTEND_L1I",
+        "BriefDescription": "This event counts every cycle counted by STALL_FRONTEND_MEMBOUND when there is a demand instruction miss in the first level of instruction cache."
+    },
+    {
+        "ArchStdEvent": "STALL_FRONTEND_L2I",
+        "BriefDescription": "This event counts every cycle counted by STALL_FRONTEND_MEMBOUND when there is a demand instruction miss in the second level of instruction cache."
+    },
+    {
+        "ArchStdEvent": "STALL_FRONTEND_MEM",
+        "BriefDescription": "This event counts every cycle counted by STALL_FRONTEND_MEMBOUND when there is a demand instruction miss in the last level of instruction cache within the PE clock domain or a non-cacheable instruction fetch in progress."
+    },
+    {
+        "ArchStdEvent": "STALL_FRONTEND_CPUBOUND",
+        "BriefDescription": "This event counts every cycle counted by STALL_FRONTEND when the frontend is stalled on a frontend processor resource, not including memory."
+    },
+    {
+        "ArchStdEvent": "STALL_FRONTEND_FLOW",
+        "BriefDescription": "This event counts every cycle counted by STALL_FRONTEND_CPUBOUND when the frontend is stalled on unavailability of prediction flow resources."
+    },
+    {
+        "ArchStdEvent": "STALL_FRONTEND_FLUSH",
+        "BriefDescription": "This event counts every cycle counted by STALL_FRONTEND_CPUBOUND when the frontend is recovering from a pipeline flush."
+    },
+    {
+        "ArchStdEvent": "STALL_FRONTEND_RENAME",
+        "BriefDescription": "This event counts every cycle counted by STALL_FRONTEND_CPUBOUND when operations are available from the frontend but at least one is not ready to be sent to the backend because no rename register is available."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_MEMBOUND",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND when the backend is waiting for a memory access to complete."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_L1D",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND_MEMBOUND when there is a demand data miss in L1D cache."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_L2D",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND_MEMBOUND when there is a demand data miss in L2D cache."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_CPUBOUND",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND when the backend is stalled on a processor resource, not including memory."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_BUSY",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND when operations are available from the frontend but the backend is not able to accept an operation because an execution unit is busy."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_RENAME",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND_CPUBOUND when operations are available from the frontend but at least one is not ready to be sent to the backend because no rename register is available."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_ATOMIC",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND_MEMBOUND when the backend is processing an Atomic operation."
+    },
+    {
+        "ArchStdEvent": "STALL_BACKEND_MEMCPYSET",
+        "BriefDescription": "This event counts every cycle counted by STALL_BACKEND_MEMBOUND when the backend is processing a Memory Copy or Set instruction."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/sve.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/sve.json
new file mode 100644
index 000000000000..e66b5af00f90
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/sve.json
@@ -0,0 +1,254 @@
+[
+    {
+        "ArchStdEvent": "SIMD_INST_RETIRED",
+        "BriefDescription": "This event counts architecturally executed SIMD instructions, excluding the Advanced SIMD scalar instructions and the instructions listed in Non-SIMD SVE instructions section of ARMv9 Reference Manual."
+    },
+    {
+        "ArchStdEvent": "SVE_INST_RETIRED",
+        "BriefDescription": "This event counts architecturally executed SVE instructions, including the instructions listed in Non-SIMD SVE instructions section of ARMv9 Reference Manual."
+    },
+    {
+        "ArchStdEvent": "SVE_INST_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE instructions, including the instructions listed in Non-SIMD SVE instructions section of ARMv9 Reference Manual."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INST_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE operations."
+    },
+    {
+        "ArchStdEvent": "UOP_SPEC",
+        "BriefDescription": "This event counts all architecturally executed micro-operations."
+    },
+    {
+        "ArchStdEvent": "SVE_MATH_SPEC",
+        "BriefDescription": "This event counts architecturally executed math function operations due to the SVE FTSMUL, FTMAD, FTSSEL, and FEXPA instructions."
+    },
+    {
+        "ArchStdEvent": "FP_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations due to scalar, Advanced SIMD, and SVE instructions listed in Floating-point instructions section of ARMv9 Reference Manual."
+    },
+    {
+        "ArchStdEvent": "FP_FMA_SPEC",
+        "BriefDescription": "This event counts architecturally executed floating-point fused multiply-add and multiply-subtract operations."
+    },
+    {
+        "ArchStdEvent": "FP_RECPE_SPEC",
+        "BriefDescription": "This event counts architecturally executed floating-point reciprocal estimate operations due to the Advanced SIMD scalar, Advanced SIMD vector, and SVE FRECPE and FRSQRTE instructions."
+    },
+    {
+        "ArchStdEvent": "FP_CVT_SPEC",
+        "BriefDescription": "This event counts architecturally executed floating-point convert operations due to the scalar, Advanced SIMD, and SVE floating-point conversion instructions listed in Floating-point conversions section of ARMv9 Reference Manual."
+    },
+    {
+        "ArchStdEvent": "ASE_INT_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD integer operations."
+    },
+    {
+        "ArchStdEvent": "SVE_INT_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE integer operations."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INT_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE integer operations."
+    },
+    {
+        "ArchStdEvent": "SVE_INT_DIV_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE integer divide operation."
+    },
+    {
+        "ArchStdEvent": "SVE_INT_DIV64_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE 64-bit integer divide operation."
+    },
+    {
+        "ArchStdEvent": "ASE_INT_MUL_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD integer multiply operation."
+    },
+    {
+        "ArchStdEvent": "SVE_INT_MUL_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE integer multiply operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INT_MUL_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE integer multiply operations."
+    },
+    {
+        "ArchStdEvent": "SVE_INT_MUL64_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE integer 64-bit x 64-bit multiply operation."
+    },
+    {
+        "ArchStdEvent": "SVE_INT_MULH64_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE integer 64-bit x 64-bit multiply returning high part operations."
+    },
+    {
+        "ArchStdEvent": "ASE_NONFP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD non-floating-point operations."
+    },
+    {
+        "ArchStdEvent": "SVE_NONFP_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE non-floating-point operations."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_NONFP_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE non-floating-point operations."
+    },
+    {
+        "ArchStdEvent": "ASE_INT_VREDUCE_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD integer reduction operation."
+    },
+    {
+        "ArchStdEvent": "SVE_INT_VREDUCE_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE integer reduction operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INT_VREDUCE_SPEC",
+        "BriefDescription": "This event counts architecturally executed Advanced SIMD and SVE integer reduction operations."
+    },
+    {
+        "ArchStdEvent": "SVE_PERM_SPEC",
+        "BriefDescription": "This event counts architecturally executed vector or predicate permute operation."
+    },
+    {
+        "ArchStdEvent": "SVE_XPIPE_Z2R_SPEC",
+        "BriefDescription": "This event counts architecturally executed vector to general-purpose scalar cross-pipeline transfer operation."
+    },
+    {
+        "ArchStdEvent": "SVE_XPIPE_R2Z_SPEC",
+        "BriefDescription": "This event counts architecturally executed general-purpose scalar to vector cross-pipeline transfer operation."
+    },
+    {
+        "ArchStdEvent": "SVE_PGEN_SPEC",
+        "BriefDescription": "This event counts architecturally executed predicate-generating operation."
+    },
+    {
+        "ArchStdEvent": "SVE_PGEN_FLG_SPEC",
+        "BriefDescription": "This event counts architecturally executed predicate-generating operation that sets condition flags."
+    },
+    {
+        "ArchStdEvent": "SVE_PPERM_SPEC",
+        "BriefDescription": "This event counts architecturally executed predicate permute operation."
+    },
+    {
+        "ArchStdEvent": "SVE_PRED_SPEC",
+        "BriefDescription": "This event counts architecturally executed SIMD data-processing and load/store operations due to SVE instructions with a Governing predicate operand that determines the Active elements."
+    },
+    {
+        "ArchStdEvent": "SVE_MOVPRFX_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations due to MOVPRFX instructions, whether or not they were fused with the prefixed instruction."
+    },
+    {
+        "ArchStdEvent": "SVE_MOVPRFX_Z_SPEC",
+        "BriefDescription": "This event counts architecturally executed operation counted by SVE_MOVPRFX_SPEC where the operation uses zeroing predication."
+    },
+    {
+        "ArchStdEvent": "SVE_MOVPRFX_M_SPEC",
+        "BriefDescription": "This event counts architecturally executed operation counted by SVE_MOVPRFX_SPEC where the operation uses merging predication."
+    },
+    {
+        "ArchStdEvent": "SVE_MOVPRFX_U_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations due to MOVPRFX instructions that were not fused with the prefixed instruction."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_LD_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that read from memory due to SVE and Advanced SIMD load instructions."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_ST_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that write to memory due to SVE and Advanced SIMD store instructions."
+    },
+    {
+        "ArchStdEvent": "PRF_SPEC",
+        "BriefDescription": "This event counts architecturally executed prefetch operations due to scalar PRFM, PRFUM and SVE PRF instructions."
+    },
+    {
+        "ArchStdEvent": "BASE_LD_REG_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that read from memory due to an instruction that loads a general-purpose register."
+    },
+    {
+        "ArchStdEvent": "BASE_ST_REG_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that write to memory due to an instruction that stores a general-purpose register, excluding the DC ZVA instruction."
+    },
+    {
+        "ArchStdEvent": "SVE_LDR_REG_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that read from memory due to an SVE LDR instruction."
+    },
+    {
+        "ArchStdEvent": "SVE_STR_REG_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that write to memory due to an SVE STR instruction."
+    },
+    {
+        "ArchStdEvent": "SVE_LDR_PREG_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that read from memory due to an SVE LDR (predicate) instruction."
+    },
+    {
+        "ArchStdEvent": "SVE_STR_PREG_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that write to memory due to an SVE STR (predicate) instruction."
+    },
+    {
+        "ArchStdEvent": "SVE_PRF_CONTIG_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that prefetch memory due to an SVE predicated single contiguous element prefetch instruction."
+    },
+    {
+        "ArchStdEvent": "SVE_LDNT_CONTIG_SPEC",
+        "BriefDescription": "This event counts architecturally executed operation that reads from memory with a non-temporal hint due to an SVE non-temporal contiguous element load instruction."
+    },
+    {
+        "ArchStdEvent": "SVE_STNT_CONTIG_SPEC",
+        "BriefDescription": "This event counts architecturally executed operation that writes to memory with a non-temporal hint due to an SVE non-temporal contiguous element store instruction."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_LD_MULTI_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that read from memory due to SVE and Advanced SIMD multiple vector contiguous structure load instructions."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_ST_MULTI_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that write to memory due to SVE and Advanced SIMD multiple vector contiguous structure store instructions."
+    },
+    {
+        "ArchStdEvent": "SVE_LD_GATHER_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that read from memory due to SVE non-contiguous gather-load instructions."
+    },
+    {
+        "ArchStdEvent": "SVE_ST_SCATTER_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that write to memory due to SVE non-contiguous scatter-store instructions."
+    },
+    {
+        "ArchStdEvent": "SVE_PRF_GATHER_SPEC",
+        "BriefDescription": "This event counts architecturally executed operations that prefetch memory due to SVE non-contiguous gather-prefetch instructions."
+    },
+    {
+        "ArchStdEvent": "SVE_LDFF_SPEC",
+        "BriefDescription": "This event counts architecturally executed memory read operations due to SVE First-fault and Non-fault load instructions."
+    },
+    {
+        "ArchStdEvent": "FP_HP_SCALE_OPS_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE half-precision arithmetic operations. See FP_HP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 8, or by 16 for operations that would also be counted by SVE_FP_FMA_SPEC."
+    },
+    {
+        "ArchStdEvent": "FP_HP_FIXED_OPS_SPEC",
+        "BriefDescription": "This event counts architecturally executed v8SIMD&FP half-precision arithmetic operations. See FP_HP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by the number of 16-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC."
+    },
+    {
+        "ArchStdEvent": "FP_SP_SCALE_OPS_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE single-precision arithmetic operations. See FP_SP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 4, or by 8 for operations that would also be counted by SVE_FP_FMA_SPEC."
+    },
+    {
+        "ArchStdEvent": "FP_SP_FIXED_OPS_SPEC",
+        "BriefDescription": "This event counts architecturally executed v8SIMD&FP single-precision arithmetic operations. See FP_SP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by the number of 32-bit elements for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC."
+    },
+    {
+        "ArchStdEvent": "FP_DP_SCALE_OPS_SPEC",
+        "BriefDescription": "This event counts architecturally executed SVE double-precision arithmetic operations. See FP_DP_SCALE_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 2, or by 4 for operations that would also be counted by SVE_FP_FMA_SPEC."
+    },
+    {
+        "ArchStdEvent": "FP_DP_FIXED_OPS_SPEC",
+        "BriefDescription": "This event counts architecturally executed v8SIMD&FP double-precision arithmetic operations. See FP_DP_FIXED_OPS_SPEC of ARMv9 Reference Manual for more information. This event counter is incremented by 2 for Advanced SIMD operations, or by 1 for scalar operations, and by twice those amounts for operations that would also be counted by FP_FMA_SPEC."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INT_DOT_SPEC",
+        "BriefDescription": "This event counts architecturally executed microarchitectural Advanced SIMD or SVE integer dot-product operation."
+    },
+    {
+        "ArchStdEvent": "ASE_SVE_INT_MMLA_SPEC",
+        "BriefDescription": "This event counts architecturally executed microarchitectural Advanced SIMD or SVE integer matrix multiply operation."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/tlb.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/tlb.json
new file mode 100644
index 000000000000..edc7cb8696c8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/tlb.json
@@ -0,0 +1,362 @@
+[
+    {
+        "ArchStdEvent": "L1I_TLB_REFILL",
+        "BriefDescription": "This event counts operations that cause a TLB refill of the L1I TLB. See L1I_TLB_REFILL of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L1D_TLB_REFILL",
+        "BriefDescription": "This event counts operations that cause a TLB refill of the L1D TLB. See L1D_TLB_REFILL of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L1D_TLB",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1D TLB. See L1D_TLB of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L1I_TLB",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1I TLB. See L1I_TLB of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L2D_TLB_REFILL",
+        "BriefDescription": "This event counts operations that cause a TLB refill of the L2D TLB. See L2D_TLB_REFILL of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "L2D_TLB",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2D TLB. See L2D_TLB of ARMv9 Reference Manual for more information."
+    },
+    {
+        "ArchStdEvent": "DTLB_WALK",
+        "BriefDescription": "This event counts data TLB access with at least one translation table walk."
+    },
+    {
+        "ArchStdEvent": "ITLB_WALK",
+        "BriefDescription": "This event counts instruction TLB access with at least one translation table walk."
+    },
+    {
+        "EventCode": "0x0C00",
+        "EventName": "L1I_TLB_4K",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1I in 4KB page."
+    },
+    {
+        "EventCode": "0x0C01",
+        "EventName": "L1I_TLB_64K",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1I in 64KB page."
+    },
+    {
+        "EventCode": "0x0C02",
+        "EventName": "L1I_TLB_2M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1I in 2MB page."
+    },
+    {
+        "EventCode": "0x0C03",
+        "EventName": "L1I_TLB_32M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1I in 32MB page."
+    },
+    {
+        "EventCode": "0x0C04",
+        "EventName": "L1I_TLB_512M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1I in 512MB page."
+    },
+    {
+        "EventCode": "0x0C05",
+        "EventName": "L1I_TLB_1G",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1I in 1GB page."
+    },
+    {
+        "EventCode": "0x0C06",
+        "EventName": "L1I_TLB_16G",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1I in 16GB page."
+    },
+    {
+        "EventCode": "0x0C08",
+        "EventName": "L1D_TLB_4K",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1D in 4KB page."
+    },
+    {
+        "EventCode": "0x0C09",
+        "EventName": "L1D_TLB_64K",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1D in 64KB page."
+    },
+    {
+        "EventCode": "0x0C0A",
+        "EventName": "L1D_TLB_2M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1D in 2MB page."
+    },
+    {
+        "EventCode": "0x0C0B",
+        "EventName": "L1D_TLB_32M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1D in 32MB page."
+    },
+    {
+        "EventCode": "0x0C0C",
+        "EventName": "L1D_TLB_512M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1D in 512MB page."
+    },
+    {
+        "EventCode": "0x0C0D",
+        "EventName": "L1D_TLB_1G",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1D in 1GB page."
+    },
+    {
+        "EventCode": "0x0C0E",
+        "EventName": "L1D_TLB_16G",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L1D in 16GB page."
+    },
+    {
+        "EventCode": "0x0C10",
+        "EventName": "L1I_TLB_REFILL_4K",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 4KB page."
+    },
+    {
+        "EventCode": "0x0C11",
+        "EventName": "L1I_TLB_REFILL_64K",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 64KB page."
+    },
+    {
+        "EventCode": "0x0C12",
+        "EventName": "L1I_TLB_REFILL_2M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 2MB page."
+    },
+    {
+        "EventCode": "0x0C13",
+        "EventName": "L1I_TLB_REFILL_32M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 32MB page."
+    },
+    {
+        "EventCode": "0x0C14",
+        "EventName": "L1I_TLB_REFILL_512M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 512MB page."
+    },
+    {
+        "EventCode": "0x0C15",
+        "EventName": "L1I_TLB_REFILL_1G",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 1GB page."
+    },
+    {
+        "EventCode": "0x0C16",
+        "EventName": "L1I_TLB_REFILL_16G",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1I in 16GB page."
+    },
+    {
+        "EventCode": "0x0C18",
+        "EventName": "L1D_TLB_REFILL_4K",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 4KB page."
+    },
+    {
+        "EventCode": "0x0C19",
+        "EventName": "L1D_TLB_REFILL_64K",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 64KB page."
+    },
+    {
+        "EventCode": "0x0C1A",
+        "EventName": "L1D_TLB_REFILL_2M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 2MB page."
+    },
+    {
+        "EventCode": "0x0C1B",
+        "EventName": "L1D_TLB_REFILL_32M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 32MB page."
+    },
+    {
+        "EventCode": "0x0C1C",
+        "EventName": "L1D_TLB_REFILL_512M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 512MB page."
+    },
+    {
+        "EventCode": "0x0C1D",
+        "EventName": "L1D_TLB_REFILL_1G",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 1GB page."
+    },
+    {
+        "EventCode": "0x0C1E",
+        "EventName": "L1D_TLB_REFILL_16G",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L1D in 16GB page."
+    },
+    {
+        "EventCode": "0x0C20",
+        "EventName": "L2I_TLB_4K",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2I in 4KB page."
+    },
+    {
+        "EventCode": "0x0C21",
+        "EventName": "L2I_TLB_64K",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2I in 64KB page."
+    },
+    {
+        "EventCode": "0x0C22",
+        "EventName": "L2I_TLB_2M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2I in 2MB page."
+    },
+    {
+        "EventCode": "0x0C23",
+        "EventName": "L2I_TLB_32M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2I in 32MB page."
+    },
+    {
+        "EventCode": "0x0C24",
+        "EventName": "L2I_TLB_512M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2I in 512MB page."
+    },
+    {
+        "EventCode": "0x0C25",
+        "EventName": "L2I_TLB_1G",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2I in 1GB page."
+    },
+    {
+        "EventCode": "0x0C26",
+        "EventName": "L2I_TLB_16G",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2I in 16GB page."
+    },
+    {
+        "EventCode": "0x0C28",
+        "EventName": "L2D_TLB_4K",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2D in 4KB page."
+    },
+    {
+        "EventCode": "0x0C29",
+        "EventName": "L2D_TLB_64K",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2D in 64KB page."
+    },
+    {
+        "EventCode": "0x0C2A",
+        "EventName": "L2D_TLB_2M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2D in 2MB page."
+    },
+    {
+        "EventCode": "0x0C2B",
+        "EventName": "L2D_TLB_32M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2D in 32MB page."
+    },
+    {
+        "EventCode": "0x0C2C",
+        "EventName": "L2D_TLB_512M",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2D in 512MB page."
+    },
+    {
+        "EventCode": "0x0C2D",
+        "EventName": "L2D_TLB_1G",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2D in 1GB page."
+    },
+    {
+        "EventCode": "0x0C2E",
+        "EventName": "L2D_TLB_16G",
+        "BriefDescription": "This event counts operations that cause a TLB access to the L2D in 16GB page."
+    },
+    {
+        "EventCode": "0x0C30",
+        "EventName": "L2I_TLB_REFILL_4K",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2Iin 4KB page."
+    },
+    {
+        "EventCode": "0x0C31",
+        "EventName": "L2I_TLB_REFILL_64K",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 64KB page."
+    },
+    {
+        "EventCode": "0x0C32",
+        "EventName": "L2I_TLB_REFILL_2M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 2MB page."
+    },
+    {
+        "EventCode": "0x0C33",
+        "EventName": "L2I_TLB_REFILL_32M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 32MB page."
+    },
+    {
+        "EventCode": "0x0C34",
+        "EventName": "L2I_TLB_REFILL_512M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 512MB page."
+    },
+    {
+        "EventCode": "0x0C35",
+        "EventName": "L2I_TLB_REFILL_1G",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 1GB page."
+    },
+    {
+        "EventCode": "0x0C36",
+        "EventName": "L2I_TLB_REFILL_16G",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2I in 16GB page."
+    },
+    {
+        "EventCode": "0x0C38",
+        "EventName": "L2D_TLB_REFILL_4K",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 4KB page."
+    },
+    {
+        "EventCode": "0x0C39",
+        "EventName": "L2D_TLB_REFILL_64K",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 64KB page."
+    },
+    {
+        "EventCode": "0x0C3A",
+        "EventName": "L2D_TLB_REFILL_2M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 2MB page."
+    },
+    {
+        "EventCode": "0x0C3B",
+        "EventName": "L2D_TLB_REFILL_32M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 32MB page."
+    },
+    {
+        "EventCode": "0x0C3C",
+        "EventName": "L2D_TLB_REFILL_512M",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 512MB page."
+    },
+    {
+        "EventCode": "0x0C3D",
+        "EventName": "L2D_TLB_REFILL_1G",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 1GB page."
+    },
+    {
+        "EventCode": "0x0C3E",
+        "EventName": "L2D_TLB_REFILL_16G",
+        "BriefDescription": "This event counts operations that cause a TLB refill to the L2D in 16GB page."
+    },
+    {
+        "ArchStdEvent": "DTLB_WALK_PERCYC",
+        "BriefDescription": "This event counts the number of DTLB_WALK events in progress on each Processor cycle."
+    },
+    {
+        "ArchStdEvent": "ITLB_WALK_PERCYC",
+        "BriefDescription": "This event counts the number of ITLB_WALK events in progress on each Processor cycle."
+    },
+    {
+        "ArchStdEvent": "DTLB_STEP",
+        "BriefDescription": "This event counts translation table walk access made by a refill of the data TLB."
+    },
+    {
+        "ArchStdEvent": "ITLB_STEP",
+        "BriefDescription": "This event counts translation table walk access made by a refill of the instruction TLB."
+    },
+    {
+        "ArchStdEvent": "DTLB_WALK_LARGE",
+        "BriefDescription": "This event counts translation table walk counted by DTLB_WALK where the result of the walk yields a large page size."
+    },
+    {
+        "ArchStdEvent": "ITLB_WALK_LARGE",
+        "BriefDescription": "This event counts translation table walk counted by ITLB_WALK where the result of the walk yields a large page size."
+    },
+    {
+        "ArchStdEvent": "DTLB_WALK_SMALL",
+        "BriefDescription": "This event counts translation table walk counted by DTLB_WALK where the result of the walk yields a small page size."
+    },
+    {
+        "ArchStdEvent": "ITLB_WALK_SMALL",
+        "BriefDescription": "This event counts translation table walk counted by ITLB_WALK where the result of the walk yields a small page size."
+    },
+    {
+        "ArchStdEvent": "DTLB_WALK_BLOCK",
+        "BriefDescription": "This event counts translation table walk counted by DTLB_WALK where the result of the walk yields a Block."
+    },
+    {
+        "ArchStdEvent": "ITLB_WALK_BLOCK",
+        "BriefDescription": "This event counts translation table walk counted by ITLB_WALK where the result of the walk yields a Block."
+    },
+    {
+        "ArchStdEvent": "DTLB_WALK_PAGE",
+        "BriefDescription": "This event counts translation table walk counted by DTLB_WALK where the result of the walk yields a Page."
+    },
+    {
+        "ArchStdEvent": "ITLB_WALK_PAGE",
+        "BriefDescription": "This event counts translation table walk counted by ITLB_WALK where the result of the walk yields a Page."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/trace.json b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/trace.json
new file mode 100644
index 000000000000..0c6e5054c9b5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/fujitsu/monaka/trace.json
@@ -0,0 +1,18 @@
+[
+    {
+        "ArchStdEvent": "TRB_WRAP",
+        "BriefDescription": "This event counts the event generated each time the current write pointer is wrapped to the base pointer."
+    },
+    {
+        "ArchStdEvent": "TRB_TRIG",
+        "BriefDescription": "This event counts the event generated when a Trace Buffer Extension Trigger Event occurs."
+    },
+    {
+        "ArchStdEvent": "TRCEXTOUT0",
+        "BriefDescription": "This event counts the event generated each time an event is signaled by the trace unit external event 0."
+    },
+    {
+        "ArchStdEvent": "CTI_TRIGOUT4",
+        "BriefDescription": "This event counts the event generated each time an event is signaled on CTI output trigger 4."
+    }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index f4d1ca4d1493..5c846fe90513 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -39,6 +39,7 @@
 0x00000000420f5160,v1,cavium/thunderx2,core
 0x00000000430f0af0,v1,cavium/thunderx2,core
 0x00000000460f0010,v1,fujitsu/a64fx,core
+0x00000000460f0030,v1,fujitsu/monaka,core
 0x00000000480fd010,v1,hisilicon/hip08,core
 0x00000000500f0000,v1,ampere/emag,core
 0x00000000c00fac30,v1,ampere/ampereone,core
diff --git a/tools/perf/pmu-events/arch/arm64/recommended.json b/tools/perf/pmu-events/arch/arm64/recommended.json
index 210afa856091..a3b4941ae90c 100644
--- a/tools/perf/pmu-events/arch/arm64/recommended.json
+++ b/tools/perf/pmu-events/arch/arm64/recommended.json
@@ -318,6 +318,11 @@
         "BriefDescription": "Barrier speculatively executed, DMB"
     },
     {
+        "EventCode": "0x7F",
+        "EventName": "CSDB_SPEC",
+        "BriefDescription": "Barrier Speculatively executed, CSDB."
+    },
+    {
         "PublicDescription": "Exception taken, Other synchronous",
         "EventCode": "0x81",
         "EventName": "EXC_UNDEF",
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index d781a377757a..3e204700b59a 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -430,8 +430,11 @@ class JsonEvent:
   def to_c_string(self, metric: bool) -> str:
     """Representation of the event as a C struct initializer."""
 
+    def fix_comment(s: str) -> str:
+        return s.replace('*/', r'\*\/')
+
     s = self.build_c_string(metric)
-    return f'{{ { _bcs.offsets[s] } }}, /* {s} */\n'
+    return f'{{ { _bcs.offsets[s] } }}, /* {fix_comment(s)} */\n'
 
 
 @lru_cache(maxsize=None)
@@ -461,12 +464,16 @@ def preprocess_arch_std_files(archpath: str) -> None:
   """Read in all architecture standard events."""
   global _arch_std_events
   for item in os.scandir(archpath):
-    if item.is_file() and item.name.endswith('.json'):
+    if not item.is_file() or not item.name.endswith('.json'):
+      continue
+    try:
       for event in read_json_events(item.path, topic=''):
         if event.name:
           _arch_std_events[event.name.lower()] = event
         if event.metric_name:
           _arch_std_events[event.metric_name.lower()] = event
+    except Exception as e:
+        raise RuntimeError(f'Failure processing \'{item.name}\' in \'{archpath}\'') from e
 
 
 def add_events_table_entries(item: os.DirEntry, topic: str) -> None:
@@ -1252,7 +1259,10 @@ def main() -> None:
           item_path = '/'.join(parents) + ('/' if len(parents) > 0 else '') + item.name
           if 'test' not in item_path and 'common' not in item_path and item_path not in _args.model.split(','):
             continue
-      action(parents, item)
+      try:
+        action(parents, item)
+      except Exception as e:
+        raise RuntimeError(f'Action failure for \'{item.name}\' in {parents}') from e
       if item.is_dir():
         ftw(item.path, parents + [item.name], action)
 
diff --git a/tools/perf/scripts/Makefile.syscalls b/tools/perf/scripts/Makefile.syscalls
new file mode 100644
index 000000000000..8bf55333262e
--- /dev/null
+++ b/tools/perf/scripts/Makefile.syscalls
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+# This Makefile generates headers in
+# tools/perf/arch/$(SRCARCH)/include/generated/asm from the architecture's
+# syscall table. This will either be from the generic syscall table, or from a
+# table that is specific to that architecture.
+
+PHONY := all
+all:
+
+obj := $(OUTPUT)arch/$(SRCARCH)/include/generated/asm
+
+syscall_abis_32  := common,32
+syscall_abis_64  := common,64
+syscalltbl := $(srctree)/tools/scripts/syscall.tbl
+
+# let architectures override $(syscall_abis_%) and $(syscalltbl)
+-include $(srctree)/tools/perf/arch/$(SRCARCH)/entry/syscalls/Makefile.syscalls
+include $(srctree)/tools/build/Build.include
+-include $(srctree)/tools/perf/arch/$(SRCARCH)/entry/syscalls/Kbuild
+
+systbl := $(srctree)/tools/perf/scripts/syscalltbl.sh
+
+syscall-y   := $(addprefix $(obj)/, $(syscall-y))
+
+# Remove stale wrappers when the corresponding files are removed from generic-y
+old-headers := $(wildcard $(obj)/*.h)
+unwanted    := $(filter-out $(syscall-y),$(old-headers))
+
+quiet_cmd_remove = REMOVE  $(unwanted)
+      cmd_remove = rm -f $(unwanted)
+
+quiet_cmd_systbl = SYSTBL  $@
+      cmd_systbl = $(CONFIG_SHELL) $(systbl) \
+		   $(if $(systbl-args-$*),$(systbl-args-$*),$(systbl-args)) \
+		   --abis $(subst $(space),$(comma),$(strip $(syscall_abis_$*))) \
+		   $< $@
+
+all: $(syscall-y)
+	$(if $(unwanted),$(call cmd,remove))
+	@:
+
+$(obj)/syscalls_%.h: $(syscalltbl) $(systbl) FORCE
+	$(call if_changed,systbl)
+
+targets := $(syscall-y)
+
+# Create output directory. Skip it if at least one old header exists
+# since we know the output directory already exists.
+ifeq ($(old-headers),)
+$(shell mkdir -p $(obj))
+endif
+
+PHONY += FORCE
+
+FORCE:
+
+existing-targets := $(wildcard $(sort $(targets)))
+
+-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd)
+
+.PHONY: $(PHONY)
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
index 01f54d6724a5..60dcfe56d4d9 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -24,16 +24,6 @@
 #include "../../../util/srcline.h"
 #include "../../../util/srccode.h"
 
-#if PY_MAJOR_VERSION < 3
-#define _PyCapsule_GetPointer(arg1, arg2) \
-  PyCObject_AsVoidPtr(arg1)
-#define _PyBytes_FromStringAndSize(arg1, arg2) \
-  PyString_FromStringAndSize((arg1), (arg2))
-#define _PyUnicode_AsUTF8(arg) \
-  PyString_AsString(arg)
-
-PyMODINIT_FUNC initperf_trace_context(void);
-#else
 #define _PyCapsule_GetPointer(arg1, arg2) \
   PyCapsule_GetPointer((arg1), (arg2))
 #define _PyBytes_FromStringAndSize(arg1, arg2) \
@@ -42,7 +32,6 @@ PyMODINIT_FUNC initperf_trace_context(void);
   PyUnicode_AsUTF8(arg)
 
 PyMODINIT_FUNC PyInit_perf_trace_context(void);
-#endif
 
 static struct scripting_context *get_args(PyObject *args, const char *name, PyObject **arg2)
 {
@@ -104,7 +93,7 @@ static PyObject *perf_sample_insn(PyObject *obj, PyObject *args)
 	if (c->sample->ip && !c->sample->insn_len && thread__maps(c->al->thread)) {
 		struct machine *machine =  maps__machine(thread__maps(c->al->thread));
 
-		script_fetch_insn(c->sample, c->al->thread, machine);
+		script_fetch_insn(c->sample, c->al->thread, machine, /*native_arch=*/true);
 	}
 	if (!c->sample->insn_len)
 		Py_RETURN_NONE; /* N.B. This is a return statement */
@@ -213,12 +202,6 @@ static PyMethodDef ContextMethods[] = {
 	{ NULL, NULL, 0, NULL}
 };
 
-#if PY_MAJOR_VERSION < 3
-PyMODINIT_FUNC initperf_trace_context(void)
-{
-	(void) Py_InitModule("perf_trace_context", ContextMethods);
-}
-#else
 PyMODINIT_FUNC PyInit_perf_trace_context(void)
 {
 	static struct PyModuleDef moduledef = {
@@ -240,4 +223,3 @@ PyMODINIT_FUNC PyInit_perf_trace_context(void)
 
 	return mod;
 }
-#endif
diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py
index 1f332e72b9b0..5e237a5a5f1b 100644
--- a/tools/perf/scripts/python/mem-phys-addr.py
+++ b/tools/perf/scripts/python/mem-phys-addr.py
@@ -3,98 +3,125 @@
 #
 # Copyright (c) 2018, Intel Corporation.
 
-from __future__ import division
-from __future__ import print_function
-
 import os
 import sys
-import struct
 import re
 import bisect
 import collections
+from dataclasses import dataclass
+from typing import (Dict, Optional)
 
 sys.path.append(os.environ['PERF_EXEC_PATH'] + \
-	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+    '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+@dataclass(frozen=True)
+class IomemEntry:
+    """Read from a line in /proc/iomem"""
+    begin: int
+    end: int
+    indent: int
+    label: str
 
-#physical address ranges for System RAM
-system_ram = []
-#physical address ranges for Persistent Memory
-pmem = []
-#file object for proc iomem
-f = None
-#Count for each type of memory
-load_mem_type_cnt = collections.Counter()
-#perf event name
-event_name = None
+# Physical memory layout from /proc/iomem. Key is the indent and then
+# a list of ranges.
+iomem: Dict[int, list[IomemEntry]] = collections.defaultdict(list)
+# Child nodes from the iomem parent.
+children: Dict[IomemEntry, set[IomemEntry]] = collections.defaultdict(set)
+# Maximum indent seen before an entry in the iomem file.
+max_indent: int = 0
+# Count for each range of memory.
+load_mem_type_cnt: Dict[IomemEntry, int] = collections.Counter()
+# Perf event name set from the first sample in the data.
+event_name: Optional[str] = None
 
 def parse_iomem():
-	global f
-	f = open('/proc/iomem', 'r')
-	for i, j in enumerate(f):
-		m = re.split('-|:',j,2)
-		if m[2].strip() == 'System RAM':
-			system_ram.append(int(m[0], 16))
-			system_ram.append(int(m[1], 16))
-		if m[2].strip() == 'Persistent Memory':
-			pmem.append(int(m[0], 16))
-			pmem.append(int(m[1], 16))
+    """Populate iomem from /proc/iomem file"""
+    global iomem
+    global max_indent
+    global children
+    with open('/proc/iomem', 'r', encoding='ascii') as f:
+        for line in f:
+            indent = 0
+            while line[indent] == ' ':
+                indent += 1
+            if indent > max_indent:
+                max_indent = indent
+            m = re.split('-|:', line, 2)
+            begin = int(m[0], 16)
+            end = int(m[1], 16)
+            label = m[2].strip()
+            entry = IomemEntry(begin, end, indent, label)
+            # Before adding entry, search for a parent node using its begin.
+            if indent > 0:
+                parent = find_memory_type(begin)
+                assert parent, f"Given indent expected a parent for {label}"
+                children[parent].add(entry)
+            iomem[indent].append(entry)
 
-def print_memory_type():
-	print("Event: %s" % (event_name))
-	print("%-40s  %10s  %10s\n" % ("Memory type", "count", "percentage"), end='')
-	print("%-40s  %10s  %10s\n" % ("----------------------------------------",
-					"-----------", "-----------"),
-					end='');
-	total = sum(load_mem_type_cnt.values())
-	for mem_type, count in sorted(load_mem_type_cnt.most_common(), \
-					key = lambda kv: (kv[1], kv[0]), reverse = True):
-		print("%-40s  %10d  %10.1f%%\n" %
-			(mem_type, count, 100 * count / total),
-			end='')
+def find_memory_type(phys_addr) -> Optional[IomemEntry]:
+    """Search iomem for the range containing phys_addr with the maximum indent"""
+    for i in range(max_indent, -1, -1):
+        if i not in iomem:
+            continue
+        position = bisect.bisect_right(iomem[i], phys_addr,
+                                       key=lambda entry: entry.begin)
+        if position is None:
+            continue
+        iomem_entry = iomem[i][position-1]
+        if  iomem_entry.begin <= phys_addr <= iomem_entry.end:
+            return iomem_entry
+    print(f"Didn't find {phys_addr}")
+    return None
 
-def trace_begin():
-	parse_iomem()
+def print_memory_type():
+    print(f"Event: {event_name}")
+    print(f"{'Memory type':<40}  {'count':>10}  {'percentage':>10}")
+    print(f"{'-' * 40:<40}  {'-' * 10:>10}  {'-' * 10:>10}")
+    total = sum(load_mem_type_cnt.values())
+    # Add count from children into the parent.
+    for i in range(max_indent, -1, -1):
+        if i not in iomem:
+            continue
+        for entry in iomem[i]:
+            global children
+            for child in children[entry]:
+                if load_mem_type_cnt[child] > 0:
+                    load_mem_type_cnt[entry] += load_mem_type_cnt[child]
 
-def trace_end():
-	print_memory_type()
-	f.close()
+    def print_entries(entries):
+        """Print counts from parents down to their children"""
+        global children
+        for entry in sorted(entries,
+                            key = lambda entry: load_mem_type_cnt[entry],
+                            reverse = True):
+            count = load_mem_type_cnt[entry]
+            if count > 0:
+                mem_type = ' ' * entry.indent + f"{entry.begin:x}-{entry.end:x} : {entry.label}"
+                percent = 100 * count / total
+                print(f"{mem_type:<40}  {count:>10}  {percent:>10.1f}")
+                print_entries(children[entry])
 
-def is_system_ram(phys_addr):
-	#/proc/iomem is sorted
-	position = bisect.bisect(system_ram, phys_addr)
-	if position % 2 == 0:
-		return False
-	return True
+    print_entries(iomem[0])
 
-def is_persistent_mem(phys_addr):
-	position = bisect.bisect(pmem, phys_addr)
-	if position % 2 == 0:
-		return False
-	return True
+def trace_begin():
+    parse_iomem()
 
-def find_memory_type(phys_addr):
-	if phys_addr == 0:
-		return "N/A"
-	if is_system_ram(phys_addr):
-		return "System RAM"
+def trace_end():
+    print_memory_type()
 
-	if is_persistent_mem(phys_addr):
-		return "Persistent Memory"
+def process_event(param_dict):
+    if "sample" not in param_dict:
+        return
 
-	#slow path, search all
-	f.seek(0, 0)
-	for j in f:
-		m = re.split('-|:',j,2)
-		if int(m[0], 16) <= phys_addr <= int(m[1], 16):
-			return m[2]
-	return "N/A"
+    sample = param_dict["sample"]
+    if "phys_addr" not in sample:
+        return
 
-def process_event(param_dict):
-	name       = param_dict["ev_name"]
-	sample     = param_dict["sample"]
-	phys_addr  = sample["phys_addr"]
+    phys_addr  = sample["phys_addr"]
+    entry = find_memory_type(phys_addr)
+    if entry:
+        load_mem_type_cnt[entry] += 1
 
-	global event_name
-	if event_name == None:
-		event_name = name
-	load_mem_type_cnt[find_memory_type(phys_addr)] += 1
+    global event_name
+    if event_name is None:
+        event_name  = param_dict["ev_name"]
diff --git a/tools/perf/scripts/syscalltbl.sh b/tools/perf/scripts/syscalltbl.sh
new file mode 100755
index 000000000000..1ce0d5aa8b50
--- /dev/null
+++ b/tools/perf/scripts/syscalltbl.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate a syscall table header.
+#
+# Each line of the syscall table should have the following format:
+#
+# NR ABI NAME [NATIVE] [COMPAT]
+#
+# NR       syscall number
+# ABI      ABI name
+# NAME     syscall name
+# NATIVE   native entry point (optional)
+# COMPAT   compat entry point (optional)
+
+set -e
+
+usage() {
+	echo >&2 "usage: $0 [--abis ABIS] INFILE OUTFILE" >&2
+	echo >&2
+	echo >&2 "  INFILE    input syscall table"
+	echo >&2 "  OUTFILE   output header file"
+	echo >&2
+	echo >&2 "options:"
+	echo >&2 "  --abis ABIS        ABI(s) to handle (By default, all lines are handled)"
+	exit 1
+}
+
+# default unless specified by options
+abis=
+
+while [ $# -gt 0 ]
+do
+	case $1 in
+	--abis)
+		abis=$(echo "($2)" | tr ',' '|')
+		shift 2;;
+	-*)
+		echo "$1: unknown option" >&2
+		usage;;
+	*)
+		break;;
+	esac
+done
+
+if [ $# -ne 2 ]; then
+	usage
+fi
+
+infile="$1"
+outfile="$2"
+
+nxt=0
+
+syscall_macro() {
+    nr="$1"
+    name="$2"
+
+    echo "	[$nr] = \"$name\","
+}
+
+emit() {
+    nr="$1"
+    entry="$2"
+
+    syscall_macro "$nr" "$entry"
+}
+
+echo "static const char *const syscalltbl[] = {" > $outfile
+
+sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
+grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | sort -n > $sorted_table
+
+max_nr=0
+# the params are: nr abi name entry compat
+# use _ for intentionally unused variables according to SC2034
+while read nr _ name _ _; do
+    emit "$nr" "$name" >> $outfile
+    max_nr=$nr
+done < $sorted_table
+
+rm -f $sorted_table
+
+echo "};" >> $outfile
+
+echo "#define SYSCALLTBL_MAX_ID ${max_nr}" >> $outfile
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index ec4e1f034742..4bf8d3f5eae7 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -5,10 +5,10 @@ perf-test-y += tests-scripts.o
 perf-test-y += parse-events.o
 perf-test-y += dso-data.o
 perf-test-y += vmlinux-kallsyms.o
-perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall.o
-perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall-all-cpus.o
+perf-test-y += openat-syscall.o
+perf-test-y += openat-syscall-all-cpus.o
 perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall-tp-fields.o
-perf-test-$(CONFIG_LIBTRACEEVENT) += mmap-basic.o
+perf-test-y += mmap-basic.o
 perf-test-y += perf-record.o
 perf-test-y += evsel-roundtrip-name.o
 perf-test-$(CONFIG_LIBTRACEEVENT) += evsel-tp-sched.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 4751dd3c6f67..14d30a5053be 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -42,6 +42,8 @@
 static bool dont_fork;
 /* Fork the tests in parallel and wait for their completion. */
 static bool sequential;
+/* Number of times each test is run. */
+static unsigned int runs_per_test = 1;
 const char *dso_to_test;
 const char *test_objdump_path = "objdump";
 
@@ -60,11 +62,9 @@ static struct test_suite *arch_tests[] = {
 
 static struct test_suite *generic_tests[] = {
 	&suite__vmlinux_matches_kallsyms,
-#ifdef HAVE_LIBTRACEEVENT
 	&suite__openat_syscall_event,
 	&suite__openat_syscall_event_on_all_cpus,
 	&suite__basic_mmap,
-#endif
 	&suite__mem,
 	&suite__parse_events,
 	&suite__expr,
@@ -151,58 +151,51 @@ static struct test_workload *workloads[] = {
 #define workloads__for_each(workload) \
 	for (unsigned i = 0; i < ARRAY_SIZE(workloads) && ({ workload = workloads[i]; 1; }); i++)
 
-static int num_subtests(const struct test_suite *t)
+#define test_suite__for_each_test_case(suite, idx)			\
+	for (idx = 0; (suite)->test_cases && (suite)->test_cases[idx].name != NULL; idx++)
+
+static int test_suite__num_test_cases(const struct test_suite *t)
 {
 	int num;
 
-	if (!t->test_cases)
-		return 0;
-
-	num = 0;
-	while (t->test_cases[num].name)
-		num++;
+	test_suite__for_each_test_case(t, num);
 
 	return num;
 }
 
-static bool has_subtests(const struct test_suite *t)
-{
-	return num_subtests(t) > 1;
-}
-
-static const char *skip_reason(const struct test_suite *t, int subtest)
+static const char *skip_reason(const struct test_suite *t, int test_case)
 {
 	if (!t->test_cases)
 		return NULL;
 
-	return t->test_cases[subtest >= 0 ? subtest : 0].skip_reason;
+	return t->test_cases[test_case >= 0 ? test_case : 0].skip_reason;
 }
 
-static const char *test_description(const struct test_suite *t, int subtest)
+static const char *test_description(const struct test_suite *t, int test_case)
 {
-	if (t->test_cases && subtest >= 0)
-		return t->test_cases[subtest].desc;
+	if (t->test_cases && test_case >= 0)
+		return t->test_cases[test_case].desc;
 
 	return t->desc;
 }
 
-static test_fnptr test_function(const struct test_suite *t, int subtest)
+static test_fnptr test_function(const struct test_suite *t, int test_case)
 {
-	if (subtest <= 0)
+	if (test_case <= 0)
 		return t->test_cases[0].run_case;
 
-	return t->test_cases[subtest].run_case;
+	return t->test_cases[test_case].run_case;
 }
 
-static bool test_exclusive(const struct test_suite *t, int subtest)
+static bool test_exclusive(const struct test_suite *t, int test_case)
 {
-	if (subtest <= 0)
+	if (test_case <= 0)
 		return t->test_cases[0].exclusive;
 
-	return t->test_cases[subtest].exclusive;
+	return t->test_cases[test_case].exclusive;
 }
 
-static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[])
+static bool perf_test__matches(const char *desc, int suite_num, int argc, const char *argv[])
 {
 	int i;
 
@@ -214,7 +207,7 @@ static bool perf_test__matches(const char *desc, int curr, int argc, const char
 		long nr = strtoul(argv[i], &end, 10);
 
 		if (*end == '\0') {
-			if (nr == curr + 1)
+			if (nr == suite_num + 1)
 				return true;
 			continue;
 		}
@@ -229,8 +222,8 @@ static bool perf_test__matches(const char *desc, int curr, int argc, const char
 struct child_test {
 	struct child_process process;
 	struct test_suite *test;
-	int test_num;
-	int subtest;
+	int suite_num;
+	int test_case_num;
 };
 
 static jmp_buf run_test_jmp_buf;
@@ -260,7 +253,7 @@ static int run_test_child(struct child_process *process)
 
 	pr_debug("--- start ---\n");
 	pr_debug("test child forked, pid %d\n", getpid());
-	err = test_function(child->test, child->subtest)(child->test, child->subtest);
+	err = test_function(child->test, child->test_case_num)(child->test, child->test_case_num);
 	pr_debug("---- end(%d) ----\n", err);
 
 err_out:
@@ -272,15 +265,16 @@ err_out:
 
 #define TEST_RUNNING -3
 
-static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width,
-			     int running)
+static int print_test_result(struct test_suite *t, int curr_suite, int curr_test_case,
+			     int result, int width, int running)
 {
-	if (has_subtests(t)) {
+	if (test_suite__num_test_cases(t) > 1) {
 		int subw = width > 2 ? width - 2 : width;
 
-		pr_info("%3d.%1d: %-*s:", i + 1, subtest + 1, subw, test_description(t, subtest));
+		pr_info("%3d.%1d: %-*s:", curr_suite + 1, curr_test_case + 1, subw,
+			test_description(t, curr_test_case));
 	} else
-		pr_info("%3d: %-*s:", i + 1, width, test_description(t, subtest));
+		pr_info("%3d: %-*s:", curr_suite + 1, width, test_description(t, curr_test_case));
 
 	switch (result) {
 	case TEST_RUNNING:
@@ -290,7 +284,7 @@ static int print_test_result(struct test_suite *t, int i, int subtest, int resul
 		pr_info(" Ok\n");
 		break;
 	case TEST_SKIP: {
-		const char *reason = skip_reason(t, subtest);
+		const char *reason = skip_reason(t, curr_test_case);
 
 		if (reason)
 			color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", reason);
@@ -312,7 +306,7 @@ static void finish_test(struct child_test **child_tests, int running_test, int c
 {
 	struct child_test *child_test = child_tests[running_test];
 	struct test_suite *t;
-	int i, subi, err;
+	int curr_suite, curr_test_case, err;
 	bool err_done = false;
 	struct strbuf err_output = STRBUF_INIT;
 	int last_running = -1;
@@ -323,15 +317,15 @@ static void finish_test(struct child_test **child_tests, int running_test, int c
 		return;
 	}
 	t = child_test->test;
-	i = child_test->test_num;
-	subi = child_test->subtest;
+	curr_suite = child_test->suite_num;
+	curr_test_case = child_test->test_case_num;
 	err = child_test->process.err;
 	/*
 	 * For test suites with subtests, display the suite name ahead of the
 	 * sub test names.
 	 */
-	if (has_subtests(t) && subi == 0)
-		pr_info("%3d: %-*s:\n", i + 1, width, test_description(t, -1));
+	if (test_suite__num_test_cases(t) > 1 && curr_test_case == 0)
+		pr_info("%3d: %-*s:\n", curr_suite + 1, width, test_description(t, -1));
 
 	/*
 	 * Busy loop reading from the child's stdout/stderr that are set to be
@@ -340,10 +334,11 @@ static void finish_test(struct child_test **child_tests, int running_test, int c
 	if (err > 0)
 		fcntl(err, F_SETFL, O_NONBLOCK);
 	if (verbose > 1) {
-		if (has_subtests(t))
-			pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi));
+		if (test_suite__num_test_cases(t) > 1)
+			pr_info("%3d.%1d: %s:\n", curr_suite + 1, curr_test_case + 1,
+				test_description(t, curr_test_case));
 		else
-			pr_info("%3d: %s:\n", i + 1, test_description(t, -1));
+			pr_info("%3d: %s:\n", curr_suite + 1, test_description(t, -1));
 	}
 	while (!err_done) {
 		struct pollfd pfds[1] = {
@@ -368,7 +363,8 @@ static void finish_test(struct child_test **child_tests, int running_test, int c
 					 */
 					fprintf(debug_file(), PERF_COLOR_DELETE_LINE);
 				}
-				print_test_result(t, i, subi, TEST_RUNNING, width, running);
+				print_test_result(t, curr_suite, curr_test_case, TEST_RUNNING,
+						  width, running);
 				last_running = running;
 			}
 		}
@@ -406,14 +402,14 @@ static void finish_test(struct child_test **child_tests, int running_test, int c
 		fprintf(stderr, "%s", err_output.buf);
 
 	strbuf_release(&err_output);
-	print_test_result(t, i, subi, ret, width, /*running=*/0);
+	print_test_result(t, curr_suite, curr_test_case, ret, width, /*running=*/0);
 	if (err > 0)
 		close(err);
 	zfree(&child_tests[running_test]);
 }
 
-static int start_test(struct test_suite *test, int i, int subi, struct child_test **child,
-		int width, int pass)
+static int start_test(struct test_suite *test, int curr_suite, int curr_test_case,
+		struct child_test **child, int width, int pass)
 {
 	int err;
 
@@ -421,17 +417,18 @@ static int start_test(struct test_suite *test, int i, int subi, struct child_tes
 	if (dont_fork) {
 		if (pass == 1) {
 			pr_debug("--- start ---\n");
-			err = test_function(test, subi)(test, subi);
+			err = test_function(test, curr_test_case)(test, curr_test_case);
 			pr_debug("---- end ----\n");
-			print_test_result(test, i, subi, err, width, /*running=*/0);
+			print_test_result(test, curr_suite, curr_test_case, err, width,
+					  /*running=*/0);
 		}
 		return 0;
 	}
-	if (pass == 1 && !sequential && test_exclusive(test, subi)) {
+	if (pass == 1 && !sequential && test_exclusive(test, curr_test_case)) {
 		/* When parallel, skip exclusive tests on the first pass. */
 		return 0;
 	}
-	if (pass != 1 && (sequential || !test_exclusive(test, subi))) {
+	if (pass != 1 && (sequential || !test_exclusive(test, curr_test_case))) {
 		/* Sequential and non-exclusive tests were run on the first pass. */
 		return 0;
 	}
@@ -440,8 +437,8 @@ static int start_test(struct test_suite *test, int i, int subi, struct child_tes
 		return -ENOMEM;
 
 	(*child)->test = test;
-	(*child)->test_num = i;
-	(*child)->subtest = subi;
+	(*child)->suite_num = curr_suite;
+	(*child)->test_case_num = curr_test_case;
 	(*child)->process.pid = -1;
 	(*child)->process.no_stdin = 1;
 	if (verbose <= 0) {
@@ -481,20 +478,16 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
 	int err = 0;
 
 	for (struct test_suite **t = suites; *t; t++) {
-		int len = strlen(test_description(*t, -1));
+		int i, len = strlen(test_description(*t, -1));
 
 		if (width < len)
 			width = len;
 
-		if (has_subtests(*t)) {
-			for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
-				len = strlen(test_description(*t, subi));
-				if (width < len)
-					width = len;
-				num_tests++;
-			}
-		} else {
-			num_tests++;
+		test_suite__for_each_test_case(*t, i) {
+			len = strlen(test_description(*t, i));
+			if (width < len)
+				width = len;
+			num_tests += runs_per_test;
 		}
 	}
 	child_tests = calloc(num_tests, sizeof(*child_tests));
@@ -512,7 +505,7 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
 				continue;
 
 			pr_debug3("Killing %d pid %d\n",
-				  child_test->test_num + 1,
+				  child_test->suite_num + 1,
 				  child_test->process.pid);
 			kill(child_test->process.pid, err);
 		}
@@ -528,50 +521,48 @@ static int __cmd_test(struct test_suite **suites, int argc, const char *argv[],
 	 */
 	for (int pass = 1; pass <= 2; pass++) {
 		int child_test_num = 0;
-		int i = 0;
+		int curr_suite = 0;
 
-		for (struct test_suite **t = suites; *t; t++) {
-			int curr = i++;
+		for (struct test_suite **t = suites; *t; t++, curr_suite++) {
+			int curr_test_case;
 
-			if (!perf_test__matches(test_description(*t, -1), curr, argc, argv)) {
+			if (!perf_test__matches(test_description(*t, -1), curr_suite, argc, argv)) {
 				/*
 				 * Test suite shouldn't be run based on
-				 * description. See if subtest should.
+				 * description. See if any test case should.
 				 */
 				bool skip = true;
 
-				for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
-					if (perf_test__matches(test_description(*t, subi),
-								curr, argc, argv))
+				test_suite__for_each_test_case(*t, curr_test_case) {
+					if (perf_test__matches(test_description(*t, curr_test_case),
+							       curr_suite, argc, argv)) {
 						skip = false;
+						break;
+					}
 				}
-
 				if (skip)
 					continue;
 			}
 
-			if (intlist__find(skiplist, i)) {
-				pr_info("%3d: %-*s:", curr + 1, width, test_description(*t, -1));
+			if (intlist__find(skiplist, curr_suite + 1)) {
+				pr_info("%3d: %-*s:", curr_suite + 1, width,
+					test_description(*t, -1));
 				color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
 				continue;
 			}
 
-			if (!has_subtests(*t)) {
-				err = start_test(*t, curr, -1, &child_tests[child_test_num++],
-						 width, pass);
-				if (err)
-					goto err_out;
-				continue;
-			}
-			for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) {
-				if (!perf_test__matches(test_description(*t, subi),
-							curr, argc, argv))
-					continue;
-
-				err = start_test(*t, curr, subi, &child_tests[child_test_num++],
-						 width, pass);
-				if (err)
-					goto err_out;
+			for (unsigned int run = 0; run < runs_per_test; run++) {
+				test_suite__for_each_test_case(*t, curr_test_case) {
+					if (!perf_test__matches(test_description(*t, curr_test_case),
+								curr_suite, argc, argv))
+						continue;
+
+					err = start_test(*t, curr_suite, curr_test_case,
+							 &child_tests[child_test_num++],
+							 width, pass);
+					if (err)
+						goto err_out;
+				}
 			}
 		}
 		if (!sequential) {
@@ -592,25 +583,24 @@ err_out:
 	return err;
 }
 
-static int perf_test__list(struct test_suite **suites, int argc, const char **argv)
+static int perf_test__list(FILE *fp, struct test_suite **suites, int argc, const char **argv)
 {
-	int i = 0;
+	int curr_suite = 0;
 
-	for (struct test_suite **t = suites; *t; t++) {
-		int curr = i++;
+	for (struct test_suite **t = suites; *t; t++, curr_suite++) {
+		int curr_test_case;
 
-		if (!perf_test__matches(test_description(*t, -1), curr, argc, argv))
+		if (!perf_test__matches(test_description(*t, -1), curr_suite, argc, argv))
 			continue;
 
-		pr_info("%3d: %s\n", i, test_description(*t, -1));
+		fprintf(fp, "%3d: %s\n", curr_suite + 1, test_description(*t, -1));
 
-		if (has_subtests(*t)) {
-			int subn = num_subtests(*t);
-			int subi;
+		if (test_suite__num_test_cases(*t) <= 1)
+			continue;
 
-			for (subi = 0; subi < subn; subi++)
-				pr_info("%3d:%1d: %s\n", i, subi + 1,
-					test_description(*t, subi));
+		test_suite__for_each_test_case(*t, curr_test_case) {
+			fprintf(fp, "%3d.%1d: %s\n", curr_suite + 1, curr_test_case + 1,
+				test_description(*t, curr_test_case));
 		}
 	}
 	return 0;
@@ -667,27 +657,24 @@ static struct test_suite **build_suites(void)
 	if (suites[2] == NULL)
 		suites[2] = create_script_test_suites();
 
-#define for_each_test(t)						\
+#define for_each_suite(suite)						\
 	for (size_t i = 0, j = 0; i < ARRAY_SIZE(suites); i++, j = 0)	\
-		while ((t = suites[i][j++]) != NULL)
+		while ((suite = suites[i][j++]) != NULL)
 
-	for_each_test(t)
+	for_each_suite(t)
 		num_suites++;
 
 	result = calloc(num_suites + 1, sizeof(struct test_suite *));
 
 	for (int pass = 1; pass <= 2; pass++) {
-		for_each_test(t) {
+		for_each_suite(t) {
 			bool exclusive = false;
+			int curr_test_case;
 
-			if (!has_subtests(t)) {
-				exclusive = test_exclusive(t, -1);
-			} else {
-				for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) {
-					if (test_exclusive(t, subi)) {
-						exclusive = true;
-						break;
-					}
+			test_suite__for_each_test_case(t, curr_test_case) {
+				if (test_exclusive(t, curr_test_case)) {
+					exclusive = true;
+					break;
 				}
 			}
 			if ((!exclusive && pass == 1) || (exclusive && pass == 2))
@@ -695,7 +682,7 @@ static struct test_suite **build_suites(void)
 		}
 	}
 	return result;
-#undef for_each_test
+#undef for_each_suite
 }
 
 int cmd_test(int argc, const char **argv)
@@ -715,6 +702,8 @@ int cmd_test(int argc, const char **argv)
 		    "Do not fork for testcase"),
 	OPT_BOOLEAN('S', "sequential", &sequential,
 		    "Run the tests one after another rather than in parallel"),
+	OPT_UINTEGER('r', "runs-per-test", &runs_per_test,
+		     "Run each test the given number of times, default 1"),
 	OPT_STRING('w', "workload", &workload, "work", "workload to run for testing, use '--list-workloads' to list the available ones."),
 	OPT_BOOLEAN(0, "list-workloads", &list_workloads, "List the available builtin workloads to use with -w/--workload"),
 	OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"),
@@ -738,7 +727,7 @@ int cmd_test(int argc, const char **argv)
 	argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
 	if (argc >= 1 && !strcmp(argv[0], "list")) {
 		suites = build_suites();
-		ret = perf_test__list(suites, argc - 1, argv + 1);
+		ret = perf_test__list(stdout, suites, argc - 1, argv + 1);
 		free(suites);
 		return ret;
 	}
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 27c82cfb7e7d..b1abb34d7818 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <errno.h>
+#include <linux/kconfig.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <inttypes.h>
@@ -8,6 +9,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <sys/param.h>
+#include <sys/utsname.h>
 #include <perf/cpumap.h>
 #include <perf/evlist.h>
 #include <perf/mmap.h>
@@ -176,16 +178,104 @@ static int read_objdump_output(FILE *f, void *buf, size_t *len, u64 start_addr)
 	return err;
 }
 
+/*
+ * Only gets GNU objdump version. Returns 0 for llvm-objdump.
+ */
+static int objdump_version(void)
+{
+	size_t line_len;
+	char cmd[PATH_MAX * 2];
+	char *line = NULL;
+	const char *fmt;
+	FILE *f;
+	int ret;
+
+	int version_tmp, version_num = 0;
+	char *version = 0, *token;
+
+	fmt = "%s --version";
+	ret = snprintf(cmd, sizeof(cmd), fmt, test_objdump_path);
+	if (ret <= 0 || (size_t)ret >= sizeof(cmd))
+		return -1;
+	/* Ignore objdump errors */
+	strcat(cmd, " 2>/dev/null");
+	f = popen(cmd, "r");
+	if (!f) {
+		pr_debug("popen failed\n");
+		return -1;
+	}
+	/* Get first line of objdump --version output */
+	ret = getline(&line, &line_len, f);
+	pclose(f);
+	if (ret < 0) {
+		pr_debug("getline failed\n");
+		return -1;
+	}
+
+	token = strsep(&line, " ");
+	if (token != NULL && !strcmp(token, "GNU")) {
+		// version is last part of first line of objdump --version output.
+		while ((token = strsep(&line, " ")))
+			version = token;
+
+		// Convert version into a format we can compare with
+		token = strsep(&version, ".");
+		version_num = atoi(token);
+		if (version_num)
+			version_num *= 10000;
+
+		token = strsep(&version, ".");
+		version_tmp = atoi(token);
+		if (token)
+			version_num += version_tmp * 100;
+
+		token = strsep(&version, ".");
+		version_tmp = atoi(token);
+		if (token)
+			version_num += version_tmp;
+	}
+
+	return version_num;
+}
+
 static int read_via_objdump(const char *filename, u64 addr, void *buf,
 			    size_t len)
 {
+	u64 stop_address = addr + len;
+	struct utsname uname_buf;
 	char cmd[PATH_MAX * 2];
 	const char *fmt;
 	FILE *f;
 	int ret;
 
+	ret = uname(&uname_buf);
+	if (ret) {
+		pr_debug("uname failed\n");
+		return -1;
+	}
+
+	if (!strncmp(uname_buf.machine, "riscv", 5)) {
+		int version = objdump_version();
+
+		/* Default to this workaround if version parsing fails */
+		if (version < 0 || version > 24100) {
+			/*
+			 * Starting at riscv objdump version 2.41, dumping in
+			 * the middle of an instruction is not supported. riscv
+			 * instructions are aligned along 2-byte intervals and
+			 * can be either 2-bytes or 4-bytes. This makes it
+			 * possible that the stop-address lands in the middle of
+			 * a 4-byte instruction. Increase the stop_address by
+			 * two to ensure an instruction is not cut in half, but
+			 * leave the len as-is so only the expected number of
+			 * bytes are collected.
+			 */
+			stop_address += 2;
+		}
+	}
+
 	fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s";
-	ret = snprintf(cmd, sizeof(cmd), fmt, test_objdump_path, addr, addr + len,
+	ret = snprintf(cmd, sizeof(cmd), fmt, test_objdump_path, addr, stop_address,
 		       filename);
 	if (ret <= 0 || (size_t)ret >= sizeof(cmd))
 		return -1;
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 2f0168b2a5a9..2354246afc5a 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -156,21 +156,54 @@ static int test__cpu_map_print(struct test_suite *test __maybe_unused, int subte
 	return 0;
 }
 
-static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
+static int __test__cpu_map_merge(const char *lhs, const char *rhs, int nr, const char *expected)
 {
-	struct perf_cpu_map *a = perf_cpu_map__new("4,2,1");
-	struct perf_cpu_map *b = perf_cpu_map__new("4,5,7");
-	struct perf_cpu_map *c = perf_cpu_map__merge(a, b);
+	struct perf_cpu_map *a = perf_cpu_map__new(lhs);
+	struct perf_cpu_map *b = perf_cpu_map__new(rhs);
 	char buf[100];
 
-	TEST_ASSERT_VAL("failed to merge map: bad nr", perf_cpu_map__nr(c) == 5);
-	cpu_map__snprint(c, buf, sizeof(buf));
-	TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7"));
+	perf_cpu_map__merge(&a, b);
+	TEST_ASSERT_VAL("failed to merge map: bad nr", perf_cpu_map__nr(a) == nr);
+	cpu_map__snprint(a, buf, sizeof(buf));
+	TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, expected));
 	perf_cpu_map__put(b);
-	perf_cpu_map__put(c);
+
+	/*
+	 * If 'b' is a superset of 'a', 'a' points to the same map with the
+	 * map 'b'. In this case, the owner 'b' has released the resource above
+	 * but 'a' still keeps the ownership, the reference counter should be 1.
+	 */
+	TEST_ASSERT_VAL("unexpected refcnt: bad result",
+			refcount_read(perf_cpu_map__refcnt(a)) == 1);
+
+	perf_cpu_map__put(a);
 	return 0;
 }
 
+static int test__cpu_map_merge(struct test_suite *test __maybe_unused,
+			       int subtest __maybe_unused)
+{
+	int ret;
+
+	ret = __test__cpu_map_merge("4,2,1", "4,5,7", 5, "1-2,4-5,7");
+	if (ret)
+		return ret;
+	ret = __test__cpu_map_merge("1-8", "6-9", 9, "1-9");
+	if (ret)
+		return ret;
+	ret = __test__cpu_map_merge("1-8,12-20", "6-9,15", 18, "1-9,12-20");
+	if (ret)
+		return ret;
+	ret = __test__cpu_map_merge("4,2,1", "1", 3, "1-2,4");
+	if (ret)
+		return ret;
+	ret = __test__cpu_map_merge("1", "4,2,1", 3, "1-2,4");
+	if (ret)
+		return ret;
+	ret = __test__cpu_map_merge("1", "1", 1, "1");
+	return ret;
+}
+
 static int __test__cpu_map_intersect(const char *lhs, const char *rhs, int nr, const char *expected)
 {
 	struct perf_cpu_map *a = perf_cpu_map__new(lhs);
@@ -219,30 +252,29 @@ static int test__cpu_map_equal(struct test_suite *test __maybe_unused, int subte
 	struct perf_cpu_map *empty = perf_cpu_map__intersect(one, two);
 	struct perf_cpu_map *pair = perf_cpu_map__new("1-2");
 	struct perf_cpu_map *tmp;
-	struct perf_cpu_map *maps[] = {empty, any, one, two, pair};
+	struct perf_cpu_map **maps[] = {&empty, &any, &one, &two, &pair};
 
 	for (size_t i = 0; i < ARRAY_SIZE(maps); i++) {
 		/* Maps equal themself. */
-		TEST_ASSERT_VAL("equal", perf_cpu_map__equal(maps[i], maps[i]));
+		TEST_ASSERT_VAL("equal", perf_cpu_map__equal(*maps[i], *maps[i]));
 		for (size_t j = 0; j < ARRAY_SIZE(maps); j++) {
 			/* Maps dont't equal each other. */
 			if (i == j)
 				continue;
-			TEST_ASSERT_VAL("not equal", !perf_cpu_map__equal(maps[i], maps[j]));
+			TEST_ASSERT_VAL("not equal", !perf_cpu_map__equal(*maps[i], *maps[j]));
 		}
 	}
 
 	/* Maps equal made maps. */
-	tmp = perf_cpu_map__merge(perf_cpu_map__get(one), two);
-	TEST_ASSERT_VAL("pair", perf_cpu_map__equal(pair, tmp));
-	perf_cpu_map__put(tmp);
+	perf_cpu_map__merge(&two, one);
+	TEST_ASSERT_VAL("pair", perf_cpu_map__equal(pair, two));
 
 	tmp = perf_cpu_map__intersect(pair, one);
 	TEST_ASSERT_VAL("one", perf_cpu_map__equal(one, tmp));
 	perf_cpu_map__put(tmp);
 
 	for (size_t i = 0; i < ARRAY_SIZE(maps); i++)
-		perf_cpu_map__put(maps[i]);
+		perf_cpu_map__put(*maps[i]);
 
 	return TEST_OK;
 }
diff --git a/tools/perf/tests/event_groups.c b/tools/perf/tests/event_groups.c
index ccd9d8b2903f..c119ff114948 100644
--- a/tools/perf/tests/event_groups.c
+++ b/tools/perf/tests/event_groups.c
@@ -10,9 +10,10 @@
 #include "header.h"
 #include "../perf-sys.h"
 
-/* hw: cycles, sw: context-switch, uncore: [arch dependent] */
+/* hw: cycles,instructions sw: context-switch, uncore: [arch dependent] */
 static int types[] = {0, 1, -1};
 static unsigned long configs[] = {0, 3, 0};
+static unsigned long configs_hw[] = {1};
 
 #define NR_UNCORE_PMUS 5
 
@@ -93,7 +94,18 @@ static int run_test(int i, int j, int k)
 		return erroneous ? 0 : -1;
 	}
 
-	sibling_fd2 = event_open(types[k], configs[k], group_fd);
+	/*
+	 * if all three events (leader and two sibling events)
+	 * are hardware events, use instructions as one of the
+	 * sibling event. There is event constraint in powerpc that
+	 * events using same counter cannot be programmed in a group.
+	 * Since PERF_COUNT_HW_INSTRUCTIONS is a generic hardware
+	 * event and present in all platforms, lets use that.
+	 */
+	if (!i && !j && !k)
+		sibling_fd2 = event_open(types[k], configs_hw[k], group_fd);
+	else
+		sibling_fd2 = event_open(types[k], configs[k], group_fd);
 	if (sibling_fd2 == -1) {
 		close(sibling_fd1);
 		close(group_fd);
@@ -124,9 +136,18 @@ static int test__event_groups(struct test_suite *text __maybe_unused, int subtes
 				if (r)
 					ret = TEST_FAIL;
 
-				pr_debug("0x%x 0x%lx, 0x%x 0x%lx, 0x%x 0x%lx: %s\n",
-					 types[i], configs[i], types[j], configs[j],
-					 types[k], configs[k], r ? "Fail" : "Pass");
+				/*
+				 * For all three events as HW events, second sibling
+				 * event is picked from configs_hw. So print accordingly
+				 */
+				if (!i && !j && !k)
+					pr_debug("0x%x 0x%lx, 0x%x 0x%lx, 0x%x 0x%lx: %s\n",
+						 types[i], configs[i], types[j], configs[j],
+						 types[k], configs_hw[k], r ? "Fail" : "Pass");
+				else
+					pr_debug("0x%x 0x%lx, 0x%x 0x%lx, 0x%x 0x%lx: %s\n",
+						 types[i], configs[i], types[j], configs[j],
+						 types[k], configs[k], r ? "Fail" : "Pass");
 			}
 		}
 	}
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index a7fcbd589752..0ee94caf9ec1 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -86,7 +86,6 @@ make_no_libdw_dwarf_unwind := NO_LIBDW_DWARF_UNWIND=1
 make_no_backtrace   := NO_BACKTRACE=1
 make_no_libcapstone := NO_CAPSTONE=1
 make_no_libnuma     := NO_LIBNUMA=1
-make_no_libaudit    := NO_LIBAUDIT=1
 make_no_libbionic   := NO_LIBBIONIC=1
 make_no_auxtrace    := NO_AUXTRACE=1
 make_no_libbpf	    := NO_LIBBPF=1
@@ -97,7 +96,6 @@ make_no_libllvm     := NO_LIBLLVM=1
 make_with_babeltrace:= LIBBABELTRACE=1
 make_with_coresight := CORESIGHT=1
 make_no_sdt	    := NO_SDT=1
-make_no_syscall_tbl := NO_SYSCALL_TABLE=1
 make_no_libpfm4     := NO_LIBPFM4=1
 make_with_gtk2      := GTK2=1
 make_refcnt_check   := EXTRA_CFLAGS="-DREFCNT_CHECKING=1"
@@ -122,10 +120,10 @@ make_static         := LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX3
 # all the NO_* variable combined
 make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_GTK2=1
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_BACKTRACE=1
-make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
+make_minimal        += NO_LIBNUMA=1 NO_LIBBIONIC=1
 make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
 make_minimal        += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1
-make_minimal        += NO_LIBCAP=1 NO_SYSCALL_TABLE=1 NO_CAPSTONE=1
+make_minimal        += NO_LIBCAP=1 NO_CAPSTONE=1
 
 # $(run) contains all available tests
 run := make_pure
@@ -158,7 +156,6 @@ run += make_no_libdw_dwarf_unwind
 run += make_no_backtrace
 run += make_no_libcapstone
 run += make_no_libnuma
-run += make_no_libaudit
 run += make_no_libbionic
 run += make_no_auxtrace
 run += make_no_libbpf
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 82a19674a38f..5ec2e5607987 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -54,8 +54,6 @@ static bool test_perf_config(const struct perf_evsel *evsel, __u64 expected_conf
 	return (evsel->attr.config & PERF_HW_EVENT_MASK) == expected_config;
 }
 
-#ifdef HAVE_LIBTRACEEVENT
-
 #if defined(__s390x__)
 /* Return true if kvm module is available and loaded. Test this
  * and return success when trace point kvm_s390_create_vm
@@ -112,7 +110,6 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist)
 	}
 	return TEST_OK;
 }
-#endif /* HAVE_LIBTRACEEVENT */
 
 static int test__checkevent_raw(struct evlist *evlist)
 {
@@ -311,7 +308,6 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist)
 	return TEST_OK;
 }
 
-#ifdef HAVE_LIBTRACEEVENT
 static int test__checkevent_tracepoint_modifier(struct evlist *evlist)
 {
 	struct evsel *evsel = evlist__first(evlist);
@@ -340,7 +336,6 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist)
 
 	return test__checkevent_tracepoint_multi(evlist);
 }
-#endif /* HAVE_LIBTRACEEVENT */
 
 static int test__checkevent_raw_modifier(struct evlist *evlist)
 {
@@ -629,7 +624,6 @@ static int test__checkevent_pmu(struct evlist *evlist)
 	return TEST_OK;
 }
 
-#ifdef HAVE_LIBTRACEEVENT
 static int test__checkevent_list(struct evlist *evlist)
 {
 	struct evsel *evsel = evlist__first(evlist);
@@ -671,7 +665,6 @@ static int test__checkevent_list(struct evlist *evlist)
 
 	return TEST_OK;
 }
-#endif
 
 static int test__checkevent_pmu_name(struct evlist *evlist)
 {
@@ -971,7 +964,6 @@ static int test__group2(struct evlist *evlist)
 	return TEST_OK;
 }
 
-#ifdef HAVE_LIBTRACEEVENT
 static int test__group3(struct evlist *evlist __maybe_unused)
 {
 	struct evsel *evsel, *group1_leader = NULL, *group2_leader = NULL;
@@ -1078,7 +1070,6 @@ static int test__group3(struct evlist *evlist __maybe_unused)
 	}
 	return TEST_OK;
 }
-#endif
 
 static int test__group4(struct evlist *evlist __maybe_unused)
 {
@@ -1813,7 +1804,6 @@ static int test__term_equal_legacy(struct evlist *evlist)
 	return TEST_OK;
 }
 
-#ifdef HAVE_LIBTRACEEVENT
 static int count_tracepoints(void)
 {
 	struct dirent *events_ent;
@@ -1867,7 +1857,6 @@ static int test__all_tracepoints(struct evlist *evlist)
 
 	return test__checkevent_tracepoint_multi(evlist);
 }
-#endif /* HAVE_LIBTRACEVENT */
 
 struct evlist_test {
 	const char *name;
@@ -1876,7 +1865,6 @@ struct evlist_test {
 };
 
 static const struct evlist_test test__events[] = {
-#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "syscalls:sys_enter_openat",
 		.check = test__checkevent_tracepoint,
@@ -1887,7 +1875,6 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_tracepoint_multi,
 		/* 1 */
 	},
-#endif
 	{
 		.name  = "r1a",
 		.check = test__checkevent_raw,
@@ -1938,7 +1925,6 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_breakpoint_w,
 		/* 1 */
 	},
-#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "syscalls:sys_enter_openat:k",
 		.check = test__checkevent_tracepoint_modifier,
@@ -1949,7 +1935,6 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_tracepoint_multi_modifier,
 		/* 3 */
 	},
-#endif
 	{
 		.name  = "r1a:kp",
 		.check = test__checkevent_raw_modifier,
@@ -1995,13 +1980,11 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_breakpoint_w_modifier,
 		/* 2 */
 	},
-#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "r1,syscalls:sys_enter_openat:k,1:1:hp",
 		.check = test__checkevent_list,
 		/* 3 */
 	},
-#endif
 	{
 		.name  = "instructions:G",
 		.check = test__checkevent_exclude_host_modifier,
@@ -2032,13 +2015,11 @@ static const struct evlist_test test__events[] = {
 		.check = test__group2,
 		/* 9 */
 	},
-#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
 		.check = test__group3,
 		/* 0 */
 	},
-#endif
 	{
 		.name  = "{cycles:u,instructions:kp}:p",
 		.check = test__group4,
@@ -2049,13 +2030,11 @@ static const struct evlist_test test__events[] = {
 		.check = test__group5,
 		/* 2 */
 	},
-#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "*:*",
 		.check = test__all_tracepoints,
 		/* 3 */
 	},
-#endif
 	{
 		.name  = "{cycles,cache-misses:G}:H",
 		.check = test__group_gh1,
@@ -2111,7 +2090,7 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_breakpoint_len_rw_modifier,
 		/* 4 */
 	},
-#if defined(__s390x__) && defined(HAVE_LIBTRACEEVENT)
+#if defined(__s390x__)
 	{
 		.name  = "kvm-s390:kvm_s390_create_vm",
 		.check = test__checkevent_tracepoint,
@@ -2265,13 +2244,11 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_breakpoint_2_events,
 		/* 3 */
 	},
-#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name = "9p:9p_client_req",
 		.check = test__checkevent_tracepoint,
 		/* 4 */
 	},
-#endif
 };
 
 static const struct evlist_test test__events_pmu[] = {
diff --git a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh
index bead723e34af..8226449ac5c3 100755
--- a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh
+++ b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+# perf_probe :: Reject blacklisted probes (exclusive)
 # SPDX-License-Identifier: GPL-2.0
 
 #
@@ -22,7 +22,7 @@ TEST_RESULT=0
 BLACKFUNC_LIST=`head -n 5 /sys/kernel/debug/kprobes/blacklist 2> /dev/null | cut -f2`
 if [ -z "$BLACKFUNC_LIST" ]; then
 	print_overall_skipped
-	exit 0
+	exit 2
 fi
 
 # try to find vmlinux with DWARF debug info
diff --git a/tools/perf/tests/shell/base_probe/test_adding_kernel.sh b/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
index d541ffd44a93..df288cf90cd6 100755
--- a/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
+++ b/tools/perf/tests/shell/base_probe/test_adding_kernel.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Add 'perf probe's, list and remove them
+# perf_probe :: Add probes, list and remove them (exclusive)
 # SPDX-License-Identifier: GPL-2.0
 
 #
@@ -33,7 +33,7 @@ fi
 check_kprobes_available
 if [ $? -ne 0 ]; then
 	print_overall_skipped
-	exit 0
+	exit 2
 fi
 
 
@@ -169,7 +169,7 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "force-adding probes :: second pr
 (( TEST_RESULT += $? ))
 
 # adding existing probe with '--force' should pass
-NO_OF_PROBES=`$CMD_PERF probe -l | wc -l`
+NO_OF_PROBES=`$CMD_PERF probe -l $TEST_PROBE| wc -l`
 $CMD_PERF probe --force --add $TEST_PROBE 2> $LOGS_DIR/adding_kernel_forceadd_03.err
 PERF_EXIT_CODE=$?
 
@@ -205,7 +205,7 @@ print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "using doubled probe"
 $CMD_PERF probe --del \* 2> $LOGS_DIR/adding_kernel_removing_wildcard.err
 PERF_EXIT_CODE=$?
 
-../common/check_all_lines_matched.pl "Removed event: probe:$TEST_PROBE" "Removed event: probe:${TEST_PROBE}_1" < $LOGS_DIR/adding_kernel_removing_wildcard.err
+../common/check_all_patterns_found.pl "Removed event: probe:$TEST_PROBE" "Removed event: probe:${TEST_PROBE}_1" < $LOGS_DIR/adding_kernel_removing_wildcard.err
 CHECK_EXIT_CODE=$?
 
 print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "removing multiple probes"
diff --git a/tools/perf/tests/shell/base_probe/test_basic.sh b/tools/perf/tests/shell/base_probe/test_basic.sh
index 09669ec479f2..9d8b5afbeddd 100755
--- a/tools/perf/tests/shell/base_probe/test_basic.sh
+++ b/tools/perf/tests/shell/base_probe/test_basic.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+# perf_probe :: Basic perf probe functionality (exclusive)
 # SPDX-License-Identifier: GPL-2.0
 
 #
@@ -19,7 +19,7 @@ TEST_RESULT=0
 
 if ! check_kprobes_available; then
 	print_overall_skipped
-	exit 0
+	exit 2
 fi
 
 
diff --git a/tools/perf/tests/shell/base_probe/test_invalid_options.sh b/tools/perf/tests/shell/base_probe/test_invalid_options.sh
index 1fedfd8b0d0d..92f7254eb32a 100755
--- a/tools/perf/tests/shell/base_probe/test_invalid_options.sh
+++ b/tools/perf/tests/shell/base_probe/test_invalid_options.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+# perf_probe :: Reject invalid options (exclusive)
 # SPDX-License-Identifier: GPL-2.0
 
 #
@@ -19,9 +19,12 @@ TEST_RESULT=0
 
 if ! check_kprobes_available; then
 	print_overall_skipped
-	exit 0
+	exit 2
 fi
 
+# Check for presence of DWARF
+$CMD_PERF check feature -q dwarf
+[ $? -ne 0 ] && HINT_FAIL="Some of the tests need DWARF to run"
 
 ### missing argument
 
@@ -75,5 +78,5 @@ done
 
 
 # print overall results
-print_overall_results "$TEST_RESULT"
+print_overall_results "$TEST_RESULT" $HINT_FAIL
 exit $?
diff --git a/tools/perf/tests/shell/base_probe/test_line_semantics.sh b/tools/perf/tests/shell/base_probe/test_line_semantics.sh
index d8f4bde0f585..20435b6bf6bc 100755
--- a/tools/perf/tests/shell/base_probe/test_line_semantics.sh
+++ b/tools/perf/tests/shell/base_probe/test_line_semantics.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+# perf_probe :: Check patterns for line semantics (exclusive)
 # SPDX-License-Identifier: GPL-2.0
 
 #
@@ -20,9 +20,12 @@ TEST_RESULT=0
 
 if ! check_kprobes_available; then
 	print_overall_skipped
-	exit 0
+	exit 2
 fi
 
+# Check for presence of DWARF
+$CMD_PERF check feature -q dwarf
+[ $? -ne 0 ] && HINT_FAIL="Some of the tests need DWARF to run"
 
 ### acceptable --line descriptions
 
@@ -51,5 +54,5 @@ done
 
 
 # print overall results
-print_overall_results "$TEST_RESULT"
+print_overall_results "$TEST_RESULT" $HINT_FAIL
 exit $?
diff --git a/tools/perf/tests/shell/base_report/setup.sh b/tools/perf/tests/shell/base_report/setup.sh
index 4caa496660c6..b03501b2e8fc 100755
--- a/tools/perf/tests/shell/base_report/setup.sh
+++ b/tools/perf/tests/shell/base_report/setup.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+# perftool-testsuite :: perf_report
 # SPDX-License-Identifier: GPL-2.0
 
 #
diff --git a/tools/perf/tests/shell/base_report/test_basic.sh b/tools/perf/tests/shell/base_report/test_basic.sh
index 47677cbd4df3..2398eba4d3fd 100755
--- a/tools/perf/tests/shell/base_report/test_basic.sh
+++ b/tools/perf/tests/shell/base_report/test_basic.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+# perf_report :: Basic perf report options (exclusive)
 # SPDX-License-Identifier: GPL-2.0
 
 #
diff --git a/tools/perf/tests/shell/common/init.sh b/tools/perf/tests/shell/common/init.sh
index 075f17623c8e..26c7525651e0 100644
--- a/tools/perf/tests/shell/common/init.sh
+++ b/tools/perf/tests/shell/common/init.sh
@@ -46,10 +46,13 @@ print_results()
 print_overall_results()
 {
 	RETVAL="$1"; shift
+	TASK_COMMENT="$*"
+	test -n "$TASK_COMMENT" && TASK_COMMENT=":: $TASK_COMMENT"
+
 	if [ $RETVAL -eq 0 ]; then
 		_echo "$MALLPASS## [ PASS ] ##$MEND $TEST_NAME :: $THIS_TEST_NAME SUMMARY"
 	else
-		_echo "$MALLFAIL## [ FAIL ] ##$MEND $TEST_NAME :: $THIS_TEST_NAME SUMMARY :: $RETVAL failures found"
+		_echo "$MALLFAIL## [ FAIL ] ##$MEND $TEST_NAME :: $THIS_TEST_NAME SUMMARY :: $RETVAL failures found $TASK_COMMENT"
 	fi
 	return $RETVAL
 }
@@ -85,7 +88,7 @@ consider_skipping()
 	# the runmode of a testcase needs to be at least the current suite's runmode
 	if [ $PERFTOOL_TESTSUITE_RUNMODE -lt $TESTCASE_RUNMODE ]; then
 		print_overall_skipped
-		exit 0
+		exit 2
 	fi
 }
 
diff --git a/tools/perf/tests/shell/coresight/Makefile b/tools/perf/tests/shell/coresight/Makefile
index b070e779703e..fa08fd9a5991 100644
--- a/tools/perf/tests/shell/coresight/Makefile
+++ b/tools/perf/tests/shell/coresight/Makefile
@@ -24,6 +24,6 @@ CLEANDIRS = $(SUBDIRS:%=clean-%)
 
 clean: $(CLEANDIRS)
 $(CLEANDIRS):
-	$(call QUIET_CLEAN, test-$(@:clean-%=%)) $(Q)$(MAKE) -C $(@:clean-%=%) clean >/dev/null
+	$(call QUIET_CLEAN, test-$(@:clean-%=%)) $(MAKE) -C $(@:clean-%=%) clean >/dev/null
 
 .PHONY: all clean $(SUBDIRS) $(CLEANDIRS) $(INSTALLDIRS)
diff --git a/tools/perf/tests/shell/ftrace.sh b/tools/perf/tests/shell/ftrace.sh
index 2df05052c324..c243731d2fbf 100755
--- a/tools/perf/tests/shell/ftrace.sh
+++ b/tools/perf/tests/shell/ftrace.sh
@@ -67,11 +67,8 @@ test_ftrace_latency() {
 
 test_ftrace_profile() {
     echo "perf ftrace profile test"
-    perf ftrace profile -m 16M sleep 0.1 > "${output}"
+    perf ftrace profile --graph-opts depth=5 sleep 0.1 > "${output}"
     grep ^# "${output}"
-    grep sleep "${output}"
-    grep schedule "${output}"
-    grep execve "${output}"
     time_re="[[:space:]]+1[[:digit:]]{5}\.[[:digit:]]{3}"
     # 100283.000 100283.000 100283.000          1   __x64_sys_clock_nanosleep
     # Check for one *clock_nanosleep line with a Count of just 1 that takes a bit more than 0.1 seconds
diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py
index 8ddb85586131..b066d721f897 100644
--- a/tools/perf/tests/shell/lib/perf_json_output_lint.py
+++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py
@@ -69,16 +69,16 @@ def check_json_output(expected_items):
   for item in json.loads(input):
     if expected_items != -1:
       count = len(item)
-      if count != expected_items and count >= 1 and count <= 7 and 'metric-value' in item:
+      if count not in expected_items and count >= 1 and count <= 7 and 'metric-value' in item:
         # Events that generate >1 metric may have isolated metric
         # values and possibly other prefixes like interval, core,
         # aggregate-number, or event-runtime/pcnt-running from multiplexing.
         pass
-      elif count != expected_items and count >= 1 and count <= 5 and 'metricgroup' in item:
+      elif count not in expected_items and count >= 1 and count <= 5 and 'metricgroup' in item:
         pass
-      elif count == expected_items + 1 and 'metric-threshold' in item:
+      elif count - 1 in expected_items and 'metric-threshold' in item:
           pass
-      elif count != expected_items:
+      elif count not in expected_items:
         raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}'
                            f' in \'{item}\'')
     for key, value in item.items():
@@ -90,11 +90,11 @@ def check_json_output(expected_items):
 
 try:
   if args.no_args or args.system_wide or args.event:
-    expected_items = 7
+    expected_items = [5, 7]
   elif args.interval or args.per_thread or args.system_wide_no_aggr:
-    expected_items = 8
+    expected_items = [6, 8]
   elif args.per_core or args.per_socket or args.per_node or args.per_die or args.per_cluster or args.per_cache:
-    expected_items = 9
+    expected_items = [7, 9]
   else:
     # If no option is specified, don't check the number of items.
     expected_items = -1
diff --git a/tools/perf/tests/shell/perftool-testsuite_probe.sh b/tools/perf/tests/shell/perftool-testsuite_probe.sh
index a0fec33a0358..7b1bfd0f888f 100755
--- a/tools/perf/tests/shell/perftool-testsuite_probe.sh
+++ b/tools/perf/tests/shell/perftool-testsuite_probe.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# perftool-testsuite_probe
+# perftool-testsuite_probe (exclusive)
 # SPDX-License-Identifier: GPL-2.0
 
 test -d "$(dirname "$0")/base_probe" || exit 2
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 47a26f25db9f..d5e5193cceb6 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-# probe libc's inet_pton & backtrace it with ping
+# probe libc's inet_pton & backtrace it with ping (exclusive)
 
 # Installs a probe on libc's inet_pton function, that will use uprobes,
 # then use 'perf trace' on a ping to localhost asking for just one packet
@@ -43,17 +43,8 @@ trace_libc_inet_pton_backtrace() {
 		echo "((__GI_)?getaddrinfo|text_to_binary_address)\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
 		echo "(gaih_inet|main)\+0x[[:xdigit:]]+[[:space:]]\(inlined|.*/bin/ping.*\)$" >> $expected
 		;;
-	ppc64|ppc64le)
-		eventattr='max-stack=4'
-		# Add gaih_inet to expected backtrace only if it is part of libc.
-		if nm $libc | grep -F -q gaih_inet.; then
-			echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
-		fi
-		echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
-		echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected
-		;;
 	*)
-		eventattr='max-stack=3'
+		eventattr='max-stack=4'
 		echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected
 		;;
 	esac
@@ -76,14 +67,25 @@ trace_libc_inet_pton_backtrace() {
 	fi
 	perf script -i $perf_data | tac | grep -m1 ^ping -B9 | tac > $perf_script
 
-	exec 3<$perf_script
 	exec 4<$expected
-	while read line <&3 && read -r pattern <&4; do
+	while read -r pattern <&4; do
+		echo "Pattern: $pattern"
 		[ -z "$pattern" ] && break
-		echo $line
-		echo "$line" | grep -E -q "$pattern"
-		if [ $? -ne 0 ] ; then
-			printf "FAIL: expected backtrace entry \"%s\" got \"%s\"\n" "$pattern" "$line"
+
+		found=0
+
+		# Search lines in the perf script result
+		exec 3<$perf_script
+		while read line <&3; do
+			[ -z "$line" ] && break
+			echo "  Matching: $line"
+			! echo "$line" | grep -E -q "$pattern"
+			found=$?
+			[ $found -eq 1 ] && break
+		done
+
+		if [ $found -ne 1 ] ; then
+			printf "FAIL: Didn't find the expected backtrace entry \"%s\"\n" "$pattern"
 			return 1
 		fi
 	done
diff --git a/tools/perf/tests/shell/stat+std_output.sh b/tools/perf/tests/shell/stat+std_output.sh
index cbf2894b2c84..0f7967be60af 100755
--- a/tools/perf/tests/shell/stat+std_output.sh
+++ b/tools/perf/tests/shell/stat+std_output.sh
@@ -13,7 +13,7 @@ stat_output=$(mktemp /tmp/__perf_test.stat_output.std.XXXXX)
 
 event_name=(cpu-clock task-clock context-switches cpu-migrations page-faults stalled-cycles-frontend stalled-cycles-backend cycles instructions branches branch-misses)
 event_metric=("CPUs utilized" "CPUs utilized" "/sec" "/sec" "/sec" "frontend cycles idle" "backend cycles idle" "GHz" "insn per cycle" "/sec" "of all branches")
-skip_metric=("stalled cycles per insn" "tma_" "retiring" "frontend_bound" "bad_speculation" "backend_bound")
+skip_metric=("stalled cycles per insn" "tma_" "retiring" "frontend_bound" "bad_speculation" "backend_bound" "TopdownL1" "percent of slots")
 
 cleanup() {
   rm -f "${stat_output}"
diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
index 7a8adf81e4b3..68323d636fb7 100755
--- a/tools/perf/tests/shell/stat.sh
+++ b/tools/perf/tests/shell/stat.sh
@@ -187,7 +187,11 @@ test_hybrid() {
   # Run default Perf stat
   cycles_events=$(perf stat -- true 2>&1 | grep -E "/cycles/[uH]*|  cycles[:uH]*  " -c)
 
-  if [ "$pmus" -ne "$cycles_events" ]
+  # The expectation is that default output will have a cycles events on each
+  # hybrid PMU. In situations with no cycles PMU events, like virtualized, this
+  # can fall back to task-clock and so the end count may be 0. Fail if neither
+  # condition holds.
+  if [ "$pmus" -ne "$cycles_events" ] && [ "0" -ne "$cycles_events" ]
   then
     echo "hybrid test [Found $pmus PMUs but $cycles_events cycles events. Failed]"
     err=1
diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh
index 3258368634f7..a69aab70dd8a 100755
--- a/tools/perf/tests/shell/test_arm_spe.sh
+++ b/tools/perf/tests/shell/test_arm_spe.sh
@@ -107,7 +107,37 @@ arm_spe_system_wide_test() {
 	arm_spe_report "SPE system-wide testing" $err
 }
 
+arm_spe_discard_test() {
+	echo "SPE discard mode"
+
+	for f in /sys/bus/event_source/devices/arm_spe_*; do
+		if [ -e "$f/format/discard" ]; then
+			cpu=$(cut -c -1 "$f/cpumask")
+			break
+		fi
+	done
+
+	if [ -z $cpu ]; then
+		arm_spe_report "SPE discard mode not present" 2
+		return
+	fi
+
+	# Test can use wildcard SPE instance and Perf will only open the event
+	# on instances that have that format flag. But make sure the target
+	# runs on an instance with discard mode otherwise we're not testing
+	# anything.
+	perf record -o ${perfdata} -e arm_spe/discard/ -N -B --no-bpf-event \
+		-- taskset --cpu-list $cpu true
+
+	if perf report -i ${perfdata} --stats | grep 'AUX events\|AUXTRACE events'; then
+		arm_spe_report "SPE discard mode found unexpected data" 1
+	else
+		arm_spe_report "SPE discard mode" 0
+	fi
+}
+
 arm_spe_snapshot_test
 arm_spe_system_wide_test
+arm_spe_discard_test
 
 exit $glb_err
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index 5f14d0cb013f..e01df7581393 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -30,7 +30,7 @@ test_user_branches() {
 	echo "Testing user branch stack sampling"
 
 	perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- ${TESTPROG} > /dev/null 2>&1
-	perf script -i $TMPDIR/perf.data --fields brstacksym | xargs -n1 > $TMPDIR/perf.script
+	perf script -i $TMPDIR/perf.data --fields brstacksym | tr -s ' ' '\n' > $TMPDIR/perf.script
 
 	# example of branch entries:
 	# 	brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL
@@ -59,7 +59,7 @@ test_filter() {
 	echo "Testing branch stack filtering permutation ($test_filter_filter,$test_filter_expect)"
 
 	perf record -o $TMPDIR/perf.data --branch-filter $test_filter_filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1
-	perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script
+	perf script -i $TMPDIR/perf.data --fields brstack | tr -s ' ' '\n' | grep '.' > $TMPDIR/perf.script
 
 	# fail if we find any branch type that doesn't match any of the expected ones
 	# also consider UNKNOWN branch types (-)
diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh
index e6f0070975f6..f3a9a040bacc 100755
--- a/tools/perf/tests/shell/test_intel_pt.sh
+++ b/tools/perf/tests/shell/test_intel_pt.sh
@@ -644,6 +644,33 @@ test_pipe()
 	return 0
 }
 
+test_pause_resume()
+{
+	echo "--- Test with pause / resume ---"
+	if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt/aux-action=start-paused/u uname ; then
+		echo "SKIP: pause / resume is not supported"
+		return 2
+	fi
+	if ! perf_record_no_bpf -o "${perfdatafile}" \
+			-e intel_pt/aux-action=start-paused/u \
+			-e instructions/period=50000,aux-action=resume,name=Resume/u \
+			-e instructions/period=100000,aux-action=pause,name=Pause/u uname  ; then
+		echo "perf record with pause / resume failed"
+		return 1
+	fi
+	if ! perf script -i "${perfdatafile}" --itrace=b -Fperiod,event | \
+			awk 'BEGIN {paused=1;branches=0}
+			     /Resume/ {paused=0}
+			     /branches/ {if (paused) exit 1;branches=1}
+			     /Pause/ {paused=1}
+			     END {if (!branches) exit 1}' ; then
+		echo "perf record with pause / resume failed"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
 count_result()
 {
 	if [ "$1" -eq 2 ] ; then
@@ -672,6 +699,7 @@ test_power_event			|| ret=$? ; count_result $ret ; ret=0
 test_no_tnt				|| ret=$? ; count_result $ret ; ret=0
 test_event_trace			|| ret=$? ; count_result $ret ; ret=0
 test_pipe				|| ret=$? ; count_result $ret ; ret=0
+test_pause_resume			|| ret=$? ; count_result $ret ; ret=0
 
 cleanup
 
diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh
index 7d76fc63d995..e194fcf61df3 100755
--- a/tools/perf/tests/shell/test_task_analyzer.sh
+++ b/tools/perf/tests/shell/test_task_analyzer.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# perf script task-analyzer tests
+# perf script task-analyzer tests (exclusive)
 # SPDX-License-Identifier: GPL-2.0
 
 tmpdir=$(mktemp -d /tmp/perf-script-task-analyzer-XXXXX)
diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh
index 5a3b8a5a9b5c..8d1e6bbeac90 100755
--- a/tools/perf/tests/shell/trace_btf_enum.sh
+++ b/tools/perf/tests/shell/trace_btf_enum.sh
@@ -26,8 +26,12 @@ check_vmlinux() {
 trace_landlock() {
   echo "Tracing syscall ${syscall}"
 
-  # test flight just to see if landlock_add_rule and libbpf are available
-  $TESTPROG
+  # test flight just to see if landlock_add_rule is available
+  if ! perf trace $TESTPROG 2>&1 | grep -q landlock
+  then
+    echo "No landlock system call found, skipping to non-syscall tracing."
+    return
+  fi
 
   if perf trace -e $syscall $TESTPROG 2>&1 | \
      grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*"
diff --git a/tools/perf/tests/shell/trace_btf_general.sh b/tools/perf/tests/shell/trace_btf_general.sh
new file mode 100755
index 000000000000..e9ee727f3433
--- /dev/null
+++ b/tools/perf/tests/shell/trace_btf_general.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+# perf trace BTF general tests
+# SPDX-License-Identifier: GPL-2.0
+
+err=0
+set -e
+
+# shellcheck source=lib/probe.sh
+. "$(dirname $0)"/lib/probe.sh
+
+file1=$(mktemp /tmp/file1_XXXX)
+file2=$(echo $file1 | sed 's/file1/file2/g')
+
+buffer="buffer content"
+perf_config_tmp=$(mktemp /tmp/.perfconfig_XXXXX)
+
+trap cleanup EXIT TERM INT HUP
+
+check_vmlinux() {
+  echo "Checking if vmlinux BTF exists"
+  if [ ! -f /sys/kernel/btf/vmlinux ]
+  then
+    echo "Skipped due to missing vmlinux BTF"
+    return 2
+  fi
+  return 0
+}
+
+trace_test_string() {
+  echo "Testing perf trace's string augmentation"
+  if ! perf trace -e renameat* --max-events=1 -- mv ${file1} ${file2} 2>&1 | \
+    grep -q -E "^mv/[0-9]+ renameat(2)?\(.*, \"${file1}\", .*, \"${file2}\", .*\) += +[0-9]+$"
+  then
+    echo "String augmentation test failed"
+    err=1
+  fi
+}
+
+trace_test_buffer() {
+  echo "Testing perf trace's buffer augmentation"
+  # echo will insert a newline (\10) at the end of the buffer
+  if ! perf trace -e write --max-events=1 -- echo "${buffer}" 2>&1 | \
+    grep -q -E "^echo/[0-9]+ write\([0-9]+, ${buffer}.*, [0-9]+\) += +[0-9]+$"
+  then
+    echo "Buffer augmentation test failed"
+    err=1
+  fi
+}
+
+trace_test_struct_btf() {
+  echo "Testing perf trace's struct augmentation"
+  if ! perf trace -e clock_nanosleep --force-btf --max-events=1 -- sleep 1 2>&1 | \
+    grep -q -E "^sleep/[0-9]+ clock_nanosleep\(0, 0, \{1,\}, 0x[0-9a-f]+\) += +[0-9]+$"
+  then
+    echo "BTF struct augmentation test failed"
+    err=1
+  fi
+}
+
+cleanup() {
+  rm -rf ${file1} ${file2} ${perf_config_tmp}
+}
+
+trap_cleanup() {
+  echo "Unexpected signal in ${FUNCNAME[1]}"
+  cleanup
+  exit 1
+}
+
+# don't overwrite user's perf config
+trace_config() {
+  export PERF_CONFIG=${perf_config_tmp}
+  perf config trace.show_arg_names=false trace.show_duration=false \
+    trace.show_timestamp=false trace.args_alignment=0
+}
+
+skip_if_no_perf_trace || exit 2
+check_vmlinux || exit 2
+
+trace_config
+
+trace_test_string
+
+if [ $err = 0 ]; then
+  trace_test_buffer
+fi
+
+if [ $err = 0 ]; then
+  trace_test_struct_btf
+fi
+
+cleanup
+
+exit $err
diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c
index e6fd934b027a..a67c756f90b8 100644
--- a/tools/perf/tests/sigtrap.c
+++ b/tools/perf/tests/sigtrap.c
@@ -56,6 +56,7 @@ static struct perf_event_attr make_event_attr(void)
 
 #ifdef HAVE_BPF_SKEL
 #include <bpf/btf.h>
+#include <util/btf.h>
 
 static struct btf *btf;
 
@@ -73,21 +74,6 @@ static void btf__exit(void)
 	btf = NULL;
 }
 
-static const struct btf_member *__btf_type__find_member_by_name(int type_id, const char *member_name)
-{
-	const struct btf_type *t = btf__type_by_id(btf, type_id);
-	const struct btf_member *m;
-	int i;
-
-	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
-		const char *current_member_name = btf__name_by_offset(btf, m->name_off);
-		if (!strcmp(current_member_name, member_name))
-			return m;
-	}
-
-	return NULL;
-}
-
 static bool attr_has_sigtrap(void)
 {
 	int id;
@@ -101,7 +87,7 @@ static bool attr_has_sigtrap(void)
 	if (id < 0)
 		return false;
 
-	return __btf_type__find_member_by_name(id, "sigtrap") != NULL;
+	return __btf_type__find_member_by_name(btf, id, "sigtrap") != NULL;
 }
 
 static bool kernel_with_sleepable_spinlocks(void)
@@ -119,7 +105,7 @@ static bool kernel_with_sleepable_spinlocks(void)
 		return false;
 
 	// Only RT has a "lock" member for "struct spinlock"
-	member = __btf_type__find_member_by_name(id, "lock");
+	member = __btf_type__find_member_by_name(btf, id, "lock");
 	if (member == NULL)
 		return false;
 
diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c
index 6468cc0d0204..d60983657bad 100644
--- a/tools/perf/tests/stat.c
+++ b/tools/perf/tests/stat.c
@@ -27,7 +27,7 @@ static int process_stat_config_event(const struct perf_tool *tool __maybe_unused
 				     struct machine *machine __maybe_unused)
 {
 	struct perf_record_stat_config *config = &event->stat_config;
-	struct perf_stat_config stat_config = {};
+	struct perf_stat_config test_stat_config = {};
 
 #define HAS(term, val) \
 	has_term(config, PERF_STAT_CONFIG_TERM__##term, val)
@@ -39,25 +39,27 @@ static int process_stat_config_event(const struct perf_tool *tool __maybe_unused
 
 #undef HAS
 
-	perf_event__read_stat_config(&stat_config, config);
+	perf_event__read_stat_config(&test_stat_config, config);
 
-	TEST_ASSERT_VAL("wrong aggr_mode", stat_config.aggr_mode == AGGR_CORE);
-	TEST_ASSERT_VAL("wrong scale",     stat_config.scale == 1);
-	TEST_ASSERT_VAL("wrong interval",  stat_config.interval == 1);
+	TEST_ASSERT_VAL("wrong aggr_mode", test_stat_config.aggr_mode == AGGR_CORE);
+	TEST_ASSERT_VAL("wrong scale",     test_stat_config.scale == 1);
+	TEST_ASSERT_VAL("wrong interval",  test_stat_config.interval == 1);
 	return 0;
 }
 
 static int test__synthesize_stat_config(struct test_suite *test __maybe_unused,
 					int subtest __maybe_unused)
 {
-	struct perf_stat_config stat_config = {
+	struct perf_stat_config test_stat_config = {
 		.aggr_mode	= AGGR_CORE,
 		.scale		= 1,
 		.interval	= 1,
 	};
 
 	TEST_ASSERT_VAL("failed to synthesize stat_config",
-		!perf_event__synthesize_stat_config(NULL, &stat_config, process_stat_config_event, NULL));
+		!perf_event__synthesize_stat_config(NULL, &test_stat_config,
+						    process_stat_config_event,
+						    NULL));
 
 	return 0;
 }
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 5cab17a1942e..576f82a15015 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -583,4 +583,4 @@ out_err:
 	goto out;
 }
 
-DEFINE_SUITE("Track with sched_switch", switch_tracking);
+DEFINE_SUITE_EXCLUSIVE("Track with sched_switch", switch_tracking);
diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c
index cf3ae0c1d871..1d5759d08141 100644
--- a/tools/perf/tests/tests-scripts.c
+++ b/tools/perf/tests/tests-scripts.c
@@ -174,7 +174,7 @@ static void append_script(int dir_fd, const char *name, char *desc,
 	char filename[PATH_MAX], link[128];
 	struct test_suite *test_suite, **result_tmp;
 	struct test_case *tests;
-	size_t len;
+	ssize_t len;
 	char *exclusive;
 
 	snprintf(link, sizeof(link), "/proc/%d/fd/%d", getpid(), dir_fd);
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index cb58b43aa063..8aea344536b8 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -81,6 +81,16 @@ struct test_suite {
 		.test_cases = tests__##_name,		\
 	}
 
+#define DEFINE_SUITE_EXCLUSIVE(description, _name)	\
+	struct test_case tests__##_name[] = {           \
+		TEST_CASE_EXCLUSIVE(description, _name),\
+		{	.name = NULL, }			\
+	};						\
+	struct test_suite suite__##_name = {		\
+		.desc = description,			\
+		.test_cases = tests__##_name,		\
+	}
+
 /* Tests */
 DECLARE_SUITE(vmlinux_matches_kallsyms);
 DECLARE_SUITE(openat_syscall_event);
diff --git a/tools/perf/tests/workloads/landlock.c b/tools/perf/tests/workloads/landlock.c
index e2b5ef647c09..1f285b7b6236 100644
--- a/tools/perf/tests/workloads/landlock.c
+++ b/tools/perf/tests/workloads/landlock.c
@@ -10,7 +10,7 @@
  * 'perf test' workload) we just add the required types and defines here instead
  * of including linux/landlock, that isn't available in older systems.
  *
- * We are not interested in the the result of the syscall, just in intercepting
+ * We are not interested in the result of the syscall, just in intercepting
  * its arguments.
  */
 
diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh
index 30d3889b2957..b22890b8d272 100755
--- a/tools/perf/trace/beauty/arch_errno_names.sh
+++ b/tools/perf/trace/beauty/arch_errno_names.sh
@@ -57,7 +57,8 @@ create_arch_errno_table_func()
 	archlist="$1"
 	default="$2"
 
-	printf 'arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch)\n'
+	printf 'static arch_syscalls__strerrno_t *\n'
+	printf 'arch_syscalls__strerrno_function(const char *arch)\n'
 	printf '{\n'
 	for arch in $archlist; do
 		arch_str=$(arch_string "$arch")
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index d7e727345dab..135d6ce88fb3 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -754,7 +754,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
 				hbt->timer(hbt->arg);
 
 			if (delay_secs != 0) {
-				symbol__annotate_decay_histogram(sym, evsel->core.idx);
+				symbol__annotate_decay_histogram(sym, evsel);
 				hists__scnprintf_title(hists, title, sizeof(title));
 				annotate_browser__show(&browser->b, title, help);
 			}
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
index e437d7889de6..2d04ece833aa 100644
--- a/tools/perf/ui/browsers/scripts.c
+++ b/tools/perf/ui/browsers/scripts.c
@@ -1,16 +1,18 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "../../builtin.h"
-#include "../../perf.h"
 #include "../../util/util.h" // perf_exe()
 #include "../util.h"
+#include "../../util/evlist.h"
 #include "../../util/hist.h"
 #include "../../util/debug.h"
+#include "../../util/session.h"
 #include "../../util/symbol.h"
 #include "../browser.h"
 #include "../libslang.h"
 #include "config.h"
+#include <linux/err.h>
 #include <linux/string.h>
 #include <linux/zalloc.h>
+#include <subcmd/exec-cmd.h>
 #include <stdlib.h>
 
 #define SCRIPT_NAMELEN	128
@@ -78,6 +80,177 @@ static int scripts_config(const char *var, const char *value, void *data)
 }
 
 /*
+ * Some scripts specify the required events in their "xxx-record" file,
+ * this function will check if the events in perf.data match those
+ * mentioned in the "xxx-record".
+ *
+ * Fixme: All existing "xxx-record" are all in good formats "-e event ",
+ * which is covered well now. And new parsing code should be added to
+ * cover the future complex formats like event groups etc.
+ */
+static int check_ev_match(int dir_fd, const char *scriptname, struct perf_session *session)
+{
+	char line[BUFSIZ];
+	FILE *fp;
+
+	{
+		char filename[FILENAME_MAX + 5];
+		int fd;
+
+		scnprintf(filename, sizeof(filename), "bin/%s-record", scriptname);
+		fd = openat(dir_fd, filename, O_RDONLY);
+		if (fd == -1)
+			return -1;
+		fp = fdopen(fd, "r");
+		if (!fp)
+			return -1;
+	}
+
+	while (fgets(line, sizeof(line), fp)) {
+		char *p = skip_spaces(line);
+
+		if (*p == '#')
+			continue;
+
+		while (strlen(p)) {
+			int match, len;
+			struct evsel *pos;
+			char evname[128];
+
+			p = strstr(p, "-e");
+			if (!p)
+				break;
+
+			p += 2;
+			p = skip_spaces(p);
+			len = strcspn(p, " \t");
+			if (!len)
+				break;
+
+			snprintf(evname, len + 1, "%s", p);
+
+			match = 0;
+			evlist__for_each_entry(session->evlist, pos) {
+				if (evsel__name_is(pos, evname)) {
+					match = 1;
+					break;
+				}
+			}
+
+			if (!match) {
+				fclose(fp);
+				return -1;
+			}
+		}
+	}
+
+	fclose(fp);
+	return 0;
+}
+
+/*
+ * Return -1 if none is found, otherwise the actual scripts number.
+ *
+ * Currently the only user of this function is the script browser, which
+ * will list all statically runnable scripts, select one, execute it and
+ * show the output in a perf browser.
+ */
+static int find_scripts(char **scripts_array, char **scripts_path_array, int num,
+		 int pathlen)
+{
+	struct dirent *script_dirent, *lang_dirent;
+	int scripts_dir_fd, lang_dir_fd;
+	DIR *scripts_dir, *lang_dir;
+	struct perf_session *session;
+	struct perf_data data = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+	};
+	char *temp;
+	int i = 0;
+	const char *exec_path = get_argv_exec_path();
+
+	session = perf_session__new(&data, NULL);
+	if (IS_ERR(session))
+		return PTR_ERR(session);
+
+	{
+		char scripts_path[PATH_MAX];
+
+		snprintf(scripts_path, sizeof(scripts_path), "%s/scripts", exec_path);
+		scripts_dir_fd = open(scripts_path, O_DIRECTORY);
+		pr_err("Failed to open directory '%s'", scripts_path);
+		if (scripts_dir_fd == -1) {
+			perf_session__delete(session);
+			return -1;
+		}
+	}
+	scripts_dir = fdopendir(scripts_dir_fd);
+	if (!scripts_dir) {
+		close(scripts_dir_fd);
+		perf_session__delete(session);
+		return -1;
+	}
+
+	while ((lang_dirent = readdir(scripts_dir)) != NULL) {
+		if (lang_dirent->d_type != DT_DIR &&
+		    (lang_dirent->d_type == DT_UNKNOWN &&
+		     !is_directory_at(scripts_dir_fd, lang_dirent->d_name)))
+			continue;
+		if (!strcmp(lang_dirent->d_name, ".") || !strcmp(lang_dirent->d_name, ".."))
+			continue;
+
+#ifndef HAVE_LIBPERL_SUPPORT
+		if (strstr(lang_dirent->d_name, "perl"))
+			continue;
+#endif
+#ifndef HAVE_LIBPYTHON_SUPPORT
+		if (strstr(lang_dirent->d_name, "python"))
+			continue;
+#endif
+
+		lang_dir_fd = openat(scripts_dir_fd, lang_dirent->d_name, O_DIRECTORY);
+		if (lang_dir_fd == -1)
+			continue;
+		lang_dir = fdopendir(lang_dir_fd);
+		if (!lang_dir) {
+			close(lang_dir_fd);
+			continue;
+		}
+		while ((script_dirent = readdir(lang_dir)) != NULL) {
+			if (script_dirent->d_type == DT_DIR)
+				continue;
+			if (script_dirent->d_type == DT_UNKNOWN &&
+			    is_directory_at(lang_dir_fd, script_dirent->d_name))
+				continue;
+			/* Skip those real time scripts: xxxtop.p[yl] */
+			if (strstr(script_dirent->d_name, "top."))
+				continue;
+			if (i >= num)
+				break;
+			scnprintf(scripts_path_array[i], pathlen, "%s/scripts/%s/%s",
+				exec_path,
+				lang_dirent->d_name,
+				script_dirent->d_name);
+			temp = strchr(script_dirent->d_name, '.');
+			snprintf(scripts_array[i],
+				(temp - script_dirent->d_name) + 1,
+				"%s", script_dirent->d_name);
+
+			if (check_ev_match(lang_dir_fd, scripts_array[i], session))
+				continue;
+
+			i++;
+		}
+		closedir(lang_dir);
+	}
+
+	closedir(scripts_dir);
+	perf_session__delete(session);
+	return i;
+}
+
+/*
  * When success, will copy the full path of the selected script
  * into  the buffer pointed by script_name, and return 0.
  * Return -1 on failure.
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 6da24aa039eb..8920e298420a 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -3,6 +3,7 @@
 #include "util/sort.h"
 #include "util/debug.h"
 #include "util/annotate.h"
+#include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/map.h"
 #include "util/dso.h"
@@ -26,7 +27,7 @@ static const char *const col_names[] = {
 };
 
 static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
-				 struct disasm_line *dl, int evidx)
+				 struct disasm_line *dl, const struct evsel *evsel)
 {
 	struct annotation *notes;
 	struct sym_hist *symhist;
@@ -42,8 +43,8 @@ static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
 		return 0;
 
 	notes = symbol__annotation(sym);
-	symhist = annotation__histogram(notes, evidx);
-	entry = annotated_source__hist_entry(notes->src, evidx, dl->al.offset);
+	symhist = annotation__histogram(notes, evsel);
+	entry = annotated_source__hist_entry(notes->src, evsel, dl->al.offset);
 	if (entry)
 		nr_samples = entry->nr_samples;
 
@@ -139,16 +140,17 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms,
 		gtk_list_store_append(store, &iter);
 
 		if (evsel__is_group_event(evsel)) {
-			for (i = 0; i < evsel->core.nr_members; i++) {
+			struct evsel *cur_evsel;
+
+			for_each_group_evsel(cur_evsel, evsel__leader(evsel)) {
 				ret += perf_gtk__get_percent(s + ret,
 							     sizeof(s) - ret,
 							     sym, pos,
-							     evsel->core.idx + i);
+							     cur_evsel);
 				ret += scnprintf(s + ret, sizeof(s) - ret, " ");
 			}
 		} else {
-			ret = perf_gtk__get_percent(s, sizeof(s), sym, pos,
-						    evsel->core.idx);
+			ret = perf_gtk__get_percent(s, sizeof(s), sym, pos, evsel);
 		}
 
 		if (ret)
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index e5491995adf0..34fda1d5eccb 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -121,7 +121,7 @@ int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
 	     const char *fmtstr, hpp_snprint_fn print_fn,
 	     enum perf_hpp_fmt_type fmtype)
 {
-	int len = fmt->user_len ?: fmt->len;
+	int len = max(fmt->user_len ?: fmt->len, (int)strlen(fmt->name));
 
 	if (symbol_conf.field_sep) {
 		return __hpp__fmt(hpp, he, get_field, fmtstr, 1,
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index c06d2ee9024c..5ec97e8d6b6d 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -86,7 +86,7 @@ perf-util-y += pmu-bison.o
 perf-util-y += hwmon_pmu.o
 perf-util-y += tool_pmu.o
 perf-util-y += svghelper.o
-perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o
+perf-util-y += trace-event-info.o
 perf-util-y += trace-event-scripting.o
 perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event.o
 perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o
@@ -121,8 +121,10 @@ perf-util-y += spark.o
 perf-util-y += topdown.o
 perf-util-y += iostat.o
 perf-util-y += stream.o
+perf-util-y += kvm-stat.o
+perf-util-y += lock-contention.o
 perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
-perf-util-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+perf-util-y += intel-pt-decoder/
 perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
 perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
 perf-util-$(CONFIG_AUXTRACE) += arm-spe.o
@@ -168,6 +170,7 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o
 perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o
+perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o
 
 ifeq ($(CONFIG_LIBTRACEEVENT),y)
   perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 32e15c9f53f3..31bb326b07a6 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -209,7 +209,7 @@ static int __symbol__account_cycles(struct cyc_hist *ch,
 }
 
 static int __symbol__inc_addr_samples(struct map_symbol *ms,
-				      struct annotated_source *src, int evidx, u64 addr,
+				      struct annotated_source *src, struct evsel *evsel, u64 addr,
 				      struct perf_sample *sample)
 {
 	struct symbol *sym = ms->sym;
@@ -228,14 +228,14 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms,
 	}
 
 	offset = addr - sym->start;
-	h = annotated_source__histogram(src, evidx);
+	h = annotated_source__histogram(src, evsel);
 	if (h == NULL) {
 		pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n",
 			 __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC);
 		return -ENOMEM;
 	}
 
-	hash_key = offset << 16 | evidx;
+	hash_key = offset << 16 | evsel->core.idx;
 	if (!hashmap__find(src->samples, hash_key, &entry)) {
 		entry = zalloc(sizeof(*entry));
 		if (entry == NULL)
@@ -252,7 +252,7 @@ static int __symbol__inc_addr_samples(struct map_symbol *ms,
 
 	pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
 		  ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n",
-		  sym->start, sym->name, addr, addr - sym->start, evidx,
+		  sym->start, sym->name, addr, addr - sym->start, evsel->core.idx,
 		  entry->nr_samples, entry->period);
 	return 0;
 }
@@ -323,7 +323,7 @@ static int symbol__inc_addr_samples(struct map_symbol *ms,
 	if (sym == NULL)
 		return 0;
 	src = symbol__hists(sym, evsel->evlist->core.nr_entries);
-	return src ? __symbol__inc_addr_samples(ms, src, evsel->core.idx, addr, sample) : 0;
+	return src ? __symbol__inc_addr_samples(ms, src, evsel, addr, sample) : 0;
 }
 
 static int symbol__account_br_cntr(struct annotated_branch *branch,
@@ -861,15 +861,14 @@ static void calc_percent(struct annotation *notes,
 			 s64 offset, s64 end)
 {
 	struct hists *hists = evsel__hists(evsel);
-	int evidx = evsel->core.idx;
-	struct sym_hist *sym_hist = annotation__histogram(notes, evidx);
+	struct sym_hist *sym_hist = annotation__histogram(notes, evsel);
 	unsigned int hits = 0;
 	u64 period = 0;
 
 	while (offset < end) {
 		struct sym_hist_entry *entry;
 
-		entry = annotated_source__hist_entry(notes->src, evidx, offset);
+		entry = annotated_source__hist_entry(notes->src, evsel, offset);
 		if (entry) {
 			hits   += entry->nr_samples;
 			period += entry->period;
@@ -1140,15 +1139,14 @@ static void print_summary(struct rb_root *root, const char *filename)
 
 static void symbol__annotate_hits(struct symbol *sym, struct evsel *evsel)
 {
-	int evidx = evsel->core.idx;
 	struct annotation *notes = symbol__annotation(sym);
-	struct sym_hist *h = annotation__histogram(notes, evidx);
+	struct sym_hist *h = annotation__histogram(notes, evsel);
 	u64 len = symbol__size(sym), offset;
 
 	for (offset = 0; offset < len; ++offset) {
 		struct sym_hist_entry *entry;
 
-		entry = annotated_source__hist_entry(notes->src, evidx, offset);
+		entry = annotated_source__hist_entry(notes->src, evsel, offset);
 		if (entry && entry->nr_samples != 0)
 			printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
 			       sym->start + offset, entry->nr_samples);
@@ -1178,7 +1176,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel)
 	const char *d_filename;
 	const char *evsel_name = evsel__name(evsel);
 	struct annotation *notes = symbol__annotation(sym);
-	struct sym_hist *h = annotation__histogram(notes, evsel->core.idx);
+	struct sym_hist *h = annotation__histogram(notes, evsel);
 	struct annotation_line *pos, *queue = NULL;
 	struct annotation_options *opts = &annotate_opts;
 	u64 start = map__rip_2objdump(map, sym->start);
@@ -1364,18 +1362,18 @@ out_free_filename:
 	return err;
 }
 
-void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
+void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel)
 {
 	struct annotation *notes = symbol__annotation(sym);
-	struct sym_hist *h = annotation__histogram(notes, evidx);
+	struct sym_hist *h = annotation__histogram(notes, evsel);
 
 	memset(h, 0, sizeof(*notes->src->histograms) * notes->src->nr_histograms);
 }
 
-void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
+void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel)
 {
 	struct annotation *notes = symbol__annotation(sym);
-	struct sym_hist *h = annotation__histogram(notes, evidx);
+	struct sym_hist *h = annotation__histogram(notes, evsel);
 	struct annotation_line *al;
 
 	h->nr_samples = 0;
@@ -1385,7 +1383,7 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
 		if (al->offset == -1)
 			continue;
 
-		entry = annotated_source__hist_entry(notes->src, evidx, al->offset);
+		entry = annotated_source__hist_entry(notes->src, evsel, al->offset);
 		if (entry == NULL)
 			continue;
 
@@ -2102,6 +2100,57 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
 	return 0;
 }
 
+const char * const perf_disassembler__strs[] = {
+	[PERF_DISASM_UNKNOWN]  = "unknown",
+	[PERF_DISASM_LLVM]     = "llvm",
+	[PERF_DISASM_CAPSTONE] = "capstone",
+	[PERF_DISASM_OBJDUMP]  = "objdump",
+};
+
+
+static void annotation_options__add_disassembler(struct annotation_options *options,
+						 enum perf_disassembler dis)
+{
+	for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) {
+		if (options->disassemblers[i] == dis) {
+			/* Disassembler is already present then don't add again. */
+			return;
+		}
+		if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) {
+			/* Found a free slot. */
+			options->disassemblers[i] = dis;
+			return;
+		}
+	}
+	pr_err("Failed to add disassembler %d\n", dis);
+}
+
+static int annotation_options__add_disassemblers_str(struct annotation_options *options,
+						const char *str)
+{
+	while (str && *str != '\0') {
+		const char *comma = strchr(str, ',');
+		int len = comma ? comma - str : (int)strlen(str);
+		bool match = false;
+
+		for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) {
+			const char *dis_str = perf_disassembler__strs[i];
+
+			if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) {
+				annotation_options__add_disassembler(options, i);
+				match = true;
+				break;
+			}
+		}
+		if (!match) {
+			pr_err("Invalid disassembler '%.*s'\n", len, str);
+			return -1;
+		}
+		str = comma ? comma + 1 : NULL;
+	}
+	return 0;
+}
+
 static int annotation__config(const char *var, const char *value, void *data)
 {
 	struct annotation_options *opt = data;
@@ -2117,11 +2166,10 @@ static int annotation__config(const char *var, const char *value, void *data)
 		else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
 			opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
 	} else if (!strcmp(var, "annotate.disassemblers")) {
-		opt->disassemblers_str = strdup(value);
-		if (!opt->disassemblers_str) {
-			pr_err("Not enough memory for annotate.disassemblers\n");
-			return -1;
-		}
+		int err = annotation_options__add_disassemblers_str(opt, value);
+
+		if (err)
+			return err;
 	} else if (!strcmp(var, "annotate.hide_src_code")) {
 		opt->hide_src_code = perf_config_bool("hide_src_code", value);
 	} else if (!strcmp(var, "annotate.jump_arrows")) {
@@ -2187,9 +2235,25 @@ void annotation_options__exit(void)
 	zfree(&annotate_opts.objdump_path);
 }
 
+static void annotation_options__default_init_disassemblers(struct annotation_options *options)
+{
+	if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) {
+		/* Already initialized. */
+		return;
+	}
+#ifdef HAVE_LIBLLVM_SUPPORT
+	annotation_options__add_disassembler(options, PERF_DISASM_LLVM);
+#endif
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+	annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE);
+#endif
+	annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP);
+}
+
 void annotation_config__init(void)
 {
 	perf_config(annotation__config, &annotate_opts);
+	annotation_options__default_init_disassemblers(&annotate_opts);
 }
 
 static unsigned int parse_percent_type(char *str1, char *str2)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 194a05cbc506..98db1b88daf4 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -15,6 +15,7 @@
 #include "hashmap.h"
 #include "disasm.h"
 #include "branch.h"
+#include "evsel.h"
 
 struct hist_browser_timer;
 struct hist_entry;
@@ -23,7 +24,6 @@ struct map_symbol;
 struct addr_map_symbol;
 struct option;
 struct perf_sample;
-struct evsel;
 struct symbol;
 struct annotated_data_type;
 
@@ -34,8 +34,13 @@ struct annotated_data_type;
 #define ANNOTATION__BR_CNTR_WIDTH 30
 #define ANNOTATION_DUMMY_LEN	256
 
-// llvm, capstone, objdump
-#define MAX_DISASSEMBLERS 3
+enum perf_disassembler {
+	PERF_DISASM_UNKNOWN = 0,
+	PERF_DISASM_LLVM,
+	PERF_DISASM_CAPSTONE,
+	PERF_DISASM_OBJDUMP,
+};
+#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1)
 
 struct annotation_options {
 	bool hide_src_code,
@@ -52,14 +57,12 @@ struct annotation_options {
 	     annotate_src,
 	     full_addr;
 	u8   offset_level;
-	u8   nr_disassemblers;
+	u8   disassemblers[MAX_DISASSEMBLERS];
 	int  min_pcnt;
 	int  max_lines;
 	int  context;
 	char *objdump_path;
 	char *disassembler_style;
-	const char *disassemblers_str;
-	const char *disassemblers[MAX_DISASSEMBLERS];
 	const char *prefix;
 	const char *prefix_strip;
 	unsigned int percent_type;
@@ -134,6 +137,8 @@ struct disasm_line {
 	struct annotation_line	 al;
 };
 
+extern const char * const perf_disassembler__strs[];
+
 void annotation_line__add(struct annotation_line *al, struct list_head *head);
 
 static inline double annotation_data__percent(struct annotation_data *data,
@@ -373,21 +378,23 @@ static inline u8 annotation__br_cntr_width(void)
 void annotation__update_column_widths(struct annotation *notes);
 void annotation__toggle_full_addr(struct annotation *notes, struct map_symbol *ms);
 
-static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
+static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src,
+							   const struct evsel *evsel)
 {
-	return &src->histograms[idx];
+	return &src->histograms[evsel->core.idx];
 }
 
-static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
+static inline struct sym_hist *annotation__histogram(struct annotation *notes,
+						     const struct evsel *evsel)
 {
-	return annotated_source__histogram(notes->src, idx);
+	return annotated_source__histogram(notes->src, evsel);
 }
 
 static inline struct sym_hist_entry *
-annotated_source__hist_entry(struct annotated_source *src, int idx, u64 offset)
+annotated_source__hist_entry(struct annotated_source *src, const struct evsel *evsel, u64 offset)
 {
 	struct sym_hist_entry *entry;
-	long key = offset << 16 | idx;
+	long key = offset << 16 | evsel->core.idx;
 
 	if (!hashmap__find(src->samples, key, &entry))
 		return NULL;
@@ -441,6 +448,7 @@ enum symbol_disassemble_errno {
 	SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP,
 	SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE,
 	SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF,
+	SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE,
 
 	__SYMBOL_ANNOTATE_ERRNO__END,
 };
@@ -448,8 +456,8 @@ enum symbol_disassemble_errno {
 int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, size_t buflen);
 
 int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel);
-void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
-void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
+void symbol__annotate_zero_histogram(struct symbol *sym, struct evsel *evsel);
+void symbol__annotate_decay_histogram(struct symbol *sym, struct evsel *evsel);
 void annotated_source__purge(struct annotated_source *as);
 
 int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel);
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 358c611eeddb..4bcd627e859f 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -67,6 +67,15 @@ enum arm_spe_common_data_source {
 	ARM_SPE_COMMON_DS_DRAM		= 0xe,
 };
 
+enum arm_spe_ampereone_data_source {
+	ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE    = 0x0,
+	ARM_SPE_AMPEREONE_SLC                           = 0x3,
+	ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE             = 0x5,
+	ARM_SPE_AMPEREONE_DDR                           = 0x7,
+	ARM_SPE_AMPEREONE_L1D                           = 0x8,
+	ARM_SPE_AMPEREONE_L2D                           = 0x9,
+};
+
 struct arm_spe_record {
 	enum arm_spe_sample_type type;
 	int err;
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index dbf13f47879c..12761c39788f 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -103,6 +103,18 @@ struct arm_spe_queue {
 	u32				flags;
 };
 
+struct data_source_handle {
+	const struct midr_range *midr_ranges;
+	void (*ds_synth)(const struct arm_spe_record *record,
+			 union perf_mem_data_src *data_src);
+};
+
+#define DS(range, func)					\
+	{						\
+		.midr_ranges = range,			\
+		.ds_synth = arm_spe__synth_##func,	\
+	}
+
 static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
 			 unsigned char *buf, size_t len)
 {
@@ -443,6 +455,11 @@ static const struct midr_range common_ds_encoding_cpus[] = {
 	{},
 };
 
+static const struct midr_range ampereone_ds_encoding_cpus[] = {
+	MIDR_ALL_VERSIONS(MIDR_AMPERE1A),
+	{},
+};
+
 static void arm_spe__sample_flags(struct arm_spe_queue *speq)
 {
 	const struct arm_spe_record *record = &speq->decoder->record;
@@ -532,6 +549,49 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor
 	}
 }
 
+/*
+ * Source is IMPDEF. Here we convert the source code used on AmpereOne cores
+ * to the common (Neoverse, Cortex) to avoid duplicating the decoding code.
+ */
+static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *record,
+						 union perf_mem_data_src *data_src)
+{
+	struct arm_spe_record common_record;
+
+	switch (record->source) {
+	case ARM_SPE_AMPEREONE_LOCAL_CHIP_CACHE_OR_DEVICE:
+		common_record.source = ARM_SPE_COMMON_DS_PEER_CORE;
+		break;
+	case ARM_SPE_AMPEREONE_SLC:
+		common_record.source = ARM_SPE_COMMON_DS_SYS_CACHE;
+		break;
+	case ARM_SPE_AMPEREONE_REMOTE_CHIP_CACHE:
+		common_record.source = ARM_SPE_COMMON_DS_REMOTE;
+		break;
+	case ARM_SPE_AMPEREONE_DDR:
+		common_record.source = ARM_SPE_COMMON_DS_DRAM;
+		break;
+	case ARM_SPE_AMPEREONE_L1D:
+		common_record.source = ARM_SPE_COMMON_DS_L1D;
+		break;
+	case ARM_SPE_AMPEREONE_L2D:
+		common_record.source = ARM_SPE_COMMON_DS_L2;
+		break;
+	default:
+		pr_warning_once("AmpereOne: Unknown data source (0x%x)\n",
+				record->source);
+		return;
+	}
+
+	common_record.op = record->op;
+	arm_spe__synth_data_source_common(&common_record, data_src);
+}
+
+static const struct data_source_handle data_source_handles[] = {
+	DS(common_ds_encoding_cpus, data_source_common),
+	DS(ampereone_ds_encoding_cpus, data_source_ampereone),
+};
+
 static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
 					union perf_mem_data_src *data_src)
 {
@@ -555,12 +615,14 @@ static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
 		data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
 }
 
-static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
+static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
+			      const struct arm_spe_record *record,
+			      union perf_mem_data_src *data_src)
 {
 	struct arm_spe *spe = speq->spe;
-	bool is_in_cpu_list;
 	u64 *metadata = NULL;
-	u64 midr = 0;
+	u64 midr;
+	unsigned int i;
 
 	/* Metadata version 1 assumes all CPUs are the same (old behavior) */
 	if (spe->metadata_ver == 1) {
@@ -592,18 +654,20 @@ static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq)
 		midr = metadata[ARM_SPE_CPU_MIDR];
 	}
 
-	is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus);
-	if (is_in_cpu_list)
-		return true;
-	else
-		return false;
+	for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
+		if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
+			data_source_handles[i].ds_synth(record, data_src);
+			return true;
+		}
+	}
+
+	return false;
 }
 
 static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
 				      const struct arm_spe_record *record)
 {
 	union perf_mem_data_src	data_src = { .mem_op = PERF_MEM_OP_NA };
-	bool is_common = arm_spe__is_common_ds_encoding(speq);
 
 	if (record->op & ARM_SPE_OP_LD)
 		data_src.mem_op = PERF_MEM_OP_LOAD;
@@ -612,9 +676,7 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
 	else
 		return 0;
 
-	if (is_common)
-		arm_spe__synth_data_source_common(record, &data_src);
-	else
+	if (!arm_spe__synth_ds(speq, record, &data_src))
 		arm_spe__synth_memory_level(record, &data_src);
 
 	if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index ca8682966fae..4d1633d87eff 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -810,19 +810,76 @@ no_opt:
 	return auxtrace_validate_aux_sample_size(evlist, opts);
 }
 
-void auxtrace_regroup_aux_output(struct evlist *evlist)
+static struct aux_action_opt {
+	const char *str;
+	u32 aux_action;
+	bool aux_event_opt;
+} aux_action_opts[] = {
+	{"start-paused", BIT(0), true},
+	{"pause",        BIT(1), false},
+	{"resume",       BIT(2), false},
+	{.str = NULL},
+};
+
+static const struct aux_action_opt *auxtrace_parse_aux_action_str(const char *str)
+{
+	const struct aux_action_opt *opt;
+
+	if (!str)
+		return NULL;
+
+	for (opt = aux_action_opts; opt->str; opt++)
+		if (!strcmp(str, opt->str))
+			return opt;
+
+	return NULL;
+}
+
+int auxtrace_parse_aux_action(struct evlist *evlist)
 {
-	struct evsel *evsel, *aux_evsel = NULL;
 	struct evsel_config_term *term;
+	struct evsel *aux_evsel = NULL;
+	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (evsel__is_aux_event(evsel))
+		bool is_aux_event = evsel__is_aux_event(evsel);
+		const struct aux_action_opt *opt;
+
+		if (is_aux_event)
 			aux_evsel = evsel;
-		term = evsel__get_config_term(evsel, AUX_OUTPUT);
+		term = evsel__get_config_term(evsel, AUX_ACTION);
+		if (!term) {
+			if (evsel__get_config_term(evsel, AUX_OUTPUT))
+				goto regroup;
+			continue;
+		}
+		opt = auxtrace_parse_aux_action_str(term->val.str);
+		if (!opt) {
+			pr_err("Bad aux-action '%s'\n", term->val.str);
+			return -EINVAL;
+		}
+		if (opt->aux_event_opt && !is_aux_event) {
+			pr_err("aux-action '%s' can only be used with AUX area event\n",
+			       term->val.str);
+			return -EINVAL;
+		}
+		if (!opt->aux_event_opt && is_aux_event) {
+			pr_err("aux-action '%s' cannot be used for AUX area event itself\n",
+			       term->val.str);
+			return -EINVAL;
+		}
+		evsel->core.attr.aux_action = opt->aux_action;
+regroup:
 		/* If possible, group with the AUX event */
-		if (term && aux_evsel)
+		if (aux_evsel)
 			evlist__regroup(evlist, aux_evsel, evsel);
+		if (!evsel__is_aux_event(evsel__leader(evsel))) {
+			pr_err("Events with aux-action must have AUX area event group leader\n");
+			return -EINVAL;
+		}
 	}
+
+	return 0;
 }
 
 struct auxtrace_record *__weak
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index dddaf4f3ffed..b0db84d27b25 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -578,7 +578,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
 int auxtrace_parse_sample_options(struct auxtrace_record *itr,
 				  struct evlist *evlist,
 				  struct record_opts *opts, const char *str);
-void auxtrace_regroup_aux_output(struct evlist *evlist);
+int auxtrace_parse_aux_action(struct evlist *evlist);
 int auxtrace_record__options(struct auxtrace_record *itr,
 			     struct evlist *evlist,
 			     struct record_opts *opts);
@@ -799,8 +799,10 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused,
 }
 
 static inline
-void auxtrace_regroup_aux_output(struct evlist *evlist __maybe_unused)
+int auxtrace_parse_aux_action(struct evlist *evlist __maybe_unused)
 {
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
 }
 
 static inline
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 13608237c50e..c81444059ad0 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -289,7 +289,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
 		}
 
 		info_node->info_linear = info_linear;
-		perf_env__insert_bpf_prog_info(env, info_node);
+		if (!perf_env__insert_bpf_prog_info(env, info_node)) {
+			free(info_linear);
+			free(info_node);
+		}
 		info_linear = NULL;
 
 		/*
@@ -480,7 +483,10 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
 	info_node = malloc(sizeof(struct bpf_prog_info_node));
 	if (info_node) {
 		info_node->info_linear = info_linear;
-		perf_env__insert_bpf_prog_info(env, info_node);
+		if (!perf_env__insert_bpf_prog_info(env, info_node)) {
+			free(info_linear);
+			free(info_node);
+		}
 	} else
 		free(info_linear);
 
diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c
index 06d1c4018407..25fc280e414a 100644
--- a/tools/perf/util/bpf_ftrace.c
+++ b/tools/perf/util/bpf_ftrace.c
@@ -11,6 +11,7 @@
 #include "util/debug.h"
 #include "util/evlist.h"
 #include "util/bpf_counter.h"
+#include "util/stat.h"
 
 #include "util/bpf_skel/func_latency.skel.h"
 
@@ -36,6 +37,9 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 		return -1;
 	}
 
+	skel->rodata->bucket_range = ftrace->bucket_range;
+	skel->rodata->min_latency = ftrace->min_latency;
+
 	/* don't need to set cpu filter for system-wide mode */
 	if (ftrace->target.cpu_list) {
 		ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
@@ -83,6 +87,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 		}
 	}
 
+	skel->bss->min = INT64_MAX;
+
 	skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
 							    false, func->name);
 	if (IS_ERR(skel->links.func_begin)) {
@@ -119,7 +125,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
 }
 
 int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
-				  int buckets[])
+				  int buckets[], struct stats *stats)
 {
 	int i, fd, err;
 	u32 idx;
@@ -143,6 +149,13 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
 			buckets[idx] += hist[i];
 	}
 
+	if (skel->bss->count) {
+		stats->mean = skel->bss->total / skel->bss->count;
+		stats->n = skel->bss->count;
+		stats->max = skel->bss->max;
+		stats->min = skel->bss->min;
+	}
+
 	free(hist);
 	return 0;
 }
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index 6c7126b7670d..5cff755c71fa 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -285,7 +285,7 @@ static int add_work(struct perf_kwork *kwork,
 	    (bpf_trace->get_work_name(key, &tmp.name)))
 		return -1;
 
-	work = perf_kwork_add_work(kwork, tmp.class, &tmp);
+	work = kwork->add_work(kwork, tmp.class, &tmp);
 	if (work == NULL)
 		return -1;
 
diff --git a/tools/perf/util/bpf_kwork_top.c b/tools/perf/util/bpf_kwork_top.c
index 7261cad43468..b6f187dd9136 100644
--- a/tools/perf/util/bpf_kwork_top.c
+++ b/tools/perf/util/bpf_kwork_top.c
@@ -255,7 +255,7 @@ static int add_work(struct perf_kwork *kwork, struct work_key *key,
 	bpf_trace = kwork_class_bpf_supported_list[type];
 	tmp.class = bpf_trace->class;
 
-	work = perf_kwork_add_work(kwork, tmp.class, &tmp);
+	work = kwork->add_work(kwork, tmp.class, &tmp);
 	if (!work)
 		return -1;
 
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 41a1ad087895..fc8666222399 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -2,6 +2,7 @@
 #include "util/cgroup.h"
 #include "util/debug.h"
 #include "util/evlist.h"
+#include "util/hashmap.h"
 #include "util/machine.h"
 #include "util/map.h"
 #include "util/symbol.h"
@@ -12,17 +13,106 @@
 #include <linux/zalloc.h>
 #include <linux/string.h>
 #include <bpf/bpf.h>
+#include <bpf/btf.h>
 #include <inttypes.h>
 
 #include "bpf_skel/lock_contention.skel.h"
 #include "bpf_skel/lock_data.h"
 
 static struct lock_contention_bpf *skel;
+static bool has_slab_iter;
+static struct hashmap slab_hash;
+
+static size_t slab_cache_hash(long key, void *ctx __maybe_unused)
+{
+	return key;
+}
+
+static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+	return key1 == key2;
+}
+
+static void check_slab_cache_iter(struct lock_contention *con)
+{
+	struct btf *btf = btf__load_vmlinux_btf();
+	s32 ret;
+
+	hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL);
+
+	if (btf == NULL) {
+		pr_debug("BTF loading failed: %s\n", strerror(errno));
+		return;
+	}
+
+	ret = btf__find_by_name_kind(btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT);
+	if (ret < 0) {
+		bpf_program__set_autoload(skel->progs.slab_cache_iter, false);
+		pr_debug("slab cache iterator is not available: %d\n", ret);
+		goto out;
+	}
+
+	has_slab_iter = true;
+
+	bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries);
+out:
+	btf__free(btf);
+}
+
+static void run_slab_cache_iter(void)
+{
+	int fd;
+	char buf[256];
+	long key, *prev_key;
+
+	if (!has_slab_iter)
+		return;
+
+	fd = bpf_iter_create(bpf_link__fd(skel->links.slab_cache_iter));
+	if (fd < 0) {
+		pr_debug("cannot create slab cache iter: %d\n", fd);
+		return;
+	}
+
+	/* This will run the bpf program */
+	while (read(fd, buf, sizeof(buf)) > 0)
+		continue;
+
+	close(fd);
+
+	/* Read the slab cache map and build a hash with IDs */
+	fd = bpf_map__fd(skel->maps.slab_caches);
+	prev_key = NULL;
+	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
+		struct slab_cache_data *data;
+
+		data = malloc(sizeof(*data));
+		if (data == NULL)
+			break;
+
+		if (bpf_map_lookup_elem(fd, &key, data) < 0)
+			break;
+
+		hashmap__add(&slab_hash, data->id, data);
+		prev_key = &key;
+	}
+}
+
+static void exit_slab_cache_iter(void)
+{
+	struct hashmap_entry *cur;
+	unsigned bkt;
+
+	hashmap__for_each_entry(&slab_hash, cur, bkt)
+		free(cur->pvalue);
+
+	hashmap__clear(&slab_hash);
+}
 
 int lock_contention_prepare(struct lock_contention *con)
 {
 	int i, fd;
-	int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1;
+	int ncpus = 1, ntasks = 1, ntypes = 1, naddrs = 1, ncgrps = 1, nslabs = 1;
 	struct evlist *evlist = con->evlist;
 	struct target *target = con->target;
 
@@ -109,6 +199,15 @@ int lock_contention_prepare(struct lock_contention *con)
 			skel->rodata->use_cgroup_v2 = 1;
 	}
 
+	check_slab_cache_iter(con);
+
+	if (con->filters->nr_slabs && has_slab_iter) {
+		skel->rodata->has_slab = 1;
+		nslabs = con->filters->nr_slabs;
+	}
+
+	bpf_map__set_max_entries(skel->maps.slab_filter, nslabs);
+
 	if (lock_contention_bpf__load(skel) < 0) {
 		pr_err("Failed to load lock-contention BPF skeleton\n");
 		return -1;
@@ -179,6 +278,36 @@ int lock_contention_prepare(struct lock_contention *con)
 	bpf_program__set_autoload(skel->progs.collect_lock_syms, false);
 
 	lock_contention_bpf__attach(skel);
+
+	/* run the slab iterator after attaching */
+	run_slab_cache_iter();
+
+	if (con->filters->nr_slabs) {
+		u8 val = 1;
+		int cache_fd;
+		long key, *prev_key;
+
+		fd = bpf_map__fd(skel->maps.slab_filter);
+
+		/* Read the slab cache map and build a hash with its address */
+		cache_fd = bpf_map__fd(skel->maps.slab_caches);
+		prev_key = NULL;
+		while (!bpf_map_get_next_key(cache_fd, prev_key, &key)) {
+			struct slab_cache_data data;
+
+			if (bpf_map_lookup_elem(cache_fd, &key, &data) < 0)
+				break;
+
+			for (i = 0; i < con->filters->nr_slabs; i++) {
+				if (!strcmp(con->filters->slabs[i], data.name)) {
+					bpf_map_update_elem(fd, &key, &val, BPF_ANY);
+					break;
+				}
+			}
+			prev_key = &key;
+		}
+	}
+
 	return 0;
 }
 
@@ -347,6 +476,7 @@ static const char *lock_contention_get_name(struct lock_contention *con,
 
 	if (con->aggr_mode == LOCK_AGGR_ADDR) {
 		int lock_fd = bpf_map__fd(skel->maps.lock_syms);
+		struct slab_cache_data *slab_data;
 
 		/* per-process locks set upper bits of the flags */
 		if (flags & LCD_F_MMAP_LOCK)
@@ -365,6 +495,12 @@ static const char *lock_contention_get_name(struct lock_contention *con,
 				return "rq_lock";
 		}
 
+		/* look slab_hash for dynamic locks in a slab object */
+		if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) {
+			snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name);
+			return name_buf;
+		}
+
 		return "";
 	}
 
@@ -458,7 +594,7 @@ int lock_contention_read(struct lock_contention *con)
 		if (con->save_callstack) {
 			bpf_map_lookup_elem(stack, &key.stack_id, stack_trace);
 
-			if (!match_callstack_filter(machine, stack_trace)) {
+			if (!match_callstack_filter(machine, stack_trace, con->max_stack)) {
 				con->nr_filtered += data.count;
 				goto next;
 			}
@@ -539,5 +675,7 @@ int lock_contention_finish(struct lock_contention *con)
 		cgroup__put(cgrp);
 	}
 
+	exit_slab_cache_iter();
+
 	return 0;
 }
diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index a590a8ac1f9d..4269b41d1771 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -100,6 +100,11 @@ static void check_sched_switch_args(void)
 	const struct btf_type *t1, *t2, *t3;
 	u32 type_id;
 
+	if (!btf) {
+		pr_debug("Missing btf, check if CONFIG_DEBUG_INFO_BTF is enabled\n");
+		goto cleanup;
+	}
+
 	type_id = btf__find_by_name_kind(btf, "btf_trace_sched_switch",
 					 BTF_KIND_TYPEDEF);
 	if ((s32)type_id < 0)
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 4a62ed593e84..e4352881e3fa 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -431,9 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
 static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
 {
 	bool augmented, do_output = false;
-	int zero = 0, size, aug_size, index,
-	    value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+	int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
 	u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */
+	s64 aug_size, size;
 	unsigned int nr, *beauty_map;
 	struct beauty_payload_enter *payload;
 	void *arg, *payload_offset;
@@ -484,14 +484,11 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
 		} else if (size > 0 && size <= value_size) { /* struct */
 			if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
 				augmented = true;
-		} else if (size < 0 && size >= -6) { /* buffer */
+		} else if ((int)size < 0 && size >= -6) { /* buffer */
 			index = -(size + 1);
 			barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick.
 			index &= 7;	    // Satisfy the bounds checking with the verifier in some kernels.
-			aug_size = args->args[index];
-
-			if (aug_size > TRACE_AUG_MAX_BUF)
-				aug_size = TRACE_AUG_MAX_BUF;
+			aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];
 
 			if (aug_size > 0) {
 				if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c
index f613dc9cb123..fb144811b34f 100644
--- a/tools/perf/util/bpf_skel/func_latency.bpf.c
+++ b/tools/perf/util/bpf_skel/func_latency.bpf.c
@@ -38,9 +38,18 @@ struct {
 
 int enabled = 0;
 
+// stats
+__s64 total;
+__s64 count;
+__s64 max;
+__s64 min;
+
 const volatile int has_cpu = 0;
 const volatile int has_task = 0;
 const volatile int use_nsec = 0;
+const volatile unsigned int bucket_range;
+const volatile unsigned int min_latency;
+const volatile unsigned int max_latency;
 
 SEC("kprobe/func")
 int BPF_PROG(func_begin)
@@ -92,7 +101,7 @@ int BPF_PROG(func_end)
 	start = bpf_map_lookup_elem(&functime, &tid);
 	if (start) {
 		__s64 delta = bpf_ktime_get_ns() - *start;
-		__u32 key;
+		__u32 key = 0;
 		__u64 *hist;
 
 		bpf_map_delete_elem(&functime, &tid);
@@ -100,17 +109,52 @@ int BPF_PROG(func_end)
 		if (delta < 0)
 			return 0;
 
+		if (bucket_range != 0) {
+			delta /= cmp_base;
+
+			if (min_latency > 0) {
+				if (delta > min_latency)
+					delta -= min_latency;
+				else
+					goto do_lookup;
+			}
+
+			// Less than 1 unit (ms or ns), or, in the future,
+			// than the min latency desired.
+			if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+				// clang 12 doesn't like s64 / u32 division
+				key = (__u64)delta / bucket_range + 1;
+				if (key >= NUM_BUCKET ||
+					delta >= max_latency - min_latency)
+					key = NUM_BUCKET - 1;
+			}
+
+			delta += min_latency;
+			goto do_lookup;
+		}
 		// calculate index using delta
 		for (key = 0; key < (NUM_BUCKET - 1); key++) {
 			if (delta < (cmp_base << key))
 				break;
 		}
 
+do_lookup:
 		hist = bpf_map_lookup_elem(&latency, &key);
 		if (!hist)
 			return 0;
 
 		*hist += 1;
+
+		if (bucket_range == 0)
+			delta /= cmp_base;
+
+		__sync_fetch_and_add(&total, delta);
+		__sync_fetch_and_add(&count, 1);
+
+		if (delta > max)
+			max = delta;
+		if (delta < min)
+			min = delta;
 	}
 
 	return 0;
diff --git a/tools/perf/util/bpf_skel/kwork_top.bpf.c b/tools/perf/util/bpf_skel/kwork_top.bpf.c
index 594da91965a2..73e32e063030 100644
--- a/tools/perf/util/bpf_skel/kwork_top.bpf.c
+++ b/tools/perf/util/bpf_skel/kwork_top.bpf.c
@@ -18,7 +18,9 @@ enum kwork_class_type {
 };
 
 #define MAX_ENTRIES     102400
-#define MAX_NR_CPUS     2048
+#ifndef MAX_NR_CPUS
+#define MAX_NR_CPUS     4096
+#endif
 #define PF_KTHREAD      0x00200000
 #define MAX_COMMAND_LEN 16
 
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 1069bda5d733..6533ea9b044c 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -100,6 +100,20 @@ struct {
 	__uint(max_entries, 1);
 } cgroup_filter SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(long));
+	__uint(value_size, sizeof(__u8));
+	__uint(max_entries, 1);
+} slab_filter SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(long));
+	__uint(value_size, sizeof(struct slab_cache_data));
+	__uint(max_entries, 1);
+} slab_caches SEC(".maps");
+
 struct rw_semaphore___old {
 	struct task_struct *owner;
 } __attribute__((preserve_access_index));
@@ -116,12 +130,15 @@ struct mm_struct___new {
 	struct rw_semaphore mmap_lock;
 } __attribute__((preserve_access_index));
 
+extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym __weak;
+
 /* control flags */
 const volatile int has_cpu;
 const volatile int has_task;
 const volatile int has_type;
 const volatile int has_addr;
 const volatile int has_cgroup;
+const volatile int has_slab;
 const volatile int needs_callstack;
 const volatile int stack_skip;
 const volatile int lock_owner;
@@ -136,6 +153,8 @@ int perf_subsys_id = -1;
 
 __u64 end_ts;
 
+__u32 slab_cache_id;
+
 /* error stat */
 int task_fail;
 int stack_fail;
@@ -202,7 +221,7 @@ static inline int can_record(u64 *ctx)
 		__u64 addr = ctx[0];
 
 		ok = bpf_map_lookup_elem(&addr_filter, &addr);
-		if (!ok)
+		if (!ok && !has_slab)
 			return 0;
 	}
 
@@ -215,6 +234,17 @@ static inline int can_record(u64 *ctx)
 			return 0;
 	}
 
+	if (has_slab && bpf_get_kmem_cache) {
+		__u8 *ok;
+		__u64 addr = ctx[0];
+		long kmem_cache_addr;
+
+		kmem_cache_addr = (long)bpf_get_kmem_cache(addr);
+		ok = bpf_map_lookup_elem(&slab_filter, &kmem_cache_addr);
+		if (!ok)
+			return 0;
+	}
+
 	return 1;
 }
 
@@ -487,8 +517,28 @@ int contention_end(u64 *ctx)
 		};
 		int err;
 
-		if (aggr_mode == LOCK_AGGR_ADDR)
-			first.flags |= check_lock_type(pelem->lock, pelem->flags);
+		if (aggr_mode == LOCK_AGGR_ADDR) {
+			first.flags |= check_lock_type(pelem->lock,
+						       pelem->flags & LCB_F_TYPE_MASK);
+
+			/* Check if it's from a slab object */
+			if (bpf_get_kmem_cache) {
+				struct kmem_cache *s;
+				struct slab_cache_data *d;
+
+				s = bpf_get_kmem_cache(pelem->lock);
+				if (s != NULL) {
+					/*
+					 * Save the ID of the slab cache in the flags
+					 * (instead of full address) to reduce the
+					 * space in the contention_data.
+					 */
+					d = bpf_map_lookup_elem(&slab_caches, &s);
+					if (d != NULL)
+						first.flags |= d->id;
+				}
+			}
+		}
 
 		err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
 		if (err < 0) {
@@ -563,4 +613,43 @@ int BPF_PROG(end_timestamp)
 	return 0;
 }
 
+/*
+ * bpf_iter__kmem_cache added recently so old kernels don't have it in the
+ * vmlinux.h.  But we cannot add it here since it will cause a compiler error
+ * due to redefinition of the struct on later kernels.
+ *
+ * So it uses a CO-RE trick to access the member only if it has the type.
+ * This will support both old and new kernels without compiler errors.
+ */
+struct bpf_iter__kmem_cache___new {
+	struct kmem_cache *s;
+} __attribute__((preserve_access_index));
+
+SEC("iter/kmem_cache")
+int slab_cache_iter(void *ctx)
+{
+	struct kmem_cache *s = NULL;
+	struct slab_cache_data d;
+	const char *nameptr;
+
+	if (bpf_core_type_exists(struct bpf_iter__kmem_cache)) {
+		struct bpf_iter__kmem_cache___new *iter = ctx;
+
+		s = iter->s;
+	}
+
+	if (s == NULL)
+		return 0;
+
+	nameptr = s->name;
+	bpf_probe_read_kernel_str(d.name, sizeof(d.name), nameptr);
+
+	d.id = ++slab_cache_id << LCB_F_SLAB_ID_SHIFT;
+	if (d.id >= LCB_F_SLAB_ID_END)
+		return 0;
+
+	bpf_map_update_elem(&slab_caches, &s, &d, BPF_NOEXIST);
+	return 0;
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index de12892f992f..c15f734d7fc4 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -32,7 +32,15 @@ struct contention_task_data {
 #define LCD_F_MMAP_LOCK		(1U << 31)
 #define LCD_F_SIGHAND_LOCK	(1U << 30)
 
-#define LCB_F_MAX_FLAGS		(1U << 7)
+#define LCB_F_SLAB_ID_SHIFT	16
+#define LCB_F_SLAB_ID_START	(1U << 16)
+#define LCB_F_SLAB_ID_END	(1U << 26)
+#define LCB_F_SLAB_ID_MASK	0x03FF0000U
+
+#define LCB_F_TYPE_MAX		(1U << 7)
+#define LCB_F_TYPE_MASK		0x0000007FU
+
+#define SLAB_NAME_MAX  28
 
 struct contention_data {
 	u64 total_time;
@@ -54,4 +62,9 @@ enum lock_class_sym {
 	LOCK_CLASS_RQLOCK,
 };
 
+struct slab_cache_data {
+	u32 id;
+	char name[SLAB_NAME_MAX];
+};
+
 #endif /* UTIL_BPF_SKEL_LOCK_DATA_H */
diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
index 4dcad7b682bd..7b81d3173917 100644
--- a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
+++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h
@@ -195,4 +195,12 @@ struct bpf_perf_event_data_kern {
  */
 struct rq {};
 
+struct kmem_cache {
+	const char *name;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__kmem_cache {
+	struct kmem_cache *s;
+} __attribute__((preserve_access_index));
+
 #endif // __VMLINUX_H
diff --git a/tools/perf/util/btf.c b/tools/perf/util/btf.c
new file mode 100644
index 000000000000..bb163fe87767
--- /dev/null
+++ b/tools/perf/util/btf.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Copyright (C) 2024, Red Hat, Inc
+ */
+
+#include <bpf/btf.h>
+#include <util/btf.h>
+#include <string.h>
+
+const struct btf_member *__btf_type__find_member_by_name(struct btf *btf,
+							 int type_id, const char *member_name)
+{
+	const struct btf_type *t = btf__type_by_id(btf, type_id);
+	const struct btf_member *m;
+	int i;
+
+	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
+		const char *current_member_name = btf__name_by_offset(btf, m->name_off);
+
+		if (!strcmp(current_member_name, member_name))
+			return m;
+	}
+
+	return NULL;
+}
diff --git a/tools/perf/util/btf.h b/tools/perf/util/btf.h
new file mode 100644
index 000000000000..05e6e5bf23d6
--- /dev/null
+++ b/tools/perf/util/btf.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UTIL_BTF
+#define __PERF_UTIL_BTF 1
+
+struct btf;
+struct btf_member;
+
+const struct btf_member *__btf_type__find_member_by_name(struct btf *btf,
+							 int type_id, const char *member_name);
+#endif // __PERF_UTIL_BTF
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 0f759dd96db7..fbcc0626f9ce 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -473,7 +473,7 @@ int evlist__expand_cgroup(struct evlist *evlist, const char *str,
 
 		leader = NULL;
 		evlist__for_each_entry(orig_list, pos) {
-			evsel = evsel__clone(pos);
+			evsel = evsel__clone(/*dest=*/NULL, pos);
 			if (evsel == NULL)
 				goto out_err;
 
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 68f9407ca74b..2d07c9257a1a 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -13,6 +13,7 @@
 #include <sys/param.h>
 #include "cache.h"
 #include "callchain.h"
+#include "header.h"
 #include <subcmd/exec-cmd.h>
 #include "util/event.h"  /* proc_map_timeout */
 #include "util/hist.h"  /* perf_hist_config */
@@ -34,6 +35,22 @@
 
 #define DEBUG_CACHE_DIR ".debug"
 
+#define METRIC_ONLY_LEN 20
+
+struct perf_stat_config stat_config = {
+	.aggr_mode		= AGGR_GLOBAL,
+	.aggr_level		= MAX_CACHE_LVL + 1,
+	.scale			= true,
+	.unit_width		= 4, /* strlen("unit") */
+	.run_count		= 1,
+	.metric_only_len	= METRIC_ONLY_LEN,
+	.walltime_nsecs_stats	= &walltime_nsecs_stats,
+	.ru_stats		= &ru_stats,
+	.big_num		= true,
+	.ctl_fd			= -1,
+	.ctl_fd_ack		= -1,
+	.iostat_run		= false,
+};
 
 char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
 
@@ -455,6 +472,16 @@ static int perf_ui_config(const char *var, const char *value)
 	return 0;
 }
 
+void perf_stat__set_big_num(int set)
+{
+	stat_config.big_num = (set != 0);
+}
+
+static void perf_stat__set_no_csv_summary(int set)
+{
+	stat_config.no_csv_summary = (set != 0);
+}
+
 static int perf_stat_config(const char *var, const char *value)
 {
 	if (!strcmp(var, "stat.big-num"))
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
index 9971313d61c1..a727c95cb119 100644
--- a/tools/perf/util/config.h
+++ b/tools/perf/util/config.h
@@ -50,6 +50,7 @@ int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
 			     const char *var, const char *value);
 void perf_config__exit(void);
 void perf_config__refresh(void);
+int perf_config__set_variable(const char *var, const char *value);
 
 /**
  * perf_config_sections__for_each - iterate thru all the sections
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 27094211edd8..5c329ad614e9 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -293,7 +293,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
 
 	die = cpu__get_die_id(cpu);
 	/* There is no die_id on legacy system. */
-	if (die == -1)
+	if (die < 0)
 		die = 0;
 
 	/*
@@ -322,7 +322,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data)
 	struct aggr_cpu_id id;
 
 	/* There is no cluster_id on legacy system. */
-	if (cluster == -1)
+	if (cluster < 0)
 		cluster = 0;
 
 	id = aggr_cpu_id__die(cpu, data);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index f0599c61fab4..5e7ff09fbc95 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -426,8 +426,9 @@ static int add_tracepoint_values(struct ctf_writer *cw,
 				 struct evsel *evsel,
 				 struct perf_sample *sample)
 {
-	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
-	struct tep_format_field *fields        = evsel->tp_format->format.fields;
+	const struct tep_event *tp_format = evsel__tp_format(evsel);
+	struct tep_format_field *common_fields = tp_format->format.common_fields;
+	struct tep_format_field *fields        = tp_format->format.fields;
 	int ret;
 
 	ret = add_tracepoint_fields_values(cw, event_class, event,
@@ -1064,8 +1065,9 @@ static int add_tracepoint_types(struct ctf_writer *cw,
 				struct evsel *evsel,
 				struct bt_ctf_event_class *class)
 {
-	struct tep_format_field *common_fields = evsel->tp_format->format.common_fields;
-	struct tep_format_field *fields        = evsel->tp_format->format.fields;
+	const struct tep_event *tp_format = evsel__tp_format(evsel);
+	struct tep_format_field *common_fields = tp_format ? tp_format->format.common_fields : NULL;
+	struct tep_format_field *fields        = tp_format ? tp_format->format.fields : NULL;
 	int ret;
 
 	ret = add_tracepoint_fields_types(cw, common_fields, class);
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 8304cd2d4a9c..d9f805bf6fb0 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -230,12 +230,12 @@ static int process_sample_event(const struct perf_tool *tool,
 
 #ifdef HAVE_LIBTRACEEVENT
 	if (sample->raw_data) {
-		int i;
-		struct tep_format_field **fields;
+		struct tep_event *tp_format = evsel__tp_format(evsel);
+		struct tep_format_field **fields = tp_format ? tep_event_fields(tp_format) : NULL;
 
-		fields = tep_event_fields(evsel->tp_format);
 		if (fields) {
-			i = 0;
+			int i = 0;
+
 			while (fields[i]) {
 				struct trace_seq s;
 
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index 41a2b08670dc..50c5c206b70e 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -1245,6 +1245,9 @@ int symbol__strerror_disassemble(struct map_symbol *ms, int errnum, char *buf, s
 		scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
 			  dso__long_name(dso));
 		break;
+	case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE:
+		scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso));
+		break;
 	default:
 		scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
 		break;
@@ -2213,56 +2216,6 @@ out_free_command:
 	return err;
 }
 
-static int annotation_options__init_disassemblers(struct annotation_options *options)
-{
-	char *disassembler;
-
-	if (options->disassemblers_str == NULL) {
-		const char *default_disassemblers_str =
-#ifdef HAVE_LIBLLVM_SUPPORT
-				"llvm,"
-#endif
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
-				"capstone,"
-#endif
-				"objdump";
-
-		options->disassemblers_str = strdup(default_disassemblers_str);
-		if (!options->disassemblers_str)
-			goto out_enomem;
-	}
-
-	disassembler = strdup(options->disassemblers_str);
-	if (disassembler == NULL)
-		goto out_enomem;
-
-	while (1) {
-		char *comma = strchr(disassembler, ',');
-
-		if (comma != NULL)
-			*comma = '\0';
-
-		options->disassemblers[options->nr_disassemblers++] = strim(disassembler);
-
-		if (comma == NULL)
-			break;
-
-		disassembler = comma + 1;
-
-		if (options->nr_disassemblers >= MAX_DISASSEMBLERS) {
-			pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n",
-				 MAX_DISASSEMBLERS, disassembler);
-			break;
-		}
-	}
-
-	return 0;
-
-out_enomem:
-	pr_err("Not enough memory for annotate.disassemblers\n");
-	return -1;
-}
-
 int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 {
 	struct annotation_options *options = args->options;
@@ -2271,7 +2224,6 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 	char symfs_filename[PATH_MAX];
 	bool delete_extract = false;
 	struct kcore_extract kce;
-	const char *disassembler;
 	bool decomp = false;
 	int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
 
@@ -2289,7 +2241,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 	} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) {
 		return symbol__disassemble_bpf_image(sym, args);
 	} else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) {
-		return -1;
+		return SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE;
 	} else if (dso__is_kcore(dso)) {
 		kce.addr = map__rip_2objdump(map, sym->start);
 		kce.kcore_filename = symfs_filename;
@@ -2331,28 +2283,26 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
 		}
 	}
 
-	err = annotation_options__init_disassemblers(options);
-	if (err)
-		goto out_remove_tmp;
-
 	err = -1;
+	for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) {
+		enum perf_disassembler dis = options->disassemblers[i];
 
-	for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) {
-		disassembler = options->disassemblers[i];
-
-		if (!strcmp(disassembler, "llvm"))
+		switch (dis) {
+		case PERF_DISASM_LLVM:
 			err = symbol__disassemble_llvm(symfs_filename, sym, args);
-		else if (!strcmp(disassembler, "capstone"))
+			break;
+		case PERF_DISASM_CAPSTONE:
 			err = symbol__disassemble_capstone(symfs_filename, sym, args);
-		else if (!strcmp(disassembler, "objdump"))
+			break;
+		case PERF_DISASM_OBJDUMP:
 			err = symbol__disassemble_objdump(symfs_filename, sym, args);
-		else
-			pr_debug("Unknown disassembler %s, skipping...\n", disassembler);
-	}
-
-	if (err == 0) {
-		pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n",
-			 disassembler, options->disassemblers_str);
+			break;
+		case PERF_DISASM_UNKNOWN: /* End of disassemblers. */
+		default:
+			goto out_remove_tmp;
+		}
+		if (err == 0)
+			pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]);
 	}
 out_remove_tmp:
 	if (decomp)
diff --git a/tools/perf/util/dlfilter.c b/tools/perf/util/dlfilter.c
index 7d180bdaedbc..ddacef881af2 100644
--- a/tools/perf/util/dlfilter.c
+++ b/tools/perf/util/dlfilter.c
@@ -234,7 +234,8 @@ static const __u8 *dlfilter__insn(void *ctx, __u32 *len)
 			struct machine *machine = maps__machine(thread__maps(al->thread));
 
 			if (machine)
-				script_fetch_insn(d->sample, al->thread, machine);
+				script_fetch_insn(d->sample, al->thread, machine,
+						  /*native_arch=*/true);
 		}
 	}
 
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index e2843ca2edd9..cae4f6d63318 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -24,15 +24,19 @@ struct perf_env perf_env;
 #include "bpf-utils.h"
 #include <bpf/libbpf.h>
 
-void perf_env__insert_bpf_prog_info(struct perf_env *env,
+bool perf_env__insert_bpf_prog_info(struct perf_env *env,
 				    struct bpf_prog_info_node *info_node)
 {
+	bool ret;
+
 	down_write(&env->bpf_progs.lock);
-	__perf_env__insert_bpf_prog_info(env, info_node);
+	ret = __perf_env__insert_bpf_prog_info(env, info_node);
 	up_write(&env->bpf_progs.lock);
+
+	return ret;
 }
 
-void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node)
+bool __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node)
 {
 	__u32 prog_id = info_node->info_linear->info.id;
 	struct bpf_prog_info_node *node;
@@ -50,13 +54,14 @@ void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info
 			p = &(*p)->rb_right;
 		} else {
 			pr_debug("duplicated bpf prog info %u\n", prog_id);
-			return;
+			return false;
 		}
 	}
 
 	rb_link_node(&info_node->rb_node, parent, p);
 	rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos);
 	env->bpf_progs.infos_cnt++;
+	return true;
 }
 
 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
@@ -326,10 +331,13 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
 
 	for (idx = 0; idx < nr_cpus; ++idx) {
 		struct perf_cpu cpu = { .cpu = idx };
+		int core_id   = cpu__get_core_id(cpu);
+		int socket_id = cpu__get_socket_id(cpu);
+		int die_id    = cpu__get_die_id(cpu);
 
-		env->cpu[idx].core_id	= cpu__get_core_id(cpu);
-		env->cpu[idx].socket_id	= cpu__get_socket_id(cpu);
-		env->cpu[idx].die_id	= cpu__get_die_id(cpu);
+		env->cpu[idx].core_id	= core_id >= 0 ? core_id : -1;
+		env->cpu[idx].socket_id	= socket_id >= 0 ? socket_id : -1;
+		env->cpu[idx].die_id	= die_id >= 0 ? die_id : -1;
 	}
 
 	env->nr_cpus_avail = nr_cpus;
@@ -472,15 +480,19 @@ const char *perf_env__arch(struct perf_env *env)
 	return normalize_arch(arch_name);
 }
 
+#if defined(HAVE_LIBTRACEEVENT)
+#include "trace/beauty/arch_errno_names.c"
+#endif
+
 const char *perf_env__arch_strerrno(struct perf_env *env __maybe_unused, int err __maybe_unused)
 {
-#if defined(HAVE_SYSCALL_TABLE_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
+#if defined(HAVE_LIBTRACEEVENT)
 	if (env->arch_strerrno == NULL)
 		env->arch_strerrno = arch_syscalls__strerrno_function(perf_env__arch(env));
 
 	return env->arch_strerrno ? env->arch_strerrno(err) : "no arch specific strerrno function";
 #else
-	return "!(HAVE_SYSCALL_TABLE_SUPPORT && HAVE_LIBTRACEEVENT)";
+	return "!HAVE_LIBTRACEEVENT";
 #endif
 }
 
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index ae604c4edbb7..d90e343cf1fa 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -56,8 +56,6 @@ struct pmu_caps {
 
 typedef const char *(arch_syscalls__strerrno_t)(int err);
 
-arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch);
-
 struct perf_env {
 	char			*hostname;
 	char			*os_release;
@@ -176,9 +174,9 @@ const char *perf_env__raw_arch(struct perf_env *env);
 int perf_env__nr_cpus_avail(struct perf_env *env);
 
 void perf_env__init(struct perf_env *env);
-void __perf_env__insert_bpf_prog_info(struct perf_env *env,
+bool __perf_env__insert_bpf_prog_info(struct perf_env *env,
 				      struct bpf_prog_info_node *info_node);
-void perf_env__insert_bpf_prog_info(struct perf_env *env,
+bool perf_env__insert_bpf_prog_info(struct perf_env *env,
 				    struct bpf_prog_info_node *info_node);
 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
 							__u32 prog_id);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d22c5df1701e..bc144388f892 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -395,6 +395,7 @@ void evsel__init(struct evsel *evsel,
 	evsel->group_pmu_name = NULL;
 	evsel->skippable     = false;
 	evsel->alternate_hw_config = PERF_COUNT_HW_MAX;
+	evsel->script_output_type = -1; // FIXME: OUTPUT_TYPE_UNSET, see builtin-script.c
 }
 
 struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
@@ -454,7 +455,7 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src)
  * The assumption is that @orig is not configured nor opened yet.
  * So we only care about the attributes that can be set while it's parsed.
  */
-struct evsel *evsel__clone(struct evsel *orig)
+struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig)
 {
 	struct evsel *evsel;
 
@@ -467,7 +468,11 @@ struct evsel *evsel__clone(struct evsel *orig)
 	if (orig->bpf_obj)
 		return NULL;
 
-	evsel = evsel__new(&orig->core.attr);
+	if (dest)
+		evsel = dest;
+	else
+		evsel = evsel__new(&orig->core.attr);
+
 	if (evsel == NULL)
 		return NULL;
 
@@ -512,11 +517,12 @@ struct evsel *evsel__clone(struct evsel *orig)
 	evsel->core.leader = orig->core.leader;
 
 	evsel->max_events = orig->max_events;
-	free((char *)evsel->unit);
-	evsel->unit = strdup(orig->unit);
-	if (evsel->unit == NULL)
-		goto out_err;
-
+	zfree(&evsel->unit);
+	if (orig->unit) {
+		evsel->unit = strdup(orig->unit);
+		if (evsel->unit == NULL)
+			goto out_err;
+	}
 	evsel->scale = orig->scale;
 	evsel->snapshot = orig->snapshot;
 	evsel->per_pkg = orig->per_pkg;
@@ -544,53 +550,101 @@ out_err:
 	return NULL;
 }
 
+static int trace_event__id(const char *sys, const char *name)
+{
+	char *tp_dir = get_events_file(sys);
+	char path[PATH_MAX];
+	int id, err;
+
+	if (!tp_dir)
+		return -1;
+
+	scnprintf(path, PATH_MAX, "%s/%s/id", tp_dir, name);
+	put_events_file(tp_dir);
+	err = filename__read_int(path, &id);
+	if (err)
+		return err;
+
+	return id;
+}
+
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
-#ifdef HAVE_LIBTRACEEVENT
 struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format)
 {
+	struct perf_event_attr attr = {
+		.type	       = PERF_TYPE_TRACEPOINT,
+		.sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+				PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
+	};
 	struct evsel *evsel = zalloc(perf_evsel__object.size);
-	int err = -ENOMEM;
+	int err = -ENOMEM, id = -1;
 
-	if (evsel == NULL) {
+	if (evsel == NULL)
 		goto out_err;
-	} else {
-		struct perf_event_attr attr = {
-			.type	       = PERF_TYPE_TRACEPOINT,
-			.sample_type   = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
-					  PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
-		};
 
-		if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
-			goto out_free;
 
-		event_attr_init(&attr);
+	if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
+		goto out_free;
 
-		if (format) {
-			evsel->tp_format = trace_event__tp_format(sys, name);
-			if (IS_ERR(evsel->tp_format)) {
-				err = PTR_ERR(evsel->tp_format);
-				goto out_free;
-			}
-			attr.config = evsel->tp_format->id;
-		} else {
-			attr.config = (__u64) -1;
-		}
+#ifdef HAVE_LIBTRACEEVENT
+	evsel->tp_sys = strdup(sys);
+	if (!evsel->tp_sys)
+		goto out_free;
 
+	evsel->tp_name = strdup(name);
+	if (!evsel->tp_name)
+		goto out_free;
+#endif
 
-		attr.sample_period = 1;
-		evsel__init(evsel, &attr, idx);
-	}
+	event_attr_init(&attr);
 
+	if (format) {
+		id = trace_event__id(sys, name);
+		if (id < 0) {
+			err = id;
+			goto out_free;
+		}
+	}
+	attr.config = (__u64)id;
+	attr.sample_period = 1;
+	evsel__init(evsel, &attr, idx);
 	return evsel;
 
 out_free:
 	zfree(&evsel->name);
+#ifdef HAVE_LIBTRACEEVENT
+	zfree(&evsel->tp_sys);
+	zfree(&evsel->tp_name);
+#endif
 	free(evsel);
 out_err:
 	return ERR_PTR(err);
 }
+
+#ifdef HAVE_LIBTRACEEVENT
+struct tep_event *evsel__tp_format(struct evsel *evsel)
+{
+	struct tep_event *tp_format = evsel->tp_format;
+
+	if (tp_format)
+		return tp_format;
+
+	if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
+		return NULL;
+
+	tp_format = trace_event__tp_format(evsel->tp_sys, evsel->tp_name);
+	if (IS_ERR(tp_format)) {
+		int err = -PTR_ERR(evsel->tp_format);
+
+		pr_err("Error getting tracepoint format '%s' '%s'(%d)\n",
+			evsel__name(evsel), strerror(err), err);
+		return NULL;
+	}
+	evsel->tp_format = tp_format;
+	return evsel->tp_format;
+}
 #endif
 
 const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
@@ -1103,6 +1157,9 @@ static void evsel__apply_config_terms(struct evsel *evsel,
 		case EVSEL__CONFIG_TERM_AUX_OUTPUT:
 			attr->aux_output = term->val.aux_output ? 1 : 0;
 			break;
+		case EVSEL__CONFIG_TERM_AUX_ACTION:
+			/* Already applied by auxtrace */
+			break;
 		case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
 			/* Already applied by auxtrace */
 			break;
@@ -1587,6 +1644,10 @@ void evsel__exit(struct evsel *evsel)
 	perf_thread_map__put(evsel->core.threads);
 	zfree(&evsel->group_name);
 	zfree(&evsel->name);
+#ifdef HAVE_LIBTRACEEVENT
+	zfree(&evsel->tp_sys);
+	zfree(&evsel->tp_name);
+#endif
 	zfree(&evsel->filter);
 	zfree(&evsel->group_pmu_name);
 	zfree(&evsel->unit);
@@ -2090,16 +2151,17 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
 	return err;
 }
 
-static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
+static bool __has_attr_feature(struct perf_event_attr *attr,
+			       struct perf_cpu cpu, unsigned long flags)
 {
-	int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+	int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
 			 /*group_fd=*/-1, flags);
 	close(fd);
 
 	if (fd < 0) {
 		attr->exclude_kernel = 1;
 
-		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
 			     /*group_fd=*/-1, flags);
 		close(fd);
 	}
@@ -2107,7 +2169,7 @@ static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
 	if (fd < 0) {
 		attr->exclude_hv = 1;
 
-		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
 			     /*group_fd=*/-1, flags);
 		close(fd);
 	}
@@ -2115,7 +2177,7 @@ static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
 	if (fd < 0) {
 		attr->exclude_guest = 1;
 
-		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1,
+		fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, cpu.cpu,
 			     /*group_fd=*/-1, flags);
 		close(fd);
 	}
@@ -2127,6 +2189,13 @@ static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
 	return fd >= 0;
 }
 
+static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags)
+{
+	struct perf_cpu cpu = {.cpu = -1};
+
+	return __has_attr_feature(attr, cpu, flags);
+}
+
 static void evsel__detect_missing_pmu_features(struct evsel *evsel)
 {
 	struct perf_event_attr attr = {
@@ -2215,7 +2284,65 @@ found:
 	errno = old_errno;
 }
 
-static bool evsel__detect_missing_features(struct evsel *evsel)
+static bool evsel__probe_aux_action(struct evsel *evsel, struct perf_cpu cpu)
+{
+	struct perf_event_attr attr = evsel->core.attr;
+	int old_errno = errno;
+
+	attr.disabled = 1;
+	attr.aux_start_paused = 1;
+
+	if (__has_attr_feature(&attr, cpu, /*flags=*/0)) {
+		errno = old_errno;
+		return true;
+	}
+
+	/*
+	 * EOPNOTSUPP means the kernel supports the feature but the PMU does
+	 * not, so keep that distinction if possible.
+	 */
+	if (errno != EOPNOTSUPP)
+		errno = old_errno;
+
+	return false;
+}
+
+static void evsel__detect_missing_aux_action_feature(struct evsel *evsel, struct perf_cpu cpu)
+{
+	static bool detection_done;
+	struct evsel *leader;
+
+	/*
+	 * Don't bother probing aux_action if it is not being used or has been
+	 * probed before.
+	 */
+	if (!evsel->core.attr.aux_action || detection_done)
+		return;
+
+	detection_done = true;
+
+	/*
+	 * The leader is an AUX area event. If it has failed, assume the feature
+	 * is not supported.
+	 */
+	leader = evsel__leader(evsel);
+	if (evsel == leader) {
+		perf_missing_features.aux_action = true;
+		return;
+	}
+
+	/*
+	 * AUX area event with aux_action must have been opened successfully
+	 * already, so feature is supported.
+	 */
+	if (leader->core.attr.aux_action)
+		return;
+
+	if (!evsel__probe_aux_action(leader, cpu))
+		perf_missing_features.aux_action = true;
+}
+
+static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu cpu)
 {
 	static bool detection_done = false;
 	struct perf_event_attr attr = {
@@ -2225,6 +2352,8 @@ static bool evsel__detect_missing_features(struct evsel *evsel)
 	};
 	int old_errno;
 
+	evsel__detect_missing_aux_action_feature(evsel, cpu);
+
 	evsel__detect_missing_pmu_features(evsel);
 
 	if (evsel__has_br_stack(evsel))
@@ -2439,6 +2568,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
 	int idx, thread, nthreads;
 	int pid = -1, err, old_errno;
 	enum rlimit_action set_rlimit = NO_CHANGE;
+	struct perf_cpu cpu;
 
 	if (evsel__is_retire_lat(evsel))
 		return tpebs_start(evsel->evlist);
@@ -2476,6 +2606,7 @@ fallback_missing_features:
 	}
 
 	for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) {
+		cpu = perf_cpu_map__cpu(cpus, idx);
 
 		for (thread = 0; thread < nthreads; thread++) {
 			int fd, group_fd;
@@ -2496,10 +2627,9 @@ retry_open:
 
 			/* Debug message used by test scripts */
 			pr_debug2_peo("sys_perf_event_open: pid %d  cpu %d  group_fd %d  flags %#lx",
-				pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags);
+				pid, cpu.cpu, group_fd, evsel->open_flags);
 
-			fd = sys_perf_event_open(&evsel->core.attr, pid,
-						perf_cpu_map__cpu(cpus, idx).cpu,
+			fd = sys_perf_event_open(&evsel->core.attr, pid, cpu.cpu,
 						group_fd, evsel->open_flags);
 
 			FD(evsel, idx, thread) = fd;
@@ -2515,8 +2645,7 @@ retry_open:
 			bpf_counter__install_pe(evsel, idx, fd);
 
 			if (unlikely(test_attr__enabled())) {
-				test_attr__open(&evsel->core.attr, pid,
-						perf_cpu_map__cpu(cpus, idx),
+				test_attr__open(&evsel->core.attr, pid, cpu,
 						fd, group_fd, evsel->open_flags);
 			}
 
@@ -2571,7 +2700,7 @@ try_fallback:
 	if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit))
 		goto retry_open;
 
-	if (err == -EINVAL && evsel__detect_missing_features(evsel))
+	if (err == -EINVAL && evsel__detect_missing_features(evsel, cpu))
 		goto fallback_missing_features;
 
 	if (evsel__precise_ip_fallback(evsel))
@@ -3218,12 +3347,16 @@ u16 evsel__id_hdr_size(const struct evsel *evsel)
 #ifdef HAVE_LIBTRACEEVENT
 struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
 {
-	return tep_find_field(evsel->tp_format, name);
+	struct tep_event *tp_format = evsel__tp_format(evsel);
+
+	return tp_format ? tep_find_field(tp_format, name) : NULL;
 }
 
 struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name)
 {
-	return tep_find_common_field(evsel->tp_format, name);
+	struct tep_event *tp_format = evsel__tp_format(evsel);
+
+	return tp_format ? tep_find_common_field(tp_format, name) : NULL;
 }
 
 void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
@@ -3448,6 +3581,78 @@ static bool find_process(const char *name)
 	return ret ? false : true;
 }
 
+static int dump_perf_event_processes(char *msg, size_t size)
+{
+	DIR *proc_dir;
+	struct dirent *proc_entry;
+	int printed = 0;
+
+	proc_dir = opendir(procfs__mountpoint());
+	if (!proc_dir)
+		return 0;
+
+	/* Walk through the /proc directory. */
+	while ((proc_entry = readdir(proc_dir)) != NULL) {
+		char buf[256];
+		DIR *fd_dir;
+		struct dirent *fd_entry;
+		int fd_dir_fd;
+
+		if (proc_entry->d_type != DT_DIR ||
+		    !isdigit(proc_entry->d_name[0]) ||
+		    strlen(proc_entry->d_name) > sizeof(buf) - 4)
+			continue;
+
+		scnprintf(buf, sizeof(buf), "%s/fd", proc_entry->d_name);
+		fd_dir_fd = openat(dirfd(proc_dir), buf, O_DIRECTORY);
+		if (fd_dir_fd == -1)
+			continue;
+		fd_dir = fdopendir(fd_dir_fd);
+		if (!fd_dir) {
+			close(fd_dir_fd);
+			continue;
+		}
+		while ((fd_entry = readdir(fd_dir)) != NULL) {
+			ssize_t link_size;
+
+			if (fd_entry->d_type != DT_LNK)
+				continue;
+			link_size = readlinkat(fd_dir_fd, fd_entry->d_name, buf, sizeof(buf));
+			if (link_size < 0)
+				continue;
+			/* Take care as readlink doesn't null terminate the string. */
+			if (!strncmp(buf, "anon_inode:[perf_event]", link_size)) {
+				int cmdline_fd;
+				ssize_t cmdline_size;
+
+				scnprintf(buf, sizeof(buf), "%s/cmdline", proc_entry->d_name);
+				cmdline_fd = openat(dirfd(proc_dir), buf, O_RDONLY);
+				if (cmdline_fd == -1)
+					continue;
+				cmdline_size = read(cmdline_fd, buf, sizeof(buf) - 1);
+				close(cmdline_fd);
+				if (cmdline_size < 0)
+					continue;
+				buf[cmdline_size] = '\0';
+				for (ssize_t i = 0; i < cmdline_size; i++) {
+					if (buf[i] == '\0')
+						buf[i] = ' ';
+				}
+
+				if (printed == 0)
+					printed += scnprintf(msg, size, "Possible processes:\n");
+
+				printed += scnprintf(msg + printed, size - printed,
+						"%s %s\n", proc_entry->d_name, buf);
+				break;
+			}
+		}
+		closedir(fd_dir);
+	}
+	closedir(proc_dir);
+	return printed;
+}
+
 int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
 				     char *msg __maybe_unused,
 				     size_t size __maybe_unused)
@@ -3481,7 +3686,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			printed += scnprintf(msg, size,
 				"No permission to enable %s event.\n\n", evsel__name(evsel));
 
-		return scnprintf(msg + printed, size - printed,
+		return printed + scnprintf(msg + printed, size - printed,
 		 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
 		 "access to performance monitoring and observability operations for processes\n"
 		 "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
@@ -3526,6 +3731,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			return scnprintf(msg, size,
 	"%s: PMU Hardware doesn't support 'aux_output' feature",
 					 evsel__name(evsel));
+		if (evsel->core.attr.aux_action)
+			return scnprintf(msg, size,
+	"%s: PMU Hardware doesn't support 'aux_action' feature",
+					evsel__name(evsel));
 		if (evsel->core.attr.sample_period != 0)
 			return scnprintf(msg, size,
 	"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
@@ -3544,6 +3753,11 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			return scnprintf(msg, size,
 	"The PMU counters are busy/taken by another profiler.\n"
 	"We found oprofile daemon running, please stop it and try again.");
+		printed += scnprintf(
+			msg, size,
+			"The PMU %s counters are busy and in use by another process.\n",
+			evsel->pmu ? evsel->pmu->name : "");
+		return printed + dump_perf_event_processes(msg + printed, size - printed);
 		break;
 	case EINVAL:
 		if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)
@@ -3556,6 +3770,8 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 			return scnprintf(msg, size, "clockid feature not supported.");
 		if (perf_missing_features.clockid_wrong)
 			return scnprintf(msg, size, "wrong clockid (%d).", clockid);
+		if (perf_missing_features.aux_action)
+			return scnprintf(msg, size, "The 'aux_action' feature is not supported, update the kernel.");
 		if (perf_missing_features.aux_output)
 			return scnprintf(msg, size, "The 'aux_output' feature is not supported, update the kernel.");
 		if (!target__has_cpu(target))
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 04934a7af174..5e789fa80590 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -59,6 +59,8 @@ struct evsel {
 		char			*group_name;
 		const char		*group_pmu_name;
 #ifdef HAVE_LIBTRACEEVENT
+		char			*tp_sys;
+		char			*tp_name;
 		struct tep_event	*tp_format;
 #endif
 		char			*filter;
@@ -119,6 +121,7 @@ struct evsel {
 	bool			default_metricgroup; /* A member of the Default metricgroup */
 	struct hashmap		*per_pkg_mask;
 	int			err;
+	int			script_output_type;
 	struct {
 		evsel__sb_cb_t	*cb;
 		void		*data;
@@ -205,6 +208,7 @@ struct perf_missing_features {
 	bool weight_struct;
 	bool read_lost;
 	bool branch_counters;
+	bool aux_action;
 	bool inherit_sample_read;
 };
 
@@ -241,26 +245,23 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr)
 	return evsel__new_idx(attr, 0);
 }
 
-struct evsel *evsel__clone(struct evsel *orig);
+struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig);
 
 int copy_config_terms(struct list_head *dst, struct list_head *src);
 void free_config_terms(struct list_head *config_terms);
 
 
-#ifdef HAVE_LIBTRACEEVENT
-struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format);
-
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx, bool format);
 static inline struct evsel *evsel__newtp(const char *sys, const char *name)
 {
 	return evsel__newtp_idx(sys, name, 0, true);
 }
-#endif
 
 #ifdef HAVE_LIBTRACEEVENT
-struct tep_event *event_format__new(const char *sys, const char *name);
+struct tep_event *evsel__tp_format(struct evsel *evsel);
 #endif
 
 void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
index aee6f808b512..af52a1516d0b 100644
--- a/tools/perf/util/evsel_config.h
+++ b/tools/perf/util/evsel_config.h
@@ -25,6 +25,7 @@ enum evsel_term_type {
 	EVSEL__CONFIG_TERM_BRANCH,
 	EVSEL__CONFIG_TERM_PERCORE,
 	EVSEL__CONFIG_TERM_AUX_OUTPUT,
+	EVSEL__CONFIG_TERM_AUX_ACTION,
 	EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
 	EVSEL__CONFIG_TERM_CFG_CHG,
 };
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 86b7f46f9e2a..103984b29b1e 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -81,13 +81,15 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE
 #ifdef HAVE_LIBTRACEEVENT
 	if (details->trace_fields) {
 		struct tep_format_field *field;
+		const struct tep_event *tp_format;
 
 		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT) {
 			printed += comma_fprintf(fp, &first, " (not a tracepoint)");
 			goto out;
 		}
 
-		field = evsel->tp_format->format.fields;
+		tp_format = evsel__tp_format(evsel);
+		field = tp_format ? tp_format->format.fields : NULL;
 		if (field == NULL) {
 			printed += comma_fprintf(fp, &first, " (no trace field)");
 			goto out;
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index f289044a1f7c..c221dcce6666 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -285,7 +285,7 @@ struct expr_parse_ctx *expr__ctx_new(void)
 {
 	struct expr_parse_ctx *ctx;
 
-	ctx = malloc(sizeof(struct expr_parse_ctx));
+	ctx = calloc(1, sizeof(struct expr_parse_ctx));
 	if (!ctx)
 		return NULL;
 
@@ -294,9 +294,6 @@ struct expr_parse_ctx *expr__ctx_new(void)
 		free(ctx);
 		return NULL;
 	}
-	ctx->sctx.user_requested_cpu_list = NULL;
-	ctx->sctx.runtime = 0;
-	ctx->sctx.system_wide = false;
 
 	return ctx;
 }
diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h
index bae649ef50e8..5dee2caba0fe 100644
--- a/tools/perf/util/ftrace.h
+++ b/tools/perf/util/ftrace.h
@@ -7,6 +7,7 @@
 
 struct evlist;
 struct hashamp;
+struct stats;
 
 struct perf_ftrace {
 	struct evlist		*evlist;
@@ -20,6 +21,9 @@ struct perf_ftrace {
 	unsigned long		percpu_buffer_size;
 	bool			inherit;
 	bool			use_nsec;
+	unsigned int		bucket_range;
+	unsigned int		min_latency;
+	unsigned int		max_latency;
 	int			graph_depth;
 	int			func_stack_trace;
 	int			func_irq_info;
@@ -43,7 +47,7 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace);
 int perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace);
 int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace);
 int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace,
-				  int buckets[]);
+				  int buckets[], struct stats *stats);
 int perf_ftrace__latency_cleanup_bpf(struct perf_ftrace *ftrace);
 
 #else  /* !HAVE_BPF_SKEL */
@@ -68,7 +72,8 @@ perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused)
 
 static inline int
 perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
-			      int buckets[] __maybe_unused)
+			      int buckets[] __maybe_unused,
+			      struct stats *stats __maybe_unused)
 {
 	return -1;
 }
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
index 1b5140e5ce99..6a73c903d690 100755
--- a/tools/perf/util/generate-cmdlist.sh
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -38,7 +38,7 @@ do
 done
 echo "#endif /* HAVE_LIBELF_SUPPORT */"
 
-echo "#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))"
+echo "#if defined(HAVE_LIBTRACEEVENT)"
 sed -n -e 's/^perf-\([^ 	]*\)[ 	].* audit*/\1/p' command-list.txt |
 sort |
 while read cmd
@@ -51,7 +51,7 @@ do
 	    p
      }' "Documentation/perf-$cmd.txt"
 done
-echo "#endif /* HAVE_LIBTRACEEVENT && (HAVE_LIBAUDIT_SUPPORT || HAVE_SYSCALL_TABLE_SUPPORT) */"
+echo "#endif /* HAVE_LIBTRACEEVENT */"
 
 echo "#ifdef HAVE_LIBTRACEEVENT"
 sed -n -e 's/^perf-\([^ 	]*\)[ 	].* traceevent.*/\1/p' command-list.txt |
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 3451e542b69a..d06aa86352d3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -3158,7 +3158,10 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused)
 		/* after reading from file, translate offset to address */
 		bpil_offs_to_addr(info_linear);
 		info_node->info_linear = info_linear;
-		__perf_env__insert_bpf_prog_info(env, info_node);
+		if (!__perf_env__insert_bpf_prog_info(env, info_node)) {
+			free(info_linear);
+			free(info_node);
+		}
 	}
 
 	up_write(&env->bpf_progs.lock);
@@ -3205,7 +3208,8 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused)
 		if (__do_read(ff, node->data, data_size))
 			goto out;
 
-		__perf_env__insert_btf(env, node);
+		if (!__perf_env__insert_btf(env, node))
+			free(node);
 		node = NULL;
 	}
 
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index fff134565801..0f30f843c566 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -32,6 +32,9 @@
 #include <linux/time64.h>
 #include <linux/zalloc.h>
 
+static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+static int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
+
 static bool hists__filter_entry_by_dso(struct hists *hists,
 				       struct hist_entry *he);
 static bool hists__filter_entry_by_thread(struct hists *hists,
@@ -1292,19 +1295,35 @@ out:
 	return err;
 }
 
-int64_t
-hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+static int64_t
+hist_entry__cmp_impl(struct perf_hpp_list *hpp_list, struct hist_entry *left,
+		     struct hist_entry *right, unsigned long fn_offset,
+		     bool ignore_dynamic, bool ignore_skipped)
 {
 	struct hists *hists = left->hists;
 	struct perf_hpp_fmt *fmt;
-	int64_t cmp = 0;
+	perf_hpp_fmt_cmp_t *fn;
+	int64_t cmp;
+
+	/*
+	 * Never collapse filtered and non-filtered entries.
+	 * Note this is not the same as having an extra (invisible) fmt
+	 * that corresponds to the filtered status.
+	 */
+	cmp = (int64_t)!!left->filtered - (int64_t)!!right->filtered;
+	if (cmp)
+		return cmp;
 
-	hists__for_each_sort_list(hists, fmt) {
-		if (perf_hpp__is_dynamic_entry(fmt) &&
+	perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+		if (ignore_dynamic && perf_hpp__is_dynamic_entry(fmt) &&
 		    !perf_hpp__defined_dynamic_entry(fmt, hists))
 			continue;
 
-		cmp = fmt->cmp(fmt, left, right);
+		if (ignore_skipped && perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		fn = (void *)fmt + fn_offset;
+		cmp = (*fn)(fmt, left, right);
 		if (cmp)
 			break;
 	}
@@ -1313,23 +1332,33 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 }
 
 int64_t
-hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
 {
-	struct hists *hists = left->hists;
-	struct perf_hpp_fmt *fmt;
-	int64_t cmp = 0;
+	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+		offsetof(struct perf_hpp_fmt, cmp), true, false);
+}
 
-	hists__for_each_sort_list(hists, fmt) {
-		if (perf_hpp__is_dynamic_entry(fmt) &&
-		    !perf_hpp__defined_dynamic_entry(fmt, hists))
-			continue;
+static int64_t
+hist_entry__sort(struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+		offsetof(struct perf_hpp_fmt, sort), false, true);
+}
 
-		cmp = fmt->collapse(fmt, left, right);
-		if (cmp)
-			break;
-	}
+int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+	return hist_entry__cmp_impl(left->hists->hpp_list, left, right,
+		offsetof(struct perf_hpp_fmt, collapse), true, false);
+}
 
-	return cmp;
+static int64_t
+hist_entry__collapse_hierarchy(struct perf_hpp_list *hpp_list,
+			       struct hist_entry *left,
+			       struct hist_entry *right)
+{
+	return hist_entry__cmp_impl(hpp_list, left, right,
+		offsetof(struct perf_hpp_fmt, collapse), false, false);
 }
 
 void hist_entry__delete(struct hist_entry *he)
@@ -1503,14 +1532,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
 	while (*p != NULL) {
 		parent = *p;
 		iter = rb_entry(parent, struct hist_entry, rb_node_in);
-
-		cmp = 0;
-		perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
-			cmp = fmt->collapse(fmt, iter, he);
-			if (cmp)
-				break;
-		}
-
+		cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he);
 		if (!cmp) {
 			he_stat__add_stat(&iter->stat, &he->stat);
 			return iter;
@@ -1730,24 +1752,6 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 	return 0;
 }
 
-static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
-{
-	struct hists *hists = a->hists;
-	struct perf_hpp_fmt *fmt;
-	int64_t cmp = 0;
-
-	hists__for_each_sort_list(hists, fmt) {
-		if (perf_hpp__should_skip(fmt, a->hists))
-			continue;
-
-		cmp = fmt->sort(fmt, a, b);
-		if (cmp)
-			break;
-	}
-
-	return cmp;
-}
-
 static void hists__reset_filter_stats(struct hists *hists)
 {
 	hists->nr_non_filtered_entries = 0;
@@ -2449,21 +2453,15 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
 	struct hist_entry *he;
-	struct perf_hpp_fmt *fmt;
 	bool leftmost = true;
 
 	p = &root->rb_root.rb_node;
 	while (*p != NULL) {
-		int64_t cmp = 0;
+		int64_t cmp;
 
 		parent = *p;
 		he = rb_entry(parent, struct hist_entry, rb_node_in);
-
-		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
-			cmp = fmt->collapse(fmt, he, pair);
-			if (cmp)
-				break;
-		}
+		cmp = hist_entry__collapse_hierarchy(he->hpp_list, he, pair);
 		if (!cmp)
 			goto out;
 
@@ -2521,16 +2519,10 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *roo
 
 	while (n) {
 		struct hist_entry *iter;
-		struct perf_hpp_fmt *fmt;
-		int64_t cmp = 0;
+		int64_t cmp;
 
 		iter = rb_entry(n, struct hist_entry, rb_node_in);
-		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
-			cmp = fmt->collapse(fmt, iter, he);
-			if (cmp)
-				break;
-		}
-
+		cmp = hist_entry__collapse_hierarchy(he->hpp_list, iter, he);
 		if (cmp < 0)
 			n = n->rb_left;
 		else if (cmp > 0)
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 1131056924d9..46c8373e3146 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -342,8 +342,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 struct perf_hpp;
 struct perf_hpp_fmt;
 
-int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
-int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
 int hist_entry__transaction_len(void);
 int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
 			      struct hists *hists);
@@ -452,6 +450,9 @@ struct perf_hpp {
 	bool skip;
 };
 
+typedef int64_t (*perf_hpp_fmt_cmp_t)(
+	struct perf_hpp_fmt *, struct hist_entry *, struct hist_entry *);
+
 struct perf_hpp_fmt {
 	const char *name;
 	int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -463,12 +464,9 @@ struct perf_hpp_fmt {
 		     struct hist_entry *he);
 	int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
 		     struct hist_entry *he);
-	int64_t (*cmp)(struct perf_hpp_fmt *fmt,
-		       struct hist_entry *a, struct hist_entry *b);
-	int64_t (*collapse)(struct perf_hpp_fmt *fmt,
-			    struct hist_entry *a, struct hist_entry *b);
-	int64_t (*sort)(struct perf_hpp_fmt *fmt,
-			struct hist_entry *a, struct hist_entry *b);
+	perf_hpp_fmt_cmp_t cmp;
+	perf_hpp_fmt_cmp_t collapse;
+	perf_hpp_fmt_cmp_t sort;
 	bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
 	void (*free)(struct perf_hpp_fmt *fmt);
 
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
index 30793d08c6d4..5b8f0149167d 100644
--- a/tools/perf/util/intel-pt-decoder/Build
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -7,16 +7,24 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table
 	$(call rule_mkdir)
 	@$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
 
-# Busybox's diff doesn't have -I, avoid warning in the case
+ifeq ($(SRCARCH),x86)
+  perf-util-y += inat.o insn.o
+else
+  perf-util-$(CONFIG_AUXTRACE) += inat.o insn.o
+endif
 
-$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+$(OUTPUT)util/intel-pt-decoder/inat.o: $(srctree)/tools/arch/x86/lib/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_o_c)
 
-CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder
+CFLAGS_inat.o += -I$(OUTPUT)util/intel-pt-decoder
+
+$(OUTPUT)util/intel-pt-decoder/insn.o: $(srctree)/tools/arch/x86/lib/insn.c
+	$(call rule_mkdir)
+	$(call if_changed_dep,cc_o_c)
 
 ifeq ($(CC_NO_CLANG), 1)
-  CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init
+  CFLAGS_insn.o += -Wno-override-init
 endif
 
-CFLAGS_intel-pt-insn-decoder.o += -Wno-packed
+CFLAGS_insn.o += -Wno-packed
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 47cf35799a4d..8fabddc1c0da 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -11,9 +11,6 @@
 #include <byteswap.h>
 #include "../../../arch/x86/include/asm/insn.h"
 
-#include "../../../arch/x86/lib/inat.c"
-#include "../../../arch/x86/lib/insn.c"
-
 #include "event.h"
 
 #include "intel-pt-insn-decoder.h"
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 346513e5e9b7..f23e21502bf8 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -737,7 +737,7 @@ jit_inject(struct jit_buf_desc *jd, const char *path)
  * as captured in the RECORD_MMAP record
  */
 static int
-jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi)
+jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi, bool *in_pidns)
  {
 	char *p;
 	char *end = NULL;
@@ -773,11 +773,16 @@ jit_detect(const char *mmap_name, pid_t pid, struct nsinfo *nsi)
 	if (!end)
 		return -1;
 
+	*in_pidns = pid == nsinfo__nstgid(nsi);
 	/*
 	 * pid does not match mmap pid
 	 * pid==0 in system-wide mode (synthesized)
+	 *
+	 * If the pid in the file name is equal to the nstgid, then
+	 * the agent ran inside a container and perf outside the
+	 * container, so record it for further use in jit_inject().
 	 */
-	if (pid && pid2 != nsinfo__nstgid(nsi))
+	if (pid && !(pid2 == pid || *in_pidns))
 		return -1;
 	/*
 	 * validate suffix
@@ -830,6 +835,7 @@ jit_process(struct perf_session *session,
 	struct nsinfo *nsi;
 	struct evsel *first;
 	struct jit_buf_desc jd;
+	bool in_pidns = false;
 	int ret;
 
 	thread = machine__findnew_thread(machine, pid, tid);
@@ -844,7 +850,7 @@ jit_process(struct perf_session *session,
 	/*
 	 * first, detect marker mmap (i.e., the jitdump mmap)
 	 */
-	if (jit_detect(filename, pid, nsi)) {
+	if (jit_detect(filename, pid, nsi, &in_pidns)) {
 		nsinfo__put(nsi);
 
 		/*
@@ -866,6 +872,9 @@ jit_process(struct perf_session *session,
 	jd.machine = machine;
 	jd.nsi = nsi;
 
+	if (in_pidns)
+		nsinfo__set_in_pidns(nsi);
+
 	/*
 	 * track sample_type to compute id_all layout
 	 * perf sets the same sample type to all events as of now
diff --git a/tools/perf/util/kvm-stat.c b/tools/perf/util/kvm-stat.c
new file mode 100644
index 000000000000..38ace736db5c
--- /dev/null
+++ b/tools/perf/util/kvm-stat.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "evsel.h"
+#include "kvm-stat.h"
+
+#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
+
+bool kvm_exit_event(struct evsel *evsel)
+{
+	return evsel__name_is(evsel, kvm_exit_trace);
+}
+
+void exit_event_get_key(struct evsel *evsel,
+			struct perf_sample *sample,
+			struct event_key *key)
+{
+	key->info = 0;
+	key->key  = evsel__intval(evsel, sample, kvm_exit_reason);
+}
+
+
+bool exit_event_begin(struct evsel *evsel,
+		      struct perf_sample *sample, struct event_key *key)
+{
+	if (kvm_exit_event(evsel)) {
+		exit_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+
+bool kvm_entry_event(struct evsel *evsel)
+{
+	return evsel__name_is(evsel, kvm_entry_trace);
+}
+
+bool exit_event_end(struct evsel *evsel,
+		    struct perf_sample *sample __maybe_unused,
+		    struct event_key *key __maybe_unused)
+{
+	return kvm_entry_event(evsel);
+}
+
+static const char *get_exit_reason(struct perf_kvm_stat *kvm,
+				   struct exit_reasons_table *tbl,
+				   u64 exit_code)
+{
+	while (tbl->reason != NULL) {
+		if (tbl->exit_code == exit_code)
+			return tbl->reason;
+		tbl++;
+	}
+
+	pr_err("unknown kvm exit code:%lld on %s\n",
+		(unsigned long long)exit_code, kvm->exit_reasons_isa);
+	return "UNKNOWN";
+}
+
+void exit_event_decode_key(struct perf_kvm_stat *kvm,
+			   struct event_key *key,
+			   char *decode)
+{
+	const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
+						  key->key);
+
+	scnprintf(decode, KVM_EVENT_NAME_LEN, "%s", exit_reason);
+}
+
+#endif
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 3e9ac754c3d1..4249542544bb 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -115,6 +115,8 @@ struct kvm_reg_events_ops {
 	struct kvm_events_ops *ops;
 };
 
+#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
+
 void exit_event_get_key(struct evsel *evsel,
 			struct perf_sample *sample,
 			struct event_key *key);
@@ -127,6 +129,7 @@ bool exit_event_end(struct evsel *evsel,
 void exit_event_decode_key(struct perf_kvm_stat *kvm,
 			   struct event_key *key,
 			   char *decode);
+#endif
 
 bool kvm_exit_event(struct evsel *evsel);
 bool kvm_entry_event(struct evsel *evsel);
diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h
index 76fe2a821bcf..db00269b73f2 100644
--- a/tools/perf/util/kwork.h
+++ b/tools/perf/util/kwork.h
@@ -1,6 +1,7 @@
 #ifndef PERF_UTIL_KWORK_H
 #define PERF_UTIL_KWORK_H
 
+#include "perf.h"
 #include "util/tool.h"
 #include "util/time-utils.h"
 
@@ -251,12 +252,14 @@ struct perf_kwork {
 	 * perf kwork top data
 	 */
 	struct kwork_top_stat top_stat;
-};
 
-struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
+	/* Add work callback. */
+	struct kwork_work *(*add_work)(struct perf_kwork *kwork,
 				       struct kwork_class *class,
 				       struct kwork_work *key);
 
+};
+
 #ifdef HAVE_BPF_SKEL
 
 int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork);
diff --git a/tools/perf/util/llvm-c-helpers.cpp b/tools/perf/util/llvm-c-helpers.cpp
index 663bcaba2041..004081bd12c9 100644
--- a/tools/perf/util/llvm-c-helpers.cpp
+++ b/tools/perf/util/llvm-c-helpers.cpp
@@ -18,7 +18,6 @@
 extern "C" {
 #include <linux/zalloc.h>
 }
-#include "symbol_conf.h"
 #include "llvm-c-helpers.h"
 
 extern "C"
diff --git a/tools/perf/util/lock-contention.c b/tools/perf/util/lock-contention.c
new file mode 100644
index 000000000000..92e7b7b572a2
--- /dev/null
+++ b/tools/perf/util/lock-contention.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "env.h"
+#include "lock-contention.h"
+#include "machine.h"
+#include "symbol.h"
+
+#include <limits.h>
+#include <string.h>
+
+#include <linux/hash.h>
+#include <linux/zalloc.h>
+
+#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
+#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
+
+struct callstack_filter {
+	struct list_head list;
+	char name[];
+};
+
+static LIST_HEAD(callstack_filters);
+struct hlist_head *lockhash_table;
+
+int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
+		     int unset __maybe_unused)
+{
+	char *s, *tmp, *tok;
+	int ret = 0;
+
+	s = strdup(str);
+	if (s == NULL)
+		return -1;
+
+	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
+		struct callstack_filter *entry;
+
+		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
+		if (entry == NULL) {
+			pr_err("Memory allocation failure\n");
+			free(s);
+			return -1;
+		}
+
+		strcpy(entry->name, tok);
+		list_add_tail(&entry->list, &callstack_filters);
+	}
+
+	free(s);
+	return ret;
+}
+
+bool needs_callstack(void)
+{
+	return !list_empty(&callstack_filters);
+}
+
+struct lock_stat *lock_stat_find(u64 addr)
+{
+	struct hlist_head *entry = lockhashentry(addr);
+	struct lock_stat *ret;
+
+	hlist_for_each_entry(ret, entry, hash_entry) {
+		if (ret->addr == addr)
+			return ret;
+	}
+	return NULL;
+}
+
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
+{
+	struct hlist_head *entry = lockhashentry(addr);
+	struct lock_stat *ret, *new;
+
+	hlist_for_each_entry(ret, entry, hash_entry) {
+		if (ret->addr == addr)
+			return ret;
+	}
+
+	new = zalloc(sizeof(struct lock_stat));
+	if (!new)
+		goto alloc_failed;
+
+	new->addr = addr;
+	new->name = strdup(name);
+	if (!new->name) {
+		free(new);
+		goto alloc_failed;
+	}
+
+	new->flags = flags;
+	new->wait_time_min = ULLONG_MAX;
+
+	hlist_add_head(&new->hash_entry, entry);
+	return new;
+
+alloc_failed:
+	pr_err("memory allocation failed\n");
+	return NULL;
+}
+
+bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth)
+{
+	struct map *kmap;
+	struct symbol *sym;
+	u64 ip;
+	const char *arch = perf_env__arch(machine->env);
+
+	if (list_empty(&callstack_filters))
+		return true;
+
+	for (int i = 0; i < max_stack_depth; i++) {
+		struct callstack_filter *filter;
+
+		/*
+		 * In powerpc, the callchain saved by kernel always includes
+		 * first three entries as the NIP (next instruction pointer),
+		 * LR (link register), and the contents of LR save area in the
+		 * second stack frame. In certain scenarios its possible to have
+		 * invalid kernel instruction addresses in either LR or the second
+		 * stack frame's LR. In that case, kernel will store that address as
+		 * zero.
+		 *
+		 * The below check will continue to look into callstack,
+		 * incase first or second callstack index entry has 0
+		 * address for powerpc.
+		 */
+		if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
+						(i != 1 && i != 2))))
+			break;
+
+		ip = callstack[i];
+		sym = machine__find_kernel_symbol(machine, ip, &kmap);
+		if (sym == NULL)
+			continue;
+
+		list_for_each_entry(filter, &callstack_filters, list) {
+			if (strstr(sym->name, filter->name))
+				return true;
+		}
+	}
+	return false;
+}
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index 1a7248ff3889..a09f7fe877df 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -10,10 +10,12 @@ struct lock_filter {
 	int			nr_addrs;
 	int			nr_syms;
 	int			nr_cgrps;
+	int			nr_slabs;
 	unsigned int		*types;
 	unsigned long		*addrs;
 	char			**syms;
 	u64			*cgrps;
+	char			**slabs;
 };
 
 struct lock_stat {
@@ -67,10 +69,11 @@ struct lock_stat {
  */
 #define MAX_LOCK_DEPTH 48
 
-struct lock_stat *lock_stat_find(u64 addr);
-struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
+/* based on kernel/lockdep.c */
+#define LOCKHASH_BITS		12
+#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)
 
-bool match_callstack_filter(struct machine *machine, u64 *callstack);
+extern struct hlist_head *lockhash_table;
 
 /*
  * struct lock_seq_stat:
@@ -148,8 +151,17 @@ struct lock_contention {
 	bool save_callstack;
 };
 
-#ifdef HAVE_BPF_SKEL
+struct option;
+int parse_call_stack(const struct option *opt, const char *str, int unset);
+bool needs_callstack(void);
 
+struct lock_stat *lock_stat_find(u64 addr);
+struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags);
+
+bool match_callstack_filter(struct machine *machine, u64 *callstack, int max_stack_depth);
+
+
+#ifdef HAVE_BPF_SKEL
 int lock_contention_prepare(struct lock_contention *con);
 int lock_contention_start(void);
 int lock_contention_stop(void);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 27d5345d2b30..2d51badfbf2e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1003,7 +1003,7 @@ static int machine__get_running_kernel_start(struct machine *machine,
 
 	err = kallsyms__get_symbol_start(filename, "_edata", &addr);
 	if (err)
-		err = kallsyms__get_function_start(filename, "_etext", &addr);
+		err = kallsyms__get_symbol_start(filename, "_etext", &addr);
 	if (!err)
 		*end = addr;
 
@@ -1468,6 +1468,8 @@ static int machine__create_modules(struct machine *machine)
 	if (modules__parse(modules, machine, machine__create_module))
 		return -1;
 
+	maps__fixup_end(machine__kernel_maps(machine));
+
 	if (!machine__set_modules_path(machine))
 		return 0;
 
diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c
index 432399cbe5dd..09c9cc326c08 100644
--- a/tools/perf/util/maps.c
+++ b/tools/perf/util/maps.c
@@ -1136,8 +1136,13 @@ struct map *maps__find_next_entry(struct maps *maps, struct map *map)
 	struct map *result = NULL;
 
 	down_read(maps__lock(maps));
+	while (!maps__maps_by_address_sorted(maps)) {
+		up_read(maps__lock(maps));
+		maps__sort_by_address(maps);
+		down_read(maps__lock(maps));
+	}
 	i = maps__by_address_index(maps, map);
-	if (i < maps__nr_maps(maps))
+	if (++i < maps__nr_maps(maps))
 		result = map__get(maps__maps_by_address(maps)[i]);
 
 	up_read(maps__lock(maps));
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index bf5090f5220b..3692e988c86e 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -258,6 +258,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr)
 	const char *s;
 	char *copy;
 	struct perf_cpu_map *cpu_map = NULL;
+	int ret;
 
 	while ((pmu = perf_pmus__scan_mem(pmu)) != NULL) {
 		for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
@@ -283,7 +284,9 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr)
 			rec_argv[i++] = "-e";
 			rec_argv[i++] = copy;
 
-			cpu_map = perf_cpu_map__merge(cpu_map, pmu->cpus);
+			ret = perf_cpu_map__merge(&cpu_map, pmu->cpus);
+			if (ret < 0)
+				return ret;
 		}
 	}
 
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index cb185c5659d6..68f5de2d79c7 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -266,11 +266,16 @@ pid_t nsinfo__pid(const struct nsinfo  *nsi)
 	return RC_CHK_ACCESS(nsi)->pid;
 }
 
-pid_t nsinfo__in_pidns(const struct nsinfo  *nsi)
+bool nsinfo__in_pidns(const struct nsinfo *nsi)
 {
 	return RC_CHK_ACCESS(nsi)->in_pidns;
 }
 
+void nsinfo__set_in_pidns(struct nsinfo *nsi)
+{
+	RC_CHK_ACCESS(nsi)->in_pidns = true;
+}
+
 void nsinfo__mountns_enter(struct nsinfo *nsi,
 				  struct nscookie *nc)
 {
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index 8c0731c6cbb7..e95c79b80e27 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -58,7 +58,8 @@ void nsinfo__clear_need_setns(struct nsinfo *nsi);
 pid_t nsinfo__tgid(const struct nsinfo  *nsi);
 pid_t nsinfo__nstgid(const struct nsinfo  *nsi);
 pid_t nsinfo__pid(const struct nsinfo  *nsi);
-pid_t nsinfo__in_pidns(const struct nsinfo  *nsi);
+bool nsinfo__in_pidns(const struct nsinfo  *nsi);
+void nsinfo__set_in_pidns(struct nsinfo *nsi);
 
 void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc);
 void nsinfo__mountns_exit(struct nscookie *nc);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index afeb8d815bbf..1e23faa364b1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -489,7 +489,6 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
 	return found_supported ? 0 : -EINVAL;
 }
 
-#ifdef HAVE_LIBTRACEEVENT
 static void tracepoint_error(struct parse_events_error *e, int err,
 			     const char *sys, const char *name, int column)
 {
@@ -644,7 +643,6 @@ static int add_tracepoint_multi_sys(struct parse_events_state *parse_state,
 	closedir(events_dir);
 	return ret;
 }
-#endif /* HAVE_LIBTRACEEVENT */
 
 size_t default_breakpoint_len(void)
 {
@@ -795,6 +793,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
 		[PARSE_EVENTS__TERM_TYPE_DRV_CFG]		= "driver-config",
 		[PARSE_EVENTS__TERM_TYPE_PERCORE]		= "percore",
 		[PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT]		= "aux-output",
+		[PARSE_EVENTS__TERM_TYPE_AUX_ACTION]		= "aux-action",
 		[PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE]	= "aux-sample-size",
 		[PARSE_EVENTS__TERM_TYPE_METRIC_ID]		= "metric-id",
 		[PARSE_EVENTS__TERM_TYPE_RAW]                   = "raw",
@@ -844,6 +843,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
 	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 	case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
 	case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+	case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
 	case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
 	case PARSE_EVENTS__TERM_TYPE_RAW:
 	case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
@@ -963,6 +963,9 @@ do {									   \
 	case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
 		CHECK_TYPE_VAL(NUM);
 		break;
+	case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
+		CHECK_TYPE_VAL(STR);
+		break;
 	case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
 		CHECK_TYPE_VAL(NUM);
 		if (term->val.num > UINT_MAX) {
@@ -1066,7 +1069,6 @@ static int config_term_pmu(struct perf_event_attr *attr,
 	return config_term_common(attr, term, err);
 }
 
-#ifdef HAVE_LIBTRACEEVENT
 static int config_term_tracepoint(struct perf_event_attr *attr,
 				  struct parse_events_term *term,
 				  struct parse_events_error *err)
@@ -1081,6 +1083,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
 	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 	case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
 	case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+	case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
 	case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
 		return config_term_common(attr, term, err);
 	case PARSE_EVENTS__TERM_TYPE_USER:
@@ -1111,7 +1114,6 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
 
 	return 0;
 }
-#endif
 
 static int config_attr(struct perf_event_attr *attr,
 		       const struct parse_events_terms *head,
@@ -1217,6 +1219,9 @@ do {								\
 			ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output,
 					    term->val.num ? 1 : 0, term->weak);
 			break;
+		case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
+			ADD_CONFIG_TERM_STR(AUX_ACTION, term->val.str, term->weak);
+			break;
 		case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
 			ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
 					    term->val.num, term->weak);
@@ -1279,6 +1284,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
 		case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
 		case PARSE_EVENTS__TERM_TYPE_PERCORE:
 		case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT:
+		case PARSE_EVENTS__TERM_TYPE_AUX_ACTION:
 		case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE:
 		case PARSE_EVENTS__TERM_TYPE_METRIC_ID:
 		case PARSE_EVENTS__TERM_TYPE_RAW:
@@ -1303,7 +1309,7 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state,
 				struct parse_events_terms *head_config, void *loc_)
 {
 	YYLTYPE *loc = loc_;
-#ifdef HAVE_LIBTRACEEVENT
+
 	if (head_config) {
 		struct perf_event_attr attr;
 
@@ -1318,16 +1324,6 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state,
 	else
 		return add_tracepoint_event(parse_state, list, sys, event,
 					    err, head_config, loc);
-#else
-	(void)parse_state;
-	(void)list;
-	(void)sys;
-	(void)event;
-	(void)head_config;
-	parse_events_error__handle(err, loc->first_column, strdup("unsupported tracepoint"),
-				strdup("libtraceevent is necessary for tracepoint support"));
-	return -1;
-#endif
 }
 
 static int __parse_events_add_numeric(struct parse_events_state *parse_state,
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 3f4334ec6231..e176a34ab088 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -74,6 +74,7 @@ enum parse_events__term_type {
 	PARSE_EVENTS__TERM_TYPE_DRV_CFG,
 	PARSE_EVENTS__TERM_TYPE_PERCORE,
 	PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT,
+	PARSE_EVENTS__TERM_TYPE_AUX_ACTION,
 	PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE,
 	PARSE_EVENTS__TERM_TYPE_METRIC_ID,
 	PARSE_EVENTS__TERM_TYPE_RAW,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 14e5bd856a18..bf7f73548605 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -321,6 +321,7 @@ overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
 no-overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
 percore			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
 aux-output		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
+aux-action		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
 aux-sample-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
 metric-id		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
 cpu-cycles|cycles				{ return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 00adf872bf00..2e62f272fda8 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -68,14 +68,12 @@ bool is_directory(const char *base_path, const struct dirent *dent)
 	return S_ISDIR(st.st_mode);
 }
 
-bool is_executable_file(const char *base_path, const struct dirent *dent)
+bool is_directory_at(int dir_fd, const char *path)
 {
-	char path[PATH_MAX];
 	struct stat st;
 
-	snprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
-	if (stat(path, &st))
+	if (fstatat(dir_fd, path, &st, /*flags=*/0))
 		return false;
 
-	return !S_ISDIR(st.st_mode) && (st.st_mode & S_IXUSR);
+	return S_ISDIR(st.st_mode);
 }
diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h
index d94902c22222..fb850fb55c60 100644
--- a/tools/perf/util/path.h
+++ b/tools/perf/util/path.h
@@ -12,6 +12,6 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con
 
 bool is_regular_file(const char *file);
 bool is_directory(const char *base_path, const struct dirent *dent);
-bool is_executable_file(const char *base_path, const struct dirent *dent);
+bool is_directory_at(int dir_fd, const char *path);
 
 #endif /* _PERF_PATH_H */
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 59fbbba79697..c7f3543b9921 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -212,7 +212,6 @@ static void __p_config_hw_cache_id(char *buf, size_t size, u64 value)
 	}
 }
 
-#ifdef HAVE_LIBTRACEEVENT
 static void __p_config_tracepoint_id(char *buf, size_t size, u64 value)
 {
 	char *str = tracepoint_id_to_name(value);
@@ -220,7 +219,6 @@ static void __p_config_tracepoint_id(char *buf, size_t size, u64 value)
 	print_id_hex(str);
 	free(str);
 }
-#endif
 
 static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type, u64 value)
 {
@@ -238,9 +236,7 @@ static void __p_config_id(struct perf_pmu *pmu, char *buf, size_t size, u32 type
 	case PERF_TYPE_HW_CACHE:
 		return __p_config_hw_cache_id(buf, size, value);
 	case PERF_TYPE_TRACEPOINT:
-#ifdef HAVE_LIBTRACEEVENT
 		return __p_config_tracepoint_id(buf, size, value);
-#endif
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_BREAKPOINT:
 	default:
@@ -335,6 +331,9 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 	PRINT_ATTRf(sample_max_stack, p_unsigned);
 	PRINT_ATTRf(aux_sample_size, p_unsigned);
 	PRINT_ATTRf(sig_data, p_unsigned);
+	PRINT_ATTRf(aux_start_paused, p_unsigned);
+	PRINT_ATTRf(aux_pause, p_unsigned);
+	PRINT_ATTRf(aux_resume, p_unsigned);
 
 	return ret;
 }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 08a9d0bd9301..6206c8fe2bf9 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -12,6 +12,7 @@
 #include <stdbool.h>
 #include <dirent.h>
 #include <api/fs/fs.h>
+#include <api/io.h>
 #include <locale.h>
 #include <fnmatch.h>
 #include <math.h>
@@ -748,26 +749,35 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, int err_loc, struct lis
  * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
  * may have a "cpus" file.
  */
-static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *name, bool is_core)
+static struct perf_cpu_map *pmu_cpumask(int dirfd, const char *pmu_name, bool is_core)
 {
-	struct perf_cpu_map *cpus;
 	const char *templates[] = {
 		"cpumask",
 		"cpus",
 		NULL
 	};
 	const char **template;
-	char pmu_name[PATH_MAX];
-	struct perf_pmu pmu = {.name = pmu_name};
-	FILE *file;
 
-	strlcpy(pmu_name, name, sizeof(pmu_name));
 	for (template = templates; *template; template++) {
-		file = perf_pmu__open_file_at(&pmu, dirfd, *template);
-		if (!file)
+		struct io io;
+		char buf[128];
+		char *cpumask = NULL;
+		size_t cpumask_len;
+		ssize_t ret;
+		struct perf_cpu_map *cpus;
+
+		io.fd = perf_pmu__pathname_fd(dirfd, pmu_name, *template, O_RDONLY);
+		if (io.fd < 0)
 			continue;
-		cpus = perf_cpu_map__read(file);
-		fclose(file);
+
+		io__init(&io, io.fd, buf, sizeof(buf));
+		ret = io__getline(&io, &cpumask, &cpumask_len);
+		close(io.fd);
+		if (ret < 0)
+			continue;
+
+		cpus = perf_cpu_map__new(cpumask);
+		free(cpumask);
 		if (cpus)
 			return cpus;
 	}
@@ -1763,6 +1773,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
 		"no-overwrite",
 		"percore",
 		"aux-output",
+		"aux-action=(pause|resume|start-paused)",
 		"aux-sample-size=number",
 	};
 	struct perf_pmu_format *format;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index eaa0318e9b87..307ad6242a4e 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1383,20 +1383,20 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
 		if (p == buf) {
 			semantic_error("No file/function name in '%s'.\n", p);
 			err = -EINVAL;
-			goto err;
+			goto out;
 		}
 		*(p++) = '\0';
 
 		err = parse_line_num(&p, &lr->start, "start line");
 		if (err)
-			goto err;
+			goto out;
 
 		if (*p == '+' || *p == '-') {
 			const char c = *(p++);
 
 			err = parse_line_num(&p, &lr->end, "end line");
 			if (err)
-				goto err;
+				goto out;
 
 			if (c == '+') {
 				lr->end += lr->start;
@@ -1416,11 +1416,11 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
 		if (lr->start > lr->end) {
 			semantic_error("Start line must be smaller"
 				       " than end line.\n");
-			goto err;
+			goto out;
 		}
 		if (*p != '\0') {
 			semantic_error("Tailing with invalid str '%s'.\n", p);
-			goto err;
+			goto out;
 		}
 	}
 
@@ -1431,7 +1431,7 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
 			lr->file = strdup_esq(p);
 			if (lr->file == NULL) {
 				err = -ENOMEM;
-				goto err;
+				goto out;
 			}
 		}
 		if (*buf != '\0')
@@ -1439,7 +1439,7 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
 		if (!lr->function && !lr->file) {
 			semantic_error("Only '@*' is not allowed.\n");
 			err = -EINVAL;
-			goto err;
+			goto out;
 		}
 	} else if (strpbrk_esq(buf, "/."))
 		lr->file = strdup_esq(buf);
@@ -1448,10 +1448,10 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
 	else {	/* Invalid name */
 		semantic_error("'%s' is not a valid function name.\n", buf);
 		err = -EINVAL;
-		goto err;
+		goto out;
 	}
 
-err:
+out:
 	free(buf);
 	return err;
 }
@@ -2775,7 +2775,7 @@ int show_perf_probe_events(struct strfilter *filter)
 
 static int get_new_event_name(char *buf, size_t len, const char *base,
 			      struct strlist *namelist, bool ret_event,
-			      bool allow_suffix)
+			      bool allow_suffix, bool not_C_symname)
 {
 	int i, ret;
 	char *p, *nbase;
@@ -2786,10 +2786,24 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
 	if (!nbase)
 		return -ENOMEM;
 
-	/* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */
-	p = strpbrk(nbase, ".@");
-	if (p && p != nbase)
-		*p = '\0';
+	if (not_C_symname) {
+		/* Replace non-alnum with '_' */
+		char *s, *d;
+
+		s = d = nbase;
+		do {
+			if (*s && !isalnum(*s)) {
+				if (d != nbase && *(d - 1) != '_')
+					*d++ = '_';
+			} else
+				*d++ = *s;
+		} while (*s++);
+	} else {
+		/* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */
+		p = strpbrk(nbase, ".@");
+		if (p && p != nbase)
+			*p = '\0';
+	}
 
 	/* Try no suffix number */
 	ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : "");
@@ -2884,6 +2898,7 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
 				       bool allow_suffix)
 {
 	const char *event, *group;
+	bool not_C_symname = true;
 	char buf[MAX_EVENT_NAME_LEN];
 	int ret;
 
@@ -2898,8 +2913,10 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
 			(strncmp(pev->point.function, "0x", 2) != 0) &&
 			!strisglob(pev->point.function))
 			event = pev->point.function;
-		else
+		else {
 			event = tev->point.realname;
+			not_C_symname = !is_known_C_lang(tev->lang);
+		}
 	}
 	if (pev->group && !pev->sdt)
 		group = pev->group;
@@ -2916,7 +2933,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,
 
 	/* Get an unused new event name */
 	ret = get_new_event_name(buf, sizeof(buf), event, namelist,
-				 tev->point.retprobe, allow_suffix);
+				 tev->point.retprobe, allow_suffix,
+				 not_C_symname);
 	if (ret < 0)
 		return ret;
 
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 61a5f4ff4e9c..71905ede0207 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -58,6 +58,7 @@ struct probe_trace_event {
 	char				*group;	/* Group name */
 	struct probe_trace_point	point;	/* Trace point */
 	int				nargs;	/* Number of args */
+	int				lang;	/* Dwarf language code */
 	bool				uprobes;	/* uprobes only */
 	struct probe_trace_arg		*args;	/* Arguments */
 };
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 7f2ee0cb43ca..1e769b68da37 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -35,6 +35,19 @@
 /* Kprobe tracer basic type is up to u64 */
 #define MAX_BASIC_TYPE_BITS	64
 
+bool is_known_C_lang(int lang)
+{
+	switch (lang) {
+	case DW_LANG_C89:
+	case DW_LANG_C:
+	case DW_LANG_C99:
+	case DW_LANG_C11:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /*
  * Probe finder related functions
  */
@@ -1270,6 +1283,8 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
 		goto end;
 	}
 
+	tev->lang = dwarf_srclang(dwarf_diecu(sc_die, &pf->cu_die, NULL, NULL));
+
 	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
 		 tev->point.offset);
 
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index be7b46ea2460..dcf6cc1e1cbe 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -26,6 +26,9 @@ static inline int is_c_varname(const char *name)
 #include "dwarf-aux.h"
 #include "debuginfo.h"
 
+/* Check the language code is known C */
+bool is_known_C_lang(int lang);
+
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
 int debuginfo__find_trace_events(struct debuginfo *dbg,
 				 struct perf_probe_event *pev,
@@ -103,6 +106,8 @@ struct line_finder {
 	int			found;
 };
 
+#else
+#define is_known_C_lang(lang) (false)
 #endif /* HAVE_LIBDW_SUPPORT */
 
 #endif /*_PROBE_FINDER_H */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 2096cdbaa53b..b4bc57859f73 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -13,30 +13,12 @@
 #include "evsel.h"
 #include "event.h"
 #include "print_binary.h"
+#include "strbuf.h"
 #include "thread_map.h"
 #include "trace-event.h"
 #include "mmap.h"
-#include "util/bpf-filter.h"
-#include "util/env.h"
-#include "util/kvm-stat.h"
-#include "util/stat.h"
-#include "util/kwork.h"
 #include "util/sample.h"
-#include "util/lock-contention.h"
 #include <internal/lib.h>
-#include "../builtin.h"
-
-#if PY_MAJOR_VERSION < 3
-#define _PyUnicode_FromString(arg) \
-  PyString_FromString(arg)
-#define _PyUnicode_AsString(arg) \
-  PyString_AsString(arg)
-#define _PyUnicode_FromFormat(...) \
-  PyString_FromFormat(__VA_ARGS__)
-#define _PyLong_FromLong(arg) \
-  PyInt_FromLong(arg)
-
-#else
 
 #define _PyUnicode_FromString(arg) \
   PyUnicode_FromString(arg)
@@ -44,22 +26,8 @@
   PyUnicode_FromFormat(__VA_ARGS__)
 #define _PyLong_FromLong(arg) \
   PyLong_FromLong(arg)
-#endif
-
-#ifndef Py_TYPE
-#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
-#endif
-
-/* Define PyVarObject_HEAD_INIT for python 2.5 */
-#ifndef PyVarObject_HEAD_INIT
-# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
-#endif
 
-#if PY_MAJOR_VERSION < 3
-PyMODINIT_FUNC initperf(void);
-#else
 PyMODINIT_FUNC PyInit_perf(void);
-#endif
 
 #define member_def(type, member, ptype, help) \
 	{ #member, ptype, \
@@ -89,7 +57,7 @@ struct pyrf_event {
 	sample_member_def(sample_period, period, T_ULONGLONG, "event period"),		 \
 	sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
 
-static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
+static const char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
 
 static PyMemberDef pyrf_mmap_event__members[] = {
 	sample_members
@@ -104,7 +72,7 @@ static PyMemberDef pyrf_mmap_event__members[] = {
 	{ .name = NULL, },
 };
 
-static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_mmap_event__repr(const struct pyrf_event *pevent)
 {
 	PyObject *ret;
 	char *s;
@@ -117,7 +85,7 @@ static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
 		     pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) {
 		ret = PyErr_NoMemory();
 	} else {
-		ret = _PyUnicode_FromString(s);
+		ret = PyUnicode_FromString(s);
 		free(s);
 	}
 	return ret;
@@ -133,7 +101,7 @@ static PyTypeObject pyrf_mmap_event__type = {
 	.tp_repr	= (reprfunc)pyrf_mmap_event__repr,
 };
 
-static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
+static const char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
 
 static PyMemberDef pyrf_task_event__members[] = {
 	sample_members
@@ -146,9 +114,9 @@ static PyMemberDef pyrf_task_event__members[] = {
 	{ .name = NULL, },
 };
 
-static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_task_event__repr(const struct pyrf_event *pevent)
 {
-	return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
+	return PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
 				   "ptid: %u, time: %" PRI_lu64 "}",
 				   pevent->event.header.type == PERF_RECORD_FORK ? "fork" : "exit",
 				   pevent->event.fork.pid,
@@ -168,7 +136,7 @@ static PyTypeObject pyrf_task_event__type = {
 	.tp_repr	= (reprfunc)pyrf_task_event__repr,
 };
 
-static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
+static const char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
 
 static PyMemberDef pyrf_comm_event__members[] = {
 	sample_members
@@ -179,9 +147,9 @@ static PyMemberDef pyrf_comm_event__members[] = {
 	{ .name = NULL, },
 };
 
-static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_comm_event__repr(const struct pyrf_event *pevent)
 {
-	return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
+	return PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
 				   pevent->event.comm.pid,
 				   pevent->event.comm.tid,
 				   pevent->event.comm.comm);
@@ -197,7 +165,7 @@ static PyTypeObject pyrf_comm_event__type = {
 	.tp_repr	= (reprfunc)pyrf_comm_event__repr,
 };
 
-static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
+static const char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
 
 static PyMemberDef pyrf_throttle_event__members[] = {
 	sample_members
@@ -208,11 +176,12 @@ static PyMemberDef pyrf_throttle_event__members[] = {
 	{ .name = NULL, },
 };
 
-static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_throttle_event__repr(const struct pyrf_event *pevent)
 {
-	struct perf_record_throttle *te = (struct perf_record_throttle *)(&pevent->event.header + 1);
+	const struct perf_record_throttle *te = (const struct perf_record_throttle *)
+		(&pevent->event.header + 1);
 
-	return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64
+	return PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRI_lu64 ", id: %" PRI_lu64
 				   ", stream_id: %" PRI_lu64 " }",
 				   pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
 				   te->time, te->id, te->stream_id);
@@ -228,7 +197,7 @@ static PyTypeObject pyrf_throttle_event__type = {
 	.tp_repr	= (reprfunc)pyrf_throttle_event__repr,
 };
 
-static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
+static const char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
 
 static PyMemberDef pyrf_lost_event__members[] = {
 	sample_members
@@ -237,7 +206,7 @@ static PyMemberDef pyrf_lost_event__members[] = {
 	{ .name = NULL, },
 };
 
-static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_lost_event__repr(const struct pyrf_event *pevent)
 {
 	PyObject *ret;
 	char *s;
@@ -247,7 +216,7 @@ static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
 		     pevent->event.lost.id, pevent->event.lost.lost) < 0) {
 		ret = PyErr_NoMemory();
 	} else {
-		ret = _PyUnicode_FromString(s);
+		ret = PyUnicode_FromString(s);
 		free(s);
 	}
 	return ret;
@@ -263,7 +232,7 @@ static PyTypeObject pyrf_lost_event__type = {
 	.tp_repr	= (reprfunc)pyrf_lost_event__repr,
 };
 
-static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
+static const char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
 
 static PyMemberDef pyrf_read_event__members[] = {
 	sample_members
@@ -272,9 +241,9 @@ static PyMemberDef pyrf_read_event__members[] = {
 	{ .name = NULL, },
 };
 
-static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_read_event__repr(const struct pyrf_event *pevent)
 {
-	return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }",
+	return PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }",
 				   pevent->event.read.pid,
 				   pevent->event.read.tid);
 	/*
@@ -293,7 +262,7 @@ static PyTypeObject pyrf_read_event__type = {
 	.tp_repr	= (reprfunc)pyrf_read_event__repr,
 };
 
-static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
+static const char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
 
 static PyMemberDef pyrf_sample_event__members[] = {
 	sample_members
@@ -301,7 +270,7 @@ static PyMemberDef pyrf_sample_event__members[] = {
 	{ .name = NULL, },
 };
 
-static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_sample_event__repr(const struct pyrf_event *pevent)
 {
 	PyObject *ret;
 	char *s;
@@ -309,20 +278,20 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
 	if (asprintf(&s, "{ type: sample }") < 0) {
 		ret = PyErr_NoMemory();
 	} else {
-		ret = _PyUnicode_FromString(s);
+		ret = PyUnicode_FromString(s);
 		free(s);
 	}
 	return ret;
 }
 
 #ifdef HAVE_LIBTRACEEVENT
-static bool is_tracepoint(struct pyrf_event *pevent)
+static bool is_tracepoint(const struct pyrf_event *pevent)
 {
 	return pevent->evsel->core.attr.type == PERF_TYPE_TRACEPOINT;
 }
 
 static PyObject*
-tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
+tracepoint_field(const struct pyrf_event *pe, struct tep_format_field *field)
 {
 	struct tep_handle *pevent = field->event->tep;
 	void *data = pe->sample.raw_data;
@@ -343,7 +312,7 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
 		}
 		if (field->flags & TEP_FIELD_IS_STRING &&
 		    is_printable_array(data + offset, len)) {
-			ret = _PyUnicode_FromString((char *)data + offset);
+			ret = PyUnicode_FromString((char *)data + offset);
 		} else {
 			ret = PyByteArray_FromStringAndSize((const char *) data + offset, len);
 			field->flags &= ~TEP_FIELD_IS_STRING;
@@ -411,7 +380,7 @@ static PyTypeObject pyrf_sample_event__type = {
 	.tp_getattro	= (getattrofunc) pyrf_sample_event__getattro,
 };
 
-static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object.");
+static const char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object.");
 
 static PyMemberDef pyrf_context_switch_event__members[] = {
 	sample_members
@@ -421,7 +390,7 @@ static PyMemberDef pyrf_context_switch_event__members[] = {
 	{ .name = NULL, },
 };
 
-static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent)
+static PyObject *pyrf_context_switch_event__repr(const struct pyrf_event *pevent)
 {
 	PyObject *ret;
 	char *s;
@@ -432,7 +401,7 @@ static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent)
 		     !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) {
 		ret = PyErr_NoMemory();
 	} else {
-		ret = _PyUnicode_FromString(s);
+		ret = PyUnicode_FromString(s);
 		free(s);
 	}
 	return ret;
@@ -501,7 +470,7 @@ static PyTypeObject *pyrf_event__type[] = {
 	[PERF_RECORD_SWITCH_CPU_WIDE]  = &pyrf_context_switch_event__type,
 };
 
-static PyObject *pyrf_event__new(union perf_event *event)
+static PyObject *pyrf_event__new(const union perf_event *event)
 {
 	struct pyrf_event *pevent;
 	PyTypeObject *ptype;
@@ -569,7 +538,7 @@ static PySequenceMethods pyrf_cpu_map__sequence_methods = {
 	.sq_item   = pyrf_cpu_map__item,
 };
 
-static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
+static const char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
 
 static PyTypeObject pyrf_cpu_map__type = {
 	PyVarObject_HEAD_INIT(NULL, 0)
@@ -638,7 +607,7 @@ static PySequenceMethods pyrf_thread_map__sequence_methods = {
 	.sq_item   = pyrf_thread_map__item,
 };
 
-static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
+static const char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
 
 static PyTypeObject pyrf_thread_map__type = {
 	PyVarObject_HEAD_INIT(NULL, 0)
@@ -812,6 +781,17 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
 	return Py_None;
 }
 
+static PyObject *pyrf_evsel__str(PyObject *self)
+{
+	struct pyrf_evsel *pevsel = (void *)self;
+	struct evsel *evsel = &pevsel->evsel;
+
+	if (!evsel->pmu)
+		return PyUnicode_FromFormat("evsel(%s)", evsel__name(evsel));
+
+	return PyUnicode_FromFormat("evsel(%s/%s/)", evsel->pmu->name, evsel__name(evsel));
+}
+
 static PyMethodDef pyrf_evsel__methods[] = {
 	{
 		.ml_name  = "open",
@@ -822,7 +802,7 @@ static PyMethodDef pyrf_evsel__methods[] = {
 	{ .ml_name = NULL, }
 };
 
-static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
+static const char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
 
 static PyTypeObject pyrf_evsel__type = {
 	PyVarObject_HEAD_INIT(NULL, 0)
@@ -833,6 +813,8 @@ static PyTypeObject pyrf_evsel__type = {
 	.tp_doc		= pyrf_evsel__doc,
 	.tp_methods	= pyrf_evsel__methods,
 	.tp_init	= (initproc)pyrf_evsel__init,
+	.tp_str         = pyrf_evsel__str,
+	.tp_repr        = pyrf_evsel__str,
 };
 
 static int pyrf_evsel__setup_types(void)
@@ -918,17 +900,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
 
 	for (i = 0; i < evlist->core.pollfd.nr; ++i) {
 		PyObject *file;
-#if PY_MAJOR_VERSION < 3
-		FILE *fp = fdopen(evlist->core.pollfd.entries[i].fd, "r");
-
-		if (fp == NULL)
-			goto free_list;
-
-		file = PyFile_FromFile(fp, "perf", "r", NULL);
-#else
 		file = PyFile_FromFd(evlist->core.pollfd.entries[i].fd, "perf", "r", -1,
 				     NULL, NULL, NULL, 0);
-#endif
 		if (file == NULL)
 			goto free_list;
 
@@ -1098,8 +1071,10 @@ static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
 	struct pyrf_evlist *pevlist = (void *)obj;
 	struct evsel *pos;
 
-	if (i >= pevlist->evlist.core.nr_entries)
+	if (i >= pevlist->evlist.core.nr_entries) {
+		PyErr_SetString(PyExc_IndexError, "Index out of range");
 		return NULL;
+	}
 
 	evlist__for_each_entry(&pevlist->evlist, pos) {
 		if (i-- == 0)
@@ -1109,12 +1084,36 @@ static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
 	return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
 }
 
+static PyObject *pyrf_evlist__str(PyObject *self)
+{
+	struct pyrf_evlist *pevlist = (void *)self;
+	struct evsel *pos;
+	struct strbuf sb = STRBUF_INIT;
+	bool first = true;
+	PyObject *result;
+
+	strbuf_addstr(&sb, "evlist([");
+	evlist__for_each_entry(&pevlist->evlist, pos) {
+		if (!first)
+			strbuf_addch(&sb, ',');
+		if (!pos->pmu)
+			strbuf_addstr(&sb, evsel__name(pos));
+		else
+			strbuf_addf(&sb, "%s/%s/", pos->pmu->name, evsel__name(pos));
+		first = false;
+	}
+	strbuf_addstr(&sb, "])");
+	result = PyUnicode_FromString(sb.buf);
+	strbuf_release(&sb);
+	return result;
+}
+
 static PySequenceMethods pyrf_evlist__sequence_methods = {
 	.sq_length = pyrf_evlist__length,
 	.sq_item   = pyrf_evlist__item,
 };
 
-static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
+static const char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
 
 static PyTypeObject pyrf_evlist__type = {
 	PyVarObject_HEAD_INIT(NULL, 0)
@@ -1126,6 +1125,8 @@ static PyTypeObject pyrf_evlist__type = {
 	.tp_doc		= pyrf_evlist__doc,
 	.tp_methods	= pyrf_evlist__methods,
 	.tp_init	= (initproc)pyrf_evlist__init,
+	.tp_repr        = pyrf_evlist__str,
+	.tp_str         = pyrf_evlist__str,
 };
 
 static int pyrf_evlist__setup_types(void)
@@ -1136,10 +1137,12 @@ static int pyrf_evlist__setup_types(void)
 
 #define PERF_CONST(name) { #name, PERF_##name }
 
-static struct {
+struct perf_constant {
 	const char *name;
 	int	    value;
-} perf__constants[] = {
+};
+
+static const struct perf_constant perf__constants[] = {
 	PERF_CONST(TYPE_HARDWARE),
 	PERF_CONST(TYPE_SOFTWARE),
 	PERF_CONST(TYPE_TRACEPOINT),
@@ -1234,12 +1237,66 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
 
 	tp_format = trace_event__tp_format(sys, name);
 	if (IS_ERR(tp_format))
-		return _PyLong_FromLong(-1);
+		return PyLong_FromLong(-1);
 
-	return _PyLong_FromLong(tp_format->id);
+	return PyLong_FromLong(tp_format->id);
 #endif // HAVE_LIBTRACEEVENT
 }
 
+static PyObject *pyrf_evsel__from_evsel(struct evsel *evsel)
+{
+	struct pyrf_evsel *pevsel = PyObject_New(struct pyrf_evsel, &pyrf_evsel__type);
+
+	if (!pevsel)
+		return NULL;
+
+	memset(&pevsel->evsel, 0, sizeof(pevsel->evsel));
+	evsel__init(&pevsel->evsel, &evsel->core.attr, evsel->core.idx);
+
+	evsel__clone(&pevsel->evsel, evsel);
+	return (PyObject *)pevsel;
+}
+
+static PyObject *pyrf_evlist__from_evlist(struct evlist *evlist)
+{
+	struct pyrf_evlist *pevlist = PyObject_New(struct pyrf_evlist, &pyrf_evlist__type);
+	struct evsel *pos;
+
+	if (!pevlist)
+		return NULL;
+
+	memset(&pevlist->evlist, 0, sizeof(pevlist->evlist));
+	evlist__init(&pevlist->evlist, evlist->core.all_cpus, evlist->core.threads);
+	evlist__for_each_entry(evlist, pos) {
+		struct pyrf_evsel *pevsel = (void *)pyrf_evsel__from_evsel(pos);
+
+		evlist__add(&pevlist->evlist, &pevsel->evsel);
+	}
+	return (PyObject *)pevlist;
+}
+
+static PyObject *pyrf__parse_events(PyObject *self, PyObject *args)
+{
+	const char *input;
+	struct evlist evlist = {};
+	struct parse_events_error err;
+	PyObject *result;
+
+	if (!PyArg_ParseTuple(args, "s", &input))
+		return NULL;
+
+	parse_events_error__init(&err);
+	evlist__init(&evlist, NULL, NULL);
+	if (parse_events(&evlist, input, &err)) {
+		parse_events_error__print(&err, input);
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+	result = pyrf_evlist__from_evlist(&evlist);
+	evlist__exit(&evlist);
+	return result;
+}
+
 static PyMethodDef perf__methods[] = {
 	{
 		.ml_name  = "tracepoint",
@@ -1247,21 +1304,20 @@ static PyMethodDef perf__methods[] = {
 		.ml_flags = METH_VARARGS | METH_KEYWORDS,
 		.ml_doc	  = PyDoc_STR("Get tracepoint config.")
 	},
+	{
+		.ml_name  = "parse_events",
+		.ml_meth  = (PyCFunction) pyrf__parse_events,
+		.ml_flags = METH_VARARGS,
+		.ml_doc	  = PyDoc_STR("Parse a string of events and return an evlist.")
+	},
 	{ .ml_name = NULL, }
 };
 
-#if PY_MAJOR_VERSION < 3
-PyMODINIT_FUNC initperf(void)
-#else
 PyMODINIT_FUNC PyInit_perf(void)
-#endif
 {
 	PyObject *obj;
 	int i;
 	PyObject *dict;
-#if PY_MAJOR_VERSION < 3
-	PyObject *module = Py_InitModule("perf", perf__methods);
-#else
 	static struct PyModuleDef moduledef = {
 		PyModuleDef_HEAD_INIT,
 		"perf",			/* m_name */
@@ -1274,7 +1330,6 @@ PyMODINIT_FUNC PyInit_perf(void)
 		NULL,			/* m_free */
 	};
 	PyObject *module = PyModule_Create(&moduledef);
-#endif
 
 	if (module == NULL ||
 	    pyrf_event__setup_types() < 0 ||
@@ -1282,11 +1337,7 @@ PyMODINIT_FUNC PyInit_perf(void)
 	    pyrf_evsel__setup_types() < 0 ||
 	    pyrf_thread_map__setup_types() < 0 ||
 	    pyrf_cpu_map__setup_types() < 0)
-#if PY_MAJOR_VERSION < 3
-		return;
-#else
 		return module;
-#endif
 
 	/* The page_size is placed in util object. */
 	page_size = sysconf(_SC_PAGE_SIZE);
@@ -1335,7 +1386,7 @@ PyMODINIT_FUNC PyInit_perf(void)
 		goto error;
 
 	for (i = 0; perf__constants[i].name != NULL; i++) {
-		obj = _PyLong_FromLong(perf__constants[i].value);
+		obj = PyLong_FromLong(perf__constants[i].value);
 		if (obj == NULL)
 			goto error;
 		PyDict_SetItemString(dict, perf__constants[i].name, obj);
@@ -1345,109 +1396,5 @@ PyMODINIT_FUNC PyInit_perf(void)
 error:
 	if (PyErr_Occurred())
 		PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
-#if PY_MAJOR_VERSION >= 3
 	return module;
-#endif
-}
-
-
-/* The following are stubs to avoid dragging in builtin-* objects. */
-/* TODO: move the code out of the builtin-* file into util. */
-
-unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
-
-#ifdef HAVE_KVM_STAT_SUPPORT
-bool kvm_entry_event(struct evsel *evsel __maybe_unused)
-{
-	return false;
-}
-
-bool kvm_exit_event(struct evsel *evsel __maybe_unused)
-{
-	return false;
-}
-
-bool exit_event_begin(struct evsel *evsel __maybe_unused,
-		      struct perf_sample *sample  __maybe_unused,
-		      struct event_key *key  __maybe_unused)
-{
-	return false;
-}
-
-bool exit_event_end(struct evsel *evsel __maybe_unused,
-		    struct perf_sample *sample __maybe_unused,
-		    struct event_key *key __maybe_unused)
-{
-	return false;
-}
-
-void exit_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
-			   struct event_key *key __maybe_unused,
-			   char *decode __maybe_unused)
-{
-}
-#endif // HAVE_KVM_STAT_SUPPORT
-
-int find_scripts(char **scripts_array  __maybe_unused, char **scripts_path_array  __maybe_unused,
-		int num  __maybe_unused, int pathlen __maybe_unused)
-{
-	return -1;
-}
-
-void perf_stat__set_no_csv_summary(int set __maybe_unused)
-{
-}
-
-void perf_stat__set_big_num(int set __maybe_unused)
-{
-}
-
-int script_spec_register(const char *spec __maybe_unused, struct scripting_ops *ops __maybe_unused)
-{
-	return -1;
-}
-
-arch_syscalls__strerrno_t *arch_syscalls__strerrno_function(const char *arch __maybe_unused)
-{
-	return NULL;
-}
-
-struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork __maybe_unused,
-				       struct kwork_class *class __maybe_unused,
-				       struct kwork_work *key  __maybe_unused)
-{
-	return NULL;
-}
-
-void script_fetch_insn(struct perf_sample *sample __maybe_unused,
-		struct thread *thread __maybe_unused,
-		struct machine *machine __maybe_unused)
-{
-}
-
-int perf_sample__sprintf_flags(u32 flags __maybe_unused, char *str __maybe_unused,
-			size_t sz __maybe_unused)
-{
-	return -1;
-}
-
-bool match_callstack_filter(struct machine *machine __maybe_unused, u64 *callstack __maybe_unused)
-{
-	return false;
-}
-
-struct lock_stat *lock_stat_find(u64 addr __maybe_unused)
-{
-	return NULL;
-}
-
-struct lock_stat *lock_stat_findnew(u64 addr __maybe_unused, const char *name __maybe_unused,
-				int flags __maybe_unused)
-{
-	return NULL;
-}
-
-int cmd_inject(int argc __maybe_unused, const char *argv[] __maybe_unused)
-{
-	return -1;
 }
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 85b7f188f729..e261a57b87d4 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -344,7 +344,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 				    struct addr_location *al)
 {
 	struct thread *thread = al->thread;
-	struct tep_event *event = evsel->tp_format;
+	struct tep_event *event;
 	struct tep_format_field *field;
 	static char handler[256];
 	unsigned long long val;
@@ -362,6 +362,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 	if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
 		return;
 
+	event = evsel__tp_format(evsel);
 	if (!event) {
 		pr_debug("ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config);
 		return;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 8bdae066e839..b1b5e94537e4 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -58,22 +58,6 @@
 #include "mem-events.h"
 #include "util/perf_regs.h"
 
-#if PY_MAJOR_VERSION < 3
-#define _PyUnicode_FromString(arg) \
-  PyString_FromString(arg)
-#define _PyUnicode_FromStringAndSize(arg1, arg2) \
-  PyString_FromStringAndSize((arg1), (arg2))
-#define _PyBytes_FromStringAndSize(arg1, arg2) \
-  PyString_FromStringAndSize((arg1), (arg2))
-#define _PyLong_FromLong(arg) \
-  PyInt_FromLong(arg)
-#define _PyLong_AsLong(arg) \
-  PyInt_AsLong(arg)
-#define _PyCapsule_New(arg1, arg2, arg3) \
-  PyCObject_FromVoidPtr((arg1), (arg2))
-
-PyMODINIT_FUNC initperf_trace_context(void);
-#else
 #define _PyUnicode_FromString(arg) \
   PyUnicode_FromString(arg)
 #define _PyUnicode_FromStringAndSize(arg1, arg2) \
@@ -88,7 +72,6 @@ PyMODINIT_FUNC initperf_trace_context(void);
   PyCapsule_New((arg1), (arg2), (arg3))
 
 PyMODINIT_FUNC PyInit_perf_trace_context(void);
-#endif
 
 #ifdef HAVE_LIBTRACEEVENT
 #define TRACE_EVENT_TYPE_MAX				\
@@ -181,17 +164,7 @@ static int get_argument_count(PyObject *handler)
 {
 	int arg_count = 0;
 
-	/*
-	 * The attribute for the code object is func_code in Python 2,
-	 * whereas it is __code__ in Python 3.0+.
-	 */
-	PyObject *code_obj = PyObject_GetAttrString(handler,
-		"func_code");
-	if (PyErr_Occurred()) {
-		PyErr_Clear();
-		code_obj = PyObject_GetAttrString(handler,
-			"__code__");
-	}
+	PyObject *code_obj = code_obj = PyObject_GetAttrString(handler, "__code__");
 	PyErr_Clear();
 	if (code_obj) {
 		PyObject *arg_count_obj = PyObject_GetAttrString(code_obj,
@@ -949,7 +922,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
 				      struct addr_location *al,
 				      struct addr_location *addr_al)
 {
-	struct tep_event *event = evsel->tp_format;
+	struct tep_event *event;
 	PyObject *handler, *context, *t, *obj = NULL, *callchain;
 	PyObject *dict = NULL, *all_entries_dict = NULL;
 	static char handler_name[256];
@@ -966,6 +939,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
 
 	bitmap_zero(events_defined, TRACE_EVENT_TYPE_MAX);
 
+	event = evsel__tp_format(evsel);
 	if (!event) {
 		snprintf(handler_name, sizeof(handler_name),
 			 "ug! no event found for type %" PRIu64, (u64)evsel->core.attr.config);
@@ -1902,12 +1876,6 @@ static void set_table_handlers(struct tables *tables)
 	tables->synth_handler = get_handler("synth_data");
 }
 
-#if PY_MAJOR_VERSION < 3
-static void _free_command_line(const char **command_line, int num)
-{
-	free(command_line);
-}
-#else
 static void _free_command_line(wchar_t **command_line, int num)
 {
 	int i;
@@ -1915,7 +1883,6 @@ static void _free_command_line(wchar_t **command_line, int num)
 		PyMem_RawFree(command_line[i]);
 	free(command_line);
 }
-#endif
 
 
 /*
@@ -1925,30 +1892,12 @@ static int python_start_script(const char *script, int argc, const char **argv,
 			       struct perf_session *session)
 {
 	struct tables *tables = &tables_global;
-#if PY_MAJOR_VERSION < 3
-	const char **command_line;
-#else
 	wchar_t **command_line;
-#endif
-	/*
-	 * Use a non-const name variable to cope with python 2.6's
-	 * PyImport_AppendInittab prototype
-	 */
-	char buf[PATH_MAX], name[19] = "perf_trace_context";
+	char buf[PATH_MAX];
 	int i, err = 0;
 	FILE *fp;
 
 	scripting_context->session = session;
-#if PY_MAJOR_VERSION < 3
-	command_line = malloc((argc + 1) * sizeof(const char *));
-	if (!command_line)
-		return -1;
-
-	command_line[0] = script;
-	for (i = 1; i < argc + 1; i++)
-		command_line[i] = argv[i - 1];
-	PyImport_AppendInittab(name, initperf_trace_context);
-#else
 	command_line = malloc((argc + 1) * sizeof(wchar_t *));
 	if (!command_line)
 		return -1;
@@ -1956,15 +1905,10 @@ static int python_start_script(const char *script, int argc, const char **argv,
 	command_line[0] = Py_DecodeLocale(script, NULL);
 	for (i = 1; i < argc + 1; i++)
 		command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
-	PyImport_AppendInittab(name, PyInit_perf_trace_context);
-#endif
+	PyImport_AppendInittab("perf_trace_context", PyInit_perf_trace_context);
 	Py_Initialize();
 
-#if PY_MAJOR_VERSION < 3
-	PySys_SetArgv(argc + 1, (char **)command_line);
-#else
 	PySys_SetArgv(argc + 1, command_line);
-#endif
 
 	fp = fopen(script, "r");
 	if (!fp) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 507e6cba9545..c06e3020a976 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -37,6 +37,7 @@
 #include "arch/common.h"
 #include "units.h"
 #include "annotate.h"
+#include "perf.h"
 #include <internal/lib.h>
 
 static int perf_session__deliver_event(struct perf_session *session,
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 9dd60c7869a2..3dd33721823f 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1038,17 +1038,19 @@ static char *get_trace_output(struct hist_entry *he)
 		.data = he->raw_data,
 		.size = he->raw_size,
 	};
+	struct tep_event *tp_format;
 
 	evsel = hists_to_evsel(he->hists);
 
 	trace_seq_init(&seq);
-	if (symbol_conf.raw_trace) {
-		tep_print_fields(&seq, he->raw_data, he->raw_size,
-				 evsel->tp_format);
-	} else {
-		tep_print_event(evsel->tp_format->tep,
-				&seq, &rec, "%s", TEP_PRINT_INFO);
+	tp_format = evsel__tp_format(evsel);
+	if (tp_format) {
+		if (symbol_conf.raw_trace)
+			tep_print_fields(&seq, he->raw_data, he->raw_size, tp_format);
+		else
+			tep_print_event(tp_format->tep, &seq, &rec, "%s", TEP_PRINT_INFO);
 	}
+
 	/*
 	 * Trim the buffer, it starts at 4KB and we're not going to
 	 * add anything more to this buffer.
@@ -3293,9 +3295,8 @@ static int __dynamic_dimension__add(struct evsel *evsel,
 static int add_evsel_fields(struct evsel *evsel, bool raw_trace, int level)
 {
 	int ret;
-	struct tep_format_field *field;
-
-	field = evsel->tp_format->format.fields;
+	struct tep_event *tp_format = evsel__tp_format(evsel);
+	struct tep_format_field *field = tp_format ? tp_format->format.fields : NULL;
 	while (field) {
 		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
 		if (ret < 0)
@@ -3328,13 +3329,19 @@ static int add_all_matching_fields(struct evlist *evlist,
 {
 	int ret = -ESRCH;
 	struct evsel *evsel;
-	struct tep_format_field *field;
 
 	evlist__for_each_entry(evlist, evsel) {
+		struct tep_event *tp_format;
+		struct tep_format_field *field;
+
 		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
 			continue;
 
-		field = tep_find_any_field(evsel->tp_format, field_name);
+		tp_format = evsel__tp_format(evsel);
+		if (tp_format == NULL)
+			continue;
+
+		field = tep_find_any_field(tp_format, field_name);
 		if (field == NULL)
 			continue;
 
@@ -3416,7 +3423,9 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok,
 	if (!strcmp(field_name, "*")) {
 		ret = add_evsel_fields(evsel, raw_trace, level);
 	} else {
-		struct tep_format_field *field = tep_find_any_field(evsel->tp_format, field_name);
+		struct tep_event *tp_format = evsel__tp_format(evsel);
+		struct tep_format_field *field =
+			tp_format ? tep_find_any_field(tp_format, field_name) : NULL;
 
 		if (field == NULL) {
 			pr_debug("Cannot find event field for %s.%s\n",
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 53dcdf07f5a2..ba79f73e1cf5 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -114,23 +114,59 @@ static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena)
 	fprintf(config->output, "%s%" PRIu64 "%s%.2f",
 		config->csv_sep, run, config->csv_sep, enabled_percent);
 }
+struct outstate {
+	/* Std mode: insert a newline before the next metric */
+	bool newline;
+	/* JSON mode: track need for comma for a previous field or not */
+	bool first;
+	/* Num CSV separators remaining to pad out when not all fields are printed */
+	int  csv_col_pad;
+
+	/*
+	 * The following don't track state across fields, but are here as a shortcut to
+	 * pass data to the print functions. The alternative would be to update the
+	 * function signatures of the entire print stack to pass them through.
+	 */
+	/* Place to output to */
+	FILE * const fh;
+	/* Lines are timestamped in --interval-print mode */
+	char timestamp[64];
+	/* Num items aggregated in current line. See struct perf_stat_aggr.nr */
+	int aggr_nr;
+	/* Core/socket/die etc ID for the current line */
+	struct aggr_cpu_id id;
+	/* Event for current line */
+	struct evsel *evsel;
+	/* Cgroup for current line */
+	struct cgroup *cgrp;
+};
+
+static const char *json_sep(struct outstate *os)
+{
+	const char *sep = os->first ? "" : ", ";
+
+	os->first = false;
+	return sep;
+}
+
+#define json_out(os, format, ...) fprintf((os)->fh, "%s" format, json_sep(os), ##__VA_ARGS__)
 
-static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena)
+static void print_running_json(struct outstate *os, u64 run, u64 ena)
 {
 	double enabled_percent = 100;
 
 	if (run != ena)
 		enabled_percent = 100 * run / ena;
-	fprintf(config->output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ",
-		run, enabled_percent);
+	json_out(os, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f",
+		 run, enabled_percent);
 }
 
-static void print_running(struct perf_stat_config *config,
+static void print_running(struct perf_stat_config *config, struct outstate *os,
 			  u64 run, u64 ena, bool before_metric)
 {
 	if (config->json_output) {
 		if (before_metric)
-			print_running_json(config, run, ena);
+			print_running_json(os, run, ena);
 	} else if (config->csv_output) {
 		if (before_metric)
 			print_running_csv(config, run, ena);
@@ -153,20 +189,20 @@ static void print_noise_pct_csv(struct perf_stat_config *config,
 	fprintf(config->output, "%s%.2f%%", config->csv_sep, pct);
 }
 
-static void print_noise_pct_json(struct perf_stat_config *config,
+static void print_noise_pct_json(struct outstate *os,
 				 double pct)
 {
-	fprintf(config->output, "\"variance\" : %.2f, ", pct);
+	json_out(os, "\"variance\" : %.2f", pct);
 }
 
-static void print_noise_pct(struct perf_stat_config *config,
+static void print_noise_pct(struct perf_stat_config *config, struct outstate *os,
 			    double total, double avg, bool before_metric)
 {
 	double pct = rel_stddev_stats(total, avg);
 
 	if (config->json_output) {
 		if (before_metric)
-			print_noise_pct_json(config, pct);
+			print_noise_pct_json(os, pct);
 	} else if (config->csv_output) {
 		if (before_metric)
 			print_noise_pct_csv(config, pct);
@@ -176,7 +212,7 @@ static void print_noise_pct(struct perf_stat_config *config,
 	}
 }
 
-static void print_noise(struct perf_stat_config *config,
+static void print_noise(struct perf_stat_config *config, struct outstate *os,
 			struct evsel *evsel, double avg, bool before_metric)
 {
 	struct perf_stat_evsel *ps;
@@ -185,7 +221,7 @@ static void print_noise(struct perf_stat_config *config,
 		return;
 
 	ps = evsel->stats;
-	print_noise_pct(config, stddev_stats(&ps->res_stats), avg, before_metric);
+	print_noise_pct(config, os, stddev_stats(&ps->res_stats), avg, before_metric);
 }
 
 static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name)
@@ -198,18 +234,19 @@ static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_n
 	fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
 }
 
-static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_name)
+static void print_cgroup_json(struct outstate *os, const char *cgrp_name)
 {
-	fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name);
+	json_out(os, "\"cgroup\" : \"%s\"", cgrp_name);
 }
 
-static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp)
+static void print_cgroup(struct perf_stat_config *config, struct outstate *os,
+			 struct cgroup *cgrp)
 {
 	if (nr_cgroups || config->cgroup_list) {
 		const char *cgrp_name = cgrp ? cgrp->name  : "";
 
 		if (config->json_output)
-			print_cgroup_json(config, cgrp_name);
+			print_cgroup_json(os, cgrp_name);
 		else if (config->csv_output)
 			print_cgroup_csv(config, cgrp_name);
 		else
@@ -324,47 +361,45 @@ static void print_aggr_id_csv(struct perf_stat_config *config,
 	}
 }
 
-static void print_aggr_id_json(struct perf_stat_config *config,
+static void print_aggr_id_json(struct perf_stat_config *config, struct outstate *os,
 			       struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
 {
-	FILE *output = config->output;
-
 	switch (config->aggr_mode) {
 	case AGGR_CORE:
-		fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
+		json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d",
 			id.socket, id.die, id.core, aggr_nr);
 		break;
 	case AGGR_CACHE:
-		fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ",
+		json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d",
 			id.socket, id.die, id.cache_lvl, id.cache, aggr_nr);
 		break;
 	case AGGR_CLUSTER:
-		fprintf(output, "\"cluster\" : \"S%d-D%d-CLS%d\", \"aggregate-number\" : %d, ",
+		json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"aggregate-number\" : %d",
 			id.socket, id.die, id.cluster, aggr_nr);
 		break;
 	case AGGR_DIE:
-		fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
+		json_out(os, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d",
 			id.socket, id.die, aggr_nr);
 		break;
 	case AGGR_SOCKET:
-		fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ",
+		json_out(os, "\"socket\" : \"S%d\", \"aggregate-number\" : %d",
 			id.socket, aggr_nr);
 		break;
 	case AGGR_NODE:
-		fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ",
+		json_out(os, "\"node\" : \"N%d\", \"aggregate-number\" : %d",
 			id.node, aggr_nr);
 		break;
 	case AGGR_NONE:
 		if (evsel->percore && !config->percore_show_thread) {
-			fprintf(output, "\"core\" : \"S%d-D%d-C%d\"",
+			json_out(os, "\"core\" : \"S%d-D%d-C%d\"",
 				id.socket, id.die, id.core);
 		} else if (id.cpu.cpu > -1) {
-			fprintf(output, "\"cpu\" : \"%d\", ",
+			json_out(os, "\"cpu\" : \"%d\"",
 				id.cpu.cpu);
 		}
 		break;
 	case AGGR_THREAD:
-		fprintf(output, "\"thread\" : \"%s-%d\", ",
+		json_out(os, "\"thread\" : \"%s-%d\"",
 			perf_thread_map__comm(evsel->core.threads, id.thread_idx),
 			perf_thread_map__pid(evsel->core.threads, id.thread_idx));
 		break;
@@ -376,29 +411,17 @@ static void print_aggr_id_json(struct perf_stat_config *config,
 	}
 }
 
-static void aggr_printout(struct perf_stat_config *config,
+static void aggr_printout(struct perf_stat_config *config, struct outstate *os,
 			  struct evsel *evsel, struct aggr_cpu_id id, int aggr_nr)
 {
 	if (config->json_output)
-		print_aggr_id_json(config, evsel, id, aggr_nr);
+		print_aggr_id_json(config, os, evsel, id, aggr_nr);
 	else if (config->csv_output)
 		print_aggr_id_csv(config, evsel, id, aggr_nr);
 	else
 		print_aggr_id_std(config, evsel, id, aggr_nr);
 }
 
-struct outstate {
-	FILE *fh;
-	bool newline;
-	bool first;
-	const char *prefix;
-	int  nfields;
-	int  aggr_nr;
-	struct aggr_cpu_id id;
-	struct evsel *evsel;
-	struct cgroup *cgrp;
-};
-
 static void new_line_std(struct perf_stat_config *config __maybe_unused,
 			 void *ctx)
 {
@@ -411,9 +434,9 @@ static inline void __new_line_std_csv(struct perf_stat_config *config,
 				      struct outstate *os)
 {
 	fputc('\n', os->fh);
-	if (os->prefix)
-		fputs(os->prefix, os->fh);
-	aggr_printout(config, os->evsel, os->id, os->aggr_nr);
+	if (config->interval)
+		fputs(os->timestamp, os->fh);
+	aggr_printout(config, os, os->evsel, os->id, os->aggr_nr);
 }
 
 static inline void __new_line_std(struct outstate *os)
@@ -464,7 +487,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx)
 	int i;
 
 	__new_line_std_csv(config, os);
-	for (i = 0; i < os->nfields; i++)
+	for (i = 0; i < os->csv_col_pad; i++)
 		fputs(config->csv_sep, os->fh);
 }
 
@@ -499,9 +522,9 @@ static void print_metric_json(struct perf_stat_config *config __maybe_unused,
 	FILE *out = os->fh;
 
 	if (unit) {
-		fprintf(out, "\"metric-value\" : \"%f\", \"metric-unit\" : \"%s\"", val, unit);
+		json_out(os, "\"metric-value\" : \"%f\", \"metric-unit\" : \"%s\"", val, unit);
 		if (thresh != METRIC_THRESHOLD_UNKNOWN) {
-			fprintf(out, ", \"metric-threshold\" : \"%s\"",
+			json_out(os, "\"metric-threshold\" : \"%s\"",
 				metric_threshold_classify__str(thresh));
 		}
 	}
@@ -514,9 +537,11 @@ static void new_line_json(struct perf_stat_config *config, void *ctx)
 	struct outstate *os = ctx;
 
 	fputs("\n{", os->fh);
-	if (os->prefix)
-		fprintf(os->fh, "%s", os->prefix);
-	aggr_printout(config, os->evsel, os->id, os->aggr_nr);
+	os->first = true;
+	if (config->interval)
+		json_out(os, "%s", os->timestamp);
+
+	aggr_printout(config, os, os->evsel, os->id, os->aggr_nr);
 }
 
 static void print_metricgroup_header_json(struct perf_stat_config *config,
@@ -526,7 +551,7 @@ static void print_metricgroup_header_json(struct perf_stat_config *config,
 	if (!metricgroup_name)
 		return;
 
-	fprintf(config->output, "\"metricgroup\" : \"%s\"}", metricgroup_name);
+	json_out((struct outstate *) ctx, "\"metricgroup\" : \"%s\"}", metricgroup_name);
 	new_line_json(config, ctx);
 }
 
@@ -539,12 +564,12 @@ static void print_metricgroup_header_csv(struct perf_stat_config *config,
 
 	if (!metricgroup_name) {
 		/* Leave space for running and enabling */
-		for (i = 0; i < os->nfields - 2; i++)
+		for (i = 0; i < os->csv_col_pad - 2; i++)
 			fputs(config->csv_sep, os->fh);
 		return;
 	}
 
-	for (i = 0; i < os->nfields; i++)
+	for (i = 0; i < os->csv_col_pad; i++)
 		fputs(config->csv_sep, os->fh);
 	fprintf(config->output, "%s", metricgroup_name);
 	new_line_csv(config, ctx);
@@ -644,7 +669,6 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse
 				  const char *unit, double val)
 {
 	struct outstate *os = ctx;
-	FILE *out = os->fh;
 	char buf[64], *ends;
 	char tbuf[1024];
 	const char *vals;
@@ -661,13 +685,7 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse
 	*ends = 0;
 	if (!vals[0])
 		vals = "none";
-	fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals);
-	os->first = false;
-}
-
-static void new_line_metric(struct perf_stat_config *config __maybe_unused,
-			    void *ctx __maybe_unused)
-{
+	json_out(os, "\"%s\" : \"%s\"", unit, vals);
 }
 
 static void print_metric_header(struct perf_stat_config *config,
@@ -743,28 +761,27 @@ static void print_counter_value_csv(struct perf_stat_config *config,
 	fprintf(output, "%s", evsel__name(evsel));
 }
 
-static void print_counter_value_json(struct perf_stat_config *config,
+static void print_counter_value_json(struct outstate *os,
 				     struct evsel *evsel, double avg, bool ok)
 {
-	FILE *output = config->output;
 	const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
 
 	if (ok)
-		fprintf(output, "\"counter-value\" : \"%f\", ", avg);
+		json_out(os, "\"counter-value\" : \"%f\"", avg);
 	else
-		fprintf(output, "\"counter-value\" : \"%s\", ", bad_count);
+		json_out(os, "\"counter-value\" : \"%s\"", bad_count);
 
 	if (evsel->unit)
-		fprintf(output, "\"unit\" : \"%s\", ", evsel->unit);
+		json_out(os, "\"unit\" : \"%s\"", evsel->unit);
 
-	fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel));
+	json_out(os, "\"event\" : \"%s\"", evsel__name(evsel));
 }
 
-static void print_counter_value(struct perf_stat_config *config,
+static void print_counter_value(struct perf_stat_config *config, struct outstate *os,
 				struct evsel *evsel, double avg, bool ok)
 {
 	if (config->json_output)
-		print_counter_value_json(config, evsel, avg, ok);
+		print_counter_value_json(os, evsel, avg, ok);
 	else if (config->csv_output)
 		print_counter_value_csv(config, evsel, avg, ok);
 	else
@@ -772,12 +789,13 @@ static void print_counter_value(struct perf_stat_config *config,
 }
 
 static void abs_printout(struct perf_stat_config *config,
+			 struct outstate *os,
 			 struct aggr_cpu_id id, int aggr_nr,
 			 struct evsel *evsel, double avg, bool ok)
 {
-	aggr_printout(config, evsel, id, aggr_nr);
-	print_counter_value(config, evsel, avg, ok);
-	print_cgroup(config, evsel->cgrp);
+	aggr_printout(config, os, evsel, id, aggr_nr);
+	print_counter_value(config, os, evsel, avg, ok);
+	print_cgroup(config, os, evsel->cgrp);
 }
 
 static bool is_mixed_hw_group(struct evsel *counter)
@@ -831,22 +849,23 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
 
 	if (config->csv_output) {
 		pm = config->metric_only ? print_metric_only_csv : print_metric_csv;
-		nl = config->metric_only ? new_line_metric : new_line_csv;
+		nl = config->metric_only ? NULL : new_line_csv;
 		pmh = print_metricgroup_header_csv;
-		os->nfields = 4 + (counter->cgrp ? 1 : 0);
+		os->csv_col_pad = 4 + (counter->cgrp ? 1 : 0);
 	} else if (config->json_output) {
 		pm = config->metric_only ? print_metric_only_json : print_metric_json;
-		nl = config->metric_only ? new_line_metric : new_line_json;
+		nl = config->metric_only ? NULL : new_line_json;
 		pmh = print_metricgroup_header_json;
 	} else {
 		pm = config->metric_only ? print_metric_only : print_metric_std;
-		nl = config->metric_only ? new_line_metric : new_line_std;
+		nl = config->metric_only ? NULL : new_line_std;
 		pmh = print_metricgroup_header_std;
 	}
 
 	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
 		if (config->metric_only) {
-			pm(config, os, METRIC_THRESHOLD_UNKNOWN, "", "", 0);
+			pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL,
+			   /*unit=*/NULL, /*val=*/0);
 			return;
 		}
 
@@ -868,17 +887,17 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
 	out.force_header = false;
 
 	if (!config->metric_only && !counter->default_metricgroup) {
-		abs_printout(config, os->id, os->aggr_nr, counter, uval, ok);
+		abs_printout(config, os, os->id, os->aggr_nr, counter, uval, ok);
 
-		print_noise(config, counter, noise, /*before_metric=*/true);
-		print_running(config, run, ena, /*before_metric=*/true);
+		print_noise(config, os, counter, noise, /*before_metric=*/true);
+		print_running(config, os, run, ena, /*before_metric=*/true);
 	}
 
 	if (ok) {
 		if (!config->metric_only && counter->default_metricgroup) {
 			void *from = NULL;
 
-			aggr_printout(config, os->evsel, os->id, os->aggr_nr);
+			aggr_printout(config, os, os->evsel, os->id, os->aggr_nr);
 			/* Print out all the metricgroup with the same metric event. */
 			do {
 				int num = 0;
@@ -891,8 +910,8 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
 						__new_line_std_csv(config, os);
 				}
 
-				print_noise(config, counter, noise, /*before_metric=*/true);
-				print_running(config, run, ena, /*before_metric=*/true);
+				print_noise(config, os, counter, noise, /*before_metric=*/true);
+				print_running(config, os, run, ena, /*before_metric=*/true);
 				from = perf_stat__print_shadow_stats_metricgroup(config, counter, aggr_idx,
 										 &num, from, &out,
 										 &config->metric_events);
@@ -901,12 +920,12 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
 			perf_stat__print_shadow_stats(config, counter, uval, aggr_idx,
 						      &out, &config->metric_events);
 	} else {
-		pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/"", /*val=*/0);
+		pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/NULL, /*val=*/0);
 	}
 
 	if (!config->metric_only) {
-		print_noise(config, counter, noise, /*before_metric=*/false);
-		print_running(config, run, ena, /*before_metric=*/false);
+		print_noise(config, os, counter, noise, /*before_metric=*/false);
+		print_running(config, os, run, ena, /*before_metric=*/false);
 	}
 }
 
@@ -1083,12 +1102,17 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 		return;
 
 	if (!metric_only) {
-		if (config->json_output)
+		if (config->json_output) {
+			os->first = true;
 			fputc('{', output);
-		if (os->prefix)
-			fprintf(output, "%s", os->prefix);
-		else if (config->summary && config->csv_output &&
-			 !config->no_csv_summary && !config->interval)
+		}
+		if (config->interval) {
+			if (config->json_output)
+				json_out(os, "%s", os->timestamp);
+			else
+				fprintf(output, "%s", os->timestamp);
+		} else if (config->summary && config->csv_output &&
+			   !config->no_csv_summary)
 			fprintf(output, "%s%s", "summary", config->csv_sep);
 	}
 
@@ -1114,15 +1138,19 @@ static void print_metric_begin(struct perf_stat_config *config,
 
 	if (config->json_output)
 		fputc('{', config->output);
-	if (os->prefix)
-		fprintf(config->output, "%s", os->prefix);
 
+	if (config->interval) {
+		if (config->json_output)
+			json_out(os, "%s", os->timestamp);
+		else
+			fprintf(config->output, "%s", os->timestamp);
+	}
 	evsel = evlist__first(evlist);
 	id = config->aggr_map->map[aggr_idx];
 	aggr = &evsel->stats->aggr[aggr_idx];
-	aggr_printout(config, evsel, id, aggr->nr);
+	aggr_printout(config, os, evsel, id, aggr->nr);
 
-	print_cgroup(config, os->cgrp ? : evsel->cgrp);
+	print_cgroup(config, os, os->cgrp ? : evsel->cgrp);
 }
 
 static void print_metric_end(struct perf_stat_config *config, struct outstate *os)
@@ -1301,7 +1329,7 @@ static void print_metric_headers(struct perf_stat_config *config,
 	struct perf_stat_output_ctx out = {
 		.ctx = &os,
 		.print_metric = print_metric_header,
-		.new_line = new_line_metric,
+		.new_line = NULL,
 		.force_header = true,
 	};
 
@@ -1336,20 +1364,20 @@ static void print_metric_headers(struct perf_stat_config *config,
 		fputc('\n', config->output);
 }
 
-static void prepare_interval(struct perf_stat_config *config,
-			     char *prefix, size_t len, struct timespec *ts)
+static void prepare_timestamp(struct perf_stat_config *config,
+			      struct outstate *os, struct timespec *ts)
 {
 	if (config->iostat_run)
 		return;
 
 	if (config->json_output)
-		scnprintf(prefix, len, "\"interval\" : %lu.%09lu, ",
+		scnprintf(os->timestamp, sizeof(os->timestamp), "\"interval\" : %lu.%09lu",
 			  (unsigned long) ts->tv_sec, ts->tv_nsec);
 	else if (config->csv_output)
-		scnprintf(prefix, len, "%lu.%09lu%s",
+		scnprintf(os->timestamp, sizeof(os->timestamp), "%lu.%09lu%s",
 			  (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
 	else
-		scnprintf(prefix, len, "%6lu.%09lu ",
+		scnprintf(os->timestamp, sizeof(os->timestamp), "%6lu.%09lu ",
 			  (unsigned long) ts->tv_sec, ts->tv_nsec);
 }
 
@@ -1557,7 +1585,7 @@ static void print_footer(struct perf_stat_config *config)
 		fprintf(output, " %17.*f +- %.*f seconds time elapsed",
 			precision, avg, precision, sd);
 
-		print_noise_pct(config, sd, avg, /*before_metric=*/false);
+		print_noise_pct(config, NULL, sd, avg, /*before_metric=*/false);
 	}
 	fprintf(output, "\n\n");
 
@@ -1672,9 +1700,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 			    int argc, const char **argv)
 {
 	bool metric_only = config->metric_only;
-	int interval = config->interval;
 	struct evsel *counter;
-	char buf[64];
 	struct outstate os = {
 		.fh = config->output,
 		.first = true,
@@ -1685,10 +1711,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	if (config->iostat_run)
 		evlist->selected = evlist__first(evlist);
 
-	if (interval) {
-		os.prefix = buf;
-		prepare_interval(config, buf, sizeof(buf), ts);
-	}
+	if (config->interval)
+		prepare_timestamp(config, &os, ts);
 
 	print_header(config, _target, evlist, argc, argv);
 
@@ -1707,7 +1731,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	case AGGR_THREAD:
 	case AGGR_GLOBAL:
 		if (config->iostat_run) {
-			iostat_print_counters(evlist, config, ts, buf,
+			iostat_print_counters(evlist, config, ts, os.timestamp,
 					      (iostat_print_counter_t)print_counter, &os);
 		} else if (config->cgroup_list) {
 			print_cgroup_counter(config, evlist, &os);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 47718610d5d8..fa8b2a1048ff 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -327,7 +327,8 @@ static void print_instructions(struct perf_stat_config *config,
 			     "insn per cycle", 0);
 	}
 	if (max_stalled && instructions) {
-		out->new_line(config, ctxp);
+		if (out->new_line)
+			out->new_line(config, ctxp);
 		print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ",
 			     "stalled cycles per insn", max_stalled / instructions);
 	}
@@ -670,7 +671,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config,
 			}
 		}
 
-		if ((*num)++ > 0)
+		if ((*num)++ > 0 && out->new_line)
 			out->new_line(config, ctxp);
 		generic_metric(config, mexp, evsel, aggr_idx, out);
 	}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 6f8cff3cd39a..2fda9acd7374 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -117,8 +117,9 @@ struct perf_stat_config {
 	unsigned int		topdown_level;
 };
 
+extern struct perf_stat_config stat_config;
+
 void perf_stat__set_big_num(int set);
-void perf_stat__set_no_csv_summary(int set);
 
 void update_stats(struct stats *stats, u64 val);
 double avg_stats(struct stats *stats);
diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c
index 545e44981a27..3de4a6130853 100644
--- a/tools/perf/util/stream.c
+++ b/tools/perf/util/stream.c
@@ -52,7 +52,6 @@ static struct evlist_streams *evlist_streams__new(int nr_evsel,
 			goto err;
 
 		s->nr_streams_max = nr_streams_max;
-		s->evsel_idx = -1;
 	}
 
 	els->ev_streams = es;
@@ -139,7 +138,7 @@ static int evlist__init_callchain_streams(struct evlist *evlist,
 
 		hists__output_resort(hists, NULL);
 		init_hot_callchain(hists, &es[i]);
-		es[i].evsel_idx = pos->core.idx;
+		es[i].evsel = pos;
 		i++;
 	}
 
@@ -166,12 +165,12 @@ struct evlist_streams *evlist__create_streams(struct evlist *evlist,
 }
 
 struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
-					   int evsel_idx)
+					   const struct evsel *evsel)
 {
 	struct evsel_streams *es = els->ev_streams;
 
 	for (int i = 0; i < els->nr_evsel; i++) {
-		if (es[i].evsel_idx == evsel_idx)
+		if (es[i].evsel == evsel)
 			return &es[i];
 	}
 
diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h
index bee768874fea..50f7e6e04982 100644
--- a/tools/perf/util/stream.h
+++ b/tools/perf/util/stream.h
@@ -2,7 +2,9 @@
 #ifndef __PERF_STREAM_H
 #define __PERF_STREAM_H
 
-#include "callchain.h"
+struct callchain_node;
+struct evlist;
+struct evsel;
 
 struct stream {
 	struct callchain_node	*cnode;
@@ -11,9 +13,9 @@ struct stream {
 
 struct evsel_streams {
 	struct stream		*streams;
+	const struct evsel	*evsel;
 	int			nr_streams_max;
 	int			nr_streams;
-	int			evsel_idx;
 	u64			streams_hits;
 };
 
@@ -22,15 +24,13 @@ struct evlist_streams {
 	int			nr_evsel;
 };
 
-struct evlist;
-
 void evlist_streams__delete(struct evlist_streams *els);
 
 struct evlist_streams *evlist__create_streams(struct evlist *evlist,
 					      int nr_streams_max);
 
 struct evsel_streams *evsel_streams__entry(struct evlist_streams *els,
-					   int evsel_idx);
+					   const struct evsel *evsel);
 
 void evsel_streams__match(struct evsel_streams *es_base,
 			  struct evsel_streams *es_pair);
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index 308fc7ec88cc..c0e927bbadf6 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -254,11 +254,20 @@ char *strpbrk_esc(char *str, const char *stopset)
 
 	do {
 		ptr = strpbrk(str, stopset);
-		if (ptr == str ||
-		    (ptr == str + 1 && *(ptr - 1) != '\\'))
+		if (!ptr) {
+			/* stopset not in str. */
 			break;
+		}
+		if (ptr == str) {
+			/* stopset character is first in str. */
+			break;
+		}
+		if (ptr == str + 1 && str[0] != '\\') {
+			/* stopset chacter is second and wasn't preceded by a '\'. */
+			break;
+		}
 		str = ptr + 1;
-	} while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\');
+	} while (ptr[-1] == '\\' && ptr[-2] != '\\');
 
 	return ptr;
 }
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 2b04f47f4db0..b1d259f590e9 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -21,6 +21,7 @@
 #include <perf/cpumap.h>
 
 #include "env.h"
+#include "perf.h"
 #include "svghelper.h"
 
 static u64 first_time, last_time;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index e398abfd13a0..66fd1249660a 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -287,8 +287,9 @@ static bool want_demangle(bool is_kernel_sym)
  * Demangle C++ function signature, typically replaced by demangle-cxx.cpp
  * version.
  */
-__weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused,
-			      bool modifiers __maybe_unused)
+#ifndef HAVE_CXA_DEMANGLE_SUPPORT
+char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused,
+		       bool modifiers __maybe_unused)
 {
 #ifdef HAVE_LIBBFD_SUPPORT
 	int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0);
@@ -302,6 +303,7 @@ __weak char *cxx_demangle_sym(const char *str __maybe_unused, bool params __mayb
 	return NULL;
 #endif
 }
+#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */
 
 static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
 {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 0037f1163919..49b08adc6ee3 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -154,6 +154,13 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
 	else if ((a == 0) && (b > 0))
 		return SYMBOL_B;
 
+	if (syma->type != symb->type) {
+		if (syma->type == STT_NOTYPE)
+			return SYMBOL_B;
+		if (symb->type == STT_NOTYPE)
+			return SYMBOL_A;
+	}
+
 	/* Prefer a non weak symbol over a weak one */
 	a = syma->binding == STB_WEAK;
 	b = symb->binding == STB_WEAK;
@@ -257,7 +264,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
 		 * like in:
 		 *   ffffffffc1937000 T hdmi_driver_init  [snd_hda_codec_hdmi]
 		 */
-		if (prev->end == prev->start && prev->type != STT_NOTYPE) {
+		if (prev->end == prev->start) {
 			const char *prev_mod;
 			const char *curr_mod;
 
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index a58444c4aed1..6923b0d5efed 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -1686,12 +1686,16 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
 	}
 
 	if (type & PERF_SAMPLE_RAW) {
-		u.val32[0] = sample->raw_size;
-		*array = u.val64;
-		array = (void *)array + sizeof(u32);
+		u32 *array32 = (void *)array;
+
+		*array32 = sample->raw_size;
+		array32++;
+
+		memcpy(array32, sample->raw_data, sample->raw_size);
+		array = (void *)(array32 + (sample->raw_size / sizeof(u32)));
 
-		memcpy(array, sample->raw_data, sample->raw_size);
-		array = (void *)array + sample->raw_size;
+		/* make sure the array is 64-bit aligned */
+		BUG_ON(((long)array) % sizeof(u64));
 	}
 
 	if (type & PERF_SAMPLE_BRANCH_STACK) {
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 69d8dcf5cf28..928aca4cd6e9 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -10,52 +10,12 @@
 #include <linux/compiler.h>
 #include <linux/zalloc.h>
 
-#ifdef HAVE_SYSCALL_TABLE_SUPPORT
 #include <string.h>
 #include "string2.h"
 
-#if defined(__x86_64__)
-#include <asm/syscalls_64.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_x86_64;
-#elif defined(__i386__)
-#include <asm/syscalls_32.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_x86_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_x86;
-#elif defined(__s390x__)
-#include <asm/syscalls_64.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_s390_64;
-#elif defined(__powerpc64__)
-#include <asm/syscalls_64.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_powerpc_64;
-#elif defined(__powerpc__)
-#include <asm/syscalls_32.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_powerpc_32;
-#elif defined(__aarch64__)
-#include <asm/syscalls.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_arm64;
-#elif defined(__mips__)
-#include <asm/syscalls_n64.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_mips_n64;
-#elif defined(__loongarch__)
-#include <asm/syscalls.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_loongarch;
-#elif defined(__riscv)
-#include <asm/syscalls.c>
-const int syscalltbl_native_max_id = SYSCALLTBL_RISCV_MAX_ID;
-static const char *const *syscalltbl_native = syscalltbl_riscv;
-#else
-const int syscalltbl_native_max_id = 0;
-static const char *const syscalltbl_native[] = {
-	[0] = "unknown",
-};
-#endif
+#include <syscall_table.h>
+const int syscalltbl_native_max_id = SYSCALLTBL_MAX_ID;
+static const char *const *syscalltbl_native = syscalltbl;
 
 struct syscall {
 	int id;
@@ -163,47 +123,3 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g
 	*idx = -1;
 	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
 }
-
-#else /* HAVE_SYSCALL_TABLE_SUPPORT */
-
-#include <libaudit.h>
-
-struct syscalltbl *syscalltbl__new(void)
-{
-	struct syscalltbl *tbl = zalloc(sizeof(*tbl));
-	if (tbl)
-		tbl->audit_machine = audit_detect_machine();
-	return tbl;
-}
-
-void syscalltbl__delete(struct syscalltbl *tbl)
-{
-	free(tbl);
-}
-
-const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
-{
-	return audit_syscall_to_name(id, tbl->audit_machine);
-}
-
-int syscalltbl__id(struct syscalltbl *tbl, const char *name)
-{
-	return audit_name_to_syscall(name, tbl->audit_machine);
-}
-
-int syscalltbl__id_at_idx(struct syscalltbl *tbl __maybe_unused, int idx)
-{
-	return idx;
-}
-
-int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
-				  const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
-{
-	return -1;
-}
-
-int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
-{
-	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
-}
-#endif /* HAVE_SYSCALL_TABLE_SUPPORT */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index 2b53b7ed25a6..362411a6d849 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -3,7 +3,6 @@
 #define __PERF_SYSCALLTBL_H
 
 struct syscalltbl {
-	int audit_machine;
 	struct {
 		int max_id;
 		int nr_entries;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 41d53e1b43e7..9c015fc2bcfb 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -99,7 +99,7 @@ unsigned long long read_size(struct tep_event *event, void *ptr, int size)
 	return tep_read_number(event->tep, ptr, size);
 }
 
-void event_format__fprintf(struct tep_event *event,
+void event_format__fprintf(const struct tep_event *event,
 			   int cpu, void *data, int size, FILE *fp)
 {
 	struct tep_record record;
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 5596fcda2c10..4e81e02a4f18 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -13,14 +13,94 @@
 #include <event-parse.h>
 #endif
 
+#include "archinsn.h"
 #include "debug.h"
+#include "event.h"
 #include "trace-event.h"
 #include "evsel.h"
+#include <linux/perf_event.h>
 #include <linux/zalloc.h>
 #include "util/sample.h"
 
+unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
+
 struct scripting_context *scripting_context;
 
+struct script_spec {
+	struct list_head	node;
+	struct scripting_ops	*ops;
+	char			spec[];
+};
+
+static LIST_HEAD(script_specs);
+
+static struct script_spec *script_spec__new(const char *spec,
+					    struct scripting_ops *ops)
+{
+	struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
+
+	if (s != NULL) {
+		strcpy(s->spec, spec);
+		s->ops = ops;
+	}
+
+	return s;
+}
+
+static void script_spec__add(struct script_spec *s)
+{
+	list_add_tail(&s->node, &script_specs);
+}
+
+static struct script_spec *script_spec__find(const char *spec)
+{
+	struct script_spec *s;
+
+	list_for_each_entry(s, &script_specs, node)
+		if (strcasecmp(s->spec, spec) == 0)
+			return s;
+	return NULL;
+}
+
+static int script_spec_register(const char *spec, struct scripting_ops *ops)
+{
+	struct script_spec *s;
+
+	s = script_spec__find(spec);
+	if (s)
+		return -1;
+
+	s = script_spec__new(spec, ops);
+	if (!s)
+		return -1;
+
+	script_spec__add(s);
+	return 0;
+}
+
+struct scripting_ops *script_spec__lookup(const char *spec)
+{
+	struct script_spec *s = script_spec__find(spec);
+
+	if (!s)
+		return NULL;
+
+	return s->ops;
+}
+
+int script_spec__for_each(int (*cb)(struct scripting_ops *ops, const char *spec))
+{
+	struct script_spec *s;
+	int ret = 0;
+
+	list_for_each_entry(s, &script_specs, node) {
+		ret = cb(s->ops, s->spec);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
 void scripting_context__update(struct scripting_context *c,
 			       union perf_event *event,
 			       struct perf_sample *sample,
@@ -28,12 +108,14 @@ void scripting_context__update(struct scripting_context *c,
 			       struct addr_location *al,
 			       struct addr_location *addr_al)
 {
-	c->event_data = sample->raw_data;
-	c->pevent = NULL;
 #ifdef HAVE_LIBTRACEEVENT
-	if (evsel->tp_format)
-		c->pevent = evsel->tp_format->tep;
+	const struct tep_event *tp_format = evsel__tp_format(evsel);
+
+	c->pevent = tp_format ? tp_format->tep : NULL;
+#else
+	c->pevent = NULL;
 #endif
+	c->event_data = sample->raw_data;
 	c->event = event;
 	c->sample = sample;
 	c->evsel = evsel;
@@ -191,3 +273,100 @@ void setup_perl_scripting(void)
 }
 #endif
 #endif
+
+#if !defined(__i386__) && !defined(__x86_64__)
+void arch_fetch_insn(struct perf_sample *sample __maybe_unused,
+		     struct thread *thread __maybe_unused,
+		     struct machine *machine __maybe_unused)
+{
+}
+#endif
+
+void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
+		       struct machine *machine, bool native_arch)
+{
+	if (sample->insn_len == 0 && native_arch)
+		arch_fetch_insn(sample, thread, machine);
+}
+
+static const struct {
+	u32 flags;
+	const char *name;
+} sample_flags[] = {
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"},
+	{PERF_IP_FLAG_BRANCH, "jmp"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |	PERF_IP_FLAG_INTERRUPT,
+	 "hw int"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"},
+	{PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_BRANCH_MISS, "br miss"},
+	{0, NULL}
+};
+
+static const char *sample_flags_to_name(u32 flags)
+{
+	int i;
+
+	for (i = 0; sample_flags[i].name ; i++) {
+		if (sample_flags[i].flags == flags)
+			return sample_flags[i].name;
+	}
+
+	return NULL;
+}
+
+int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz)
+{
+	u32 xf = PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_INTR_DISABLE |
+		 PERF_IP_FLAG_INTR_TOGGLE;
+	const char *chars = PERF_IP_FLAG_CHARS;
+	const size_t n = strlen(PERF_IP_FLAG_CHARS);
+	const char *name = NULL;
+	size_t i, pos = 0;
+	char xs[16] = {0};
+
+	if (flags & xf)
+		snprintf(xs, sizeof(xs), "(%s%s%s)",
+			 flags & PERF_IP_FLAG_IN_TX ? "x" : "",
+			 flags & PERF_IP_FLAG_INTR_DISABLE ? "D" : "",
+			 flags & PERF_IP_FLAG_INTR_TOGGLE ? "t" : "");
+
+	name = sample_flags_to_name(flags & ~xf);
+	if (name)
+		return snprintf(str, sz, "%-15s%6s", name, xs);
+
+	if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
+		name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_BEGIN));
+		if (name)
+			return snprintf(str, sz, "tr strt %-7s%6s", name, xs);
+	}
+
+	if (flags & PERF_IP_FLAG_TRACE_END) {
+		name = sample_flags_to_name(flags & ~(xf | PERF_IP_FLAG_TRACE_END));
+		if (name)
+			return snprintf(str, sz, "tr end  %-7s%6s", name, xs);
+	}
+
+	for (i = 0; i < n; i++, flags >>= 1) {
+		if ((flags & 1) && pos < sz)
+			str[pos++] = chars[i];
+	}
+	for (; i < 32; i++, flags >>= 1) {
+		if ((flags & 1) && pos < sz)
+			str[pos++] = '?';
+	}
+	if (pos < sz)
+		str[pos] = 0;
+
+	return pos;
+}
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 79b939f947dd..ac9fde2f980c 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -39,7 +39,7 @@ trace_event__tp_format(const char *sys, const char *name);
 
 struct tep_event *trace_event__tp_format_id(int id);
 
-void event_format__fprintf(struct tep_event *event,
+void event_format__fprintf(const struct tep_event *event,
 			   int cpu, void *data, int size, FILE *fp);
 
 int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size);
@@ -113,10 +113,11 @@ struct scripting_ops {
 
 extern unsigned int scripting_max_stack;
 
-int script_spec_register(const char *spec, struct scripting_ops *ops);
+struct scripting_ops *script_spec__lookup(const char *spec);
+int script_spec__for_each(int (*cb)(struct scripting_ops *ops, const char *spec));
 
 void script_fetch_insn(struct perf_sample *sample, struct thread *thread,
-		       struct machine *machine);
+		       struct machine *machine, bool native_arch);
 
 void setup_perl_scripting(void);
 void setup_python_scripting(void);
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
index b9823f414f10..ec72d29f3d58 100644
--- a/tools/perf/util/values.c
+++ b/tools/perf/util/values.c
@@ -8,6 +8,7 @@
 
 #include "values.h"
 #include "debug.h"
+#include "evsel.h"
 
 int perf_read_values_init(struct perf_read_values *values)
 {
@@ -22,21 +23,17 @@ int perf_read_values_init(struct perf_read_values *values)
 	values->threads = 0;
 
 	values->counters_max = 16;
-	values->counterrawid = malloc(values->counters_max
-				      * sizeof(*values->counterrawid));
-	values->countername = malloc(values->counters_max
-				     * sizeof(*values->countername));
-	if (!values->counterrawid || !values->countername) {
-		pr_debug("failed to allocate read_values counters arrays");
+	values->counters = malloc(values->counters_max * sizeof(*values->counters));
+	if (!values->counters) {
+		pr_debug("failed to allocate read_values counters array");
 		goto out_free_counter;
 	}
-	values->counters = 0;
+	values->num_counters = 0;
 
 	return 0;
 
 out_free_counter:
-	zfree(&values->counterrawid);
-	zfree(&values->countername);
+	zfree(&values->counters);
 out_free_pid:
 	zfree(&values->pid);
 	zfree(&values->tid);
@@ -56,10 +53,7 @@ void perf_read_values_destroy(struct perf_read_values *values)
 	zfree(&values->value);
 	zfree(&values->pid);
 	zfree(&values->tid);
-	zfree(&values->counterrawid);
-	for (i = 0; i < values->counters; i++)
-		zfree(&values->countername[i]);
-	zfree(&values->countername);
+	zfree(&values->counters);
 }
 
 static int perf_read_values__enlarge_threads(struct perf_read_values *values)
@@ -116,81 +110,71 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values,
 
 static int perf_read_values__enlarge_counters(struct perf_read_values *values)
 {
-	char **countername;
-	int i, counters_max = values->counters_max * 2;
-	u64 *counterrawid = realloc(values->counterrawid, counters_max * sizeof(*values->counterrawid));
+	int counters_max = values->counters_max * 2;
+	struct evsel **new_counters = realloc(values->counters,
+					     counters_max * sizeof(*values->counters));
 
-	if (!counterrawid) {
-		pr_debug("failed to enlarge read_values rawid array");
+	if (!new_counters) {
+		pr_debug("failed to enlarge read_values counters array");
 		goto out_enomem;
 	}
 
-	countername = realloc(values->countername, counters_max * sizeof(*values->countername));
-	if (!countername) {
-		pr_debug("failed to enlarge read_values rawid array");
-		goto out_free_rawid;
-	}
-
-	for (i = 0; i < values->threads; i++) {
+	for (int i = 0; i < values->threads; i++) {
 		u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value));
-		int j;
 
 		if (!value) {
 			pr_debug("failed to enlarge read_values ->values array");
-			goto out_free_name;
+			goto out_free_counters;
 		}
 
-		for (j = values->counters_max; j < counters_max; j++)
+		for (int j = values->counters_max; j < counters_max; j++)
 			value[j] = 0;
 
 		values->value[i] = value;
 	}
 
 	values->counters_max = counters_max;
-	values->counterrawid = counterrawid;
-	values->countername  = countername;
+	values->counters = new_counters;
 
 	return 0;
-out_free_name:
-	free(countername);
-out_free_rawid:
-	free(counterrawid);
+out_free_counters:
+	free(new_counters);
 out_enomem:
 	return -ENOMEM;
 }
 
 static int perf_read_values__findnew_counter(struct perf_read_values *values,
-					     u64 rawid, const char *name)
+					     struct evsel *evsel)
 {
 	int i;
 
-	for (i = 0; i < values->counters; i++)
-		if (values->counterrawid[i] == rawid)
+	for (i = 0; i < values->num_counters; i++)
+		if (values->counters[i] == evsel)
 			return i;
 
-	if (values->counters == values->counters_max) {
-		i = perf_read_values__enlarge_counters(values);
-		if (i)
-			return i;
+	if (values->num_counters == values->counters_max) {
+		int err = perf_read_values__enlarge_counters(values);
+
+		if (err)
+			return err;
 	}
 
-	i = values->counters++;
-	values->counterrawid[i] = rawid;
-	values->countername[i] = strdup(name);
+	i = values->num_counters++;
+	values->counters[i] = evsel;
 
 	return i;
 }
 
 int perf_read_values_add_value(struct perf_read_values *values,
 				u32 pid, u32 tid,
-				u64 rawid, const char *name, u64 value)
+				struct evsel *evsel, u64 value)
 {
 	int tindex, cindex;
 
 	tindex = perf_read_values__findnew_thread(values, pid, tid);
 	if (tindex < 0)
 		return tindex;
-	cindex = perf_read_values__findnew_counter(values, rawid, name);
+	cindex = perf_read_values__findnew_counter(values, evsel);
 	if (cindex < 0)
 		return cindex;
 
@@ -205,15 +189,15 @@ static void perf_read_values__display_pretty(FILE *fp,
 	int pidwidth, tidwidth;
 	int *counterwidth;
 
-	counterwidth = malloc(values->counters * sizeof(*counterwidth));
+	counterwidth = malloc(values->num_counters * sizeof(*counterwidth));
 	if (!counterwidth) {
 		fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n");
 		return;
 	}
 	tidwidth = 3;
 	pidwidth = 3;
-	for (j = 0; j < values->counters; j++)
-		counterwidth[j] = strlen(values->countername[j]);
+	for (j = 0; j < values->num_counters; j++)
+		counterwidth[j] = strlen(evsel__name(values->counters[j]));
 	for (i = 0; i < values->threads; i++) {
 		int width;
 
@@ -223,7 +207,7 @@ static void perf_read_values__display_pretty(FILE *fp,
 		width = snprintf(NULL, 0, "%d", values->tid[i]);
 		if (width > tidwidth)
 			tidwidth = width;
-		for (j = 0; j < values->counters; j++) {
+		for (j = 0; j < values->num_counters; j++) {
 			width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
 			if (width > counterwidth[j])
 				counterwidth[j] = width;
@@ -231,14 +215,14 @@ static void perf_read_values__display_pretty(FILE *fp,
 	}
 
 	fprintf(fp, "# %*s  %*s", pidwidth, "PID", tidwidth, "TID");
-	for (j = 0; j < values->counters; j++)
-		fprintf(fp, "  %*s", counterwidth[j], values->countername[j]);
+	for (j = 0; j < values->num_counters; j++)
+		fprintf(fp, "  %*s", counterwidth[j], evsel__name(values->counters[j]));
 	fprintf(fp, "\n");
 
 	for (i = 0; i < values->threads; i++) {
 		fprintf(fp, "  %*d  %*d", pidwidth, values->pid[i],
 			tidwidth, values->tid[i]);
-		for (j = 0; j < values->counters; j++)
+		for (j = 0; j < values->num_counters; j++)
 			fprintf(fp, "  %*" PRIu64,
 				counterwidth[j], values->value[i][j]);
 		fprintf(fp, "\n");
@@ -266,16 +250,16 @@ static void perf_read_values__display_raw(FILE *fp,
 		if (width > tidwidth)
 			tidwidth = width;
 	}
-	for (j = 0; j < values->counters; j++) {
-		width = strlen(values->countername[j]);
+	for (j = 0; j < values->num_counters; j++) {
+		width = strlen(evsel__name(values->counters[j]));
 		if (width > namewidth)
 			namewidth = width;
-		width = snprintf(NULL, 0, "%" PRIx64, values->counterrawid[j]);
+		width = snprintf(NULL, 0, "%x", values->counters[j]->core.idx);
 		if (width > rawwidth)
 			rawwidth = width;
 	}
 	for (i = 0; i < values->threads; i++) {
-		for (j = 0; j < values->counters; j++) {
+		for (j = 0; j < values->num_counters; j++) {
 			width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
 			if (width > countwidth)
 				countwidth = width;
@@ -287,12 +271,12 @@ static void perf_read_values__display_raw(FILE *fp,
 		namewidth, "Name", rawwidth, "Raw",
 		countwidth, "Count");
 	for (i = 0; i < values->threads; i++)
-		for (j = 0; j < values->counters; j++)
-			fprintf(fp, "  %*d  %*d  %*s  %*" PRIx64 "  %*" PRIu64,
+		for (j = 0; j < values->num_counters; j++)
+			fprintf(fp, "  %*d  %*d  %*s  %*x  %*" PRIu64,
 				pidwidth, values->pid[i],
 				tidwidth, values->tid[i],
-				namewidth, values->countername[j],
-				rawwidth, values->counterrawid[j],
+				namewidth, evsel__name(values->counters[j]),
+				rawwidth, values->counters[j]->core.idx,
 				countwidth, values->value[i][j]);
 }
 
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
index 791c1ad606c2..bbca33daca19 100644
--- a/tools/perf/util/values.h
+++ b/tools/perf/util/values.h
@@ -5,14 +5,15 @@
 #include <stdio.h>
 #include <linux/types.h>
 
+struct evsel;
+
 struct perf_read_values {
 	int threads;
 	int threads_max;
 	u32 *pid, *tid;
-	int counters;
+	int num_counters;
 	int counters_max;
-	u64 *counterrawid;
-	char **countername;
+	struct evsel **counters;
 	u64 **value;
 };
 
@@ -21,7 +22,7 @@ void perf_read_values_destroy(struct perf_read_values *values);
 
 int perf_read_values_add_value(struct perf_read_values *values,
 				u32 pid, u32 tid,
-				u64 rawid, const char *name, u64 value);
+				struct evsel *evsel, u64 value);
 
 void perf_read_values_display(FILE *fp, struct perf_read_values *values,
 			      int raw);
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 175004ce44b2..835123add0ed 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -52,8 +52,11 @@ DESTDIR ?=
 # and _should_ modify the PACKAGE_BUGREPORT definition
 
 VERSION:=			$(shell ./utils/version-gen.sh)
-LIB_MAJ=			0.0.1
-LIB_MIN=			1
+LIB_FIX=			1
+LIB_MIN=			0
+LIB_MAJ=			1
+LIB_VER=			$(LIB_MAJ).$(LIB_MIN).$(LIB_FIX)
+
 
 PACKAGE =			cpupower
 PACKAGE_BUGREPORT =		linux-pm@vger.kernel.org
@@ -87,11 +90,19 @@ INSTALL_SCRIPT = ${INSTALL} -m 644
 # to something more interesting, like "arm-linux-".  If you want
 # to compile vs uClibc, that can be done here as well.
 CROSS ?= #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
+ifneq ($(CROSS), )
+CC = $(CROSS)gcc
+LD = $(CROSS)gcc
+AR = $(CROSS)ar
+STRIP = $(CROSS)strip
+RANLIB = $(CROSS)ranlib
+else
 CC ?= $(CROSS)gcc
 LD ?= $(CROSS)gcc
 AR ?= $(CROSS)ar
 STRIP ?= $(CROSS)strip
 RANLIB ?= $(CROSS)ranlib
+endif
 HOSTCC = gcc
 MKDIR = mkdir
 
@@ -192,14 +203,14 @@ $(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS)
 	$(ECHO) "  CC      " $@
 	$(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c
 
-$(OUTPUT)libcpupower.so.$(LIB_MAJ): $(LIB_OBJS)
+$(OUTPUT)libcpupower.so.$(LIB_VER): $(LIB_OBJS)
 	$(ECHO) "  LD      " $@
 	$(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \
-		-Wl,-soname,libcpupower.so.$(LIB_MIN) $(LIB_OBJS)
+		-Wl,-soname,libcpupower.so.$(LIB_MAJ) $(LIB_OBJS)
 	@ln -sf $(@F) $(OUTPUT)libcpupower.so
-	@ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MIN)
+	@ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
 
-libcpupower: $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+libcpupower: $(OUTPUT)libcpupower.so.$(LIB_VER)
 
 # Let all .o files depend on its .c file and all headers
 # Might be worth to put this into utils/Makefile at some point of time
@@ -209,7 +220,7 @@ $(OUTPUT)%.o: %.c
 	$(ECHO) "  CC      " $@
 	$(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c
 
-$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_VER)
 	$(ECHO) "  CC      " $@
 ifeq ($(strip $(STATIC)),true)
 	$(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lrt -lpci -L$(OUTPUT) -o $@
@@ -254,7 +265,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot
 	done;
 endif
 
-compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+compile-bench: $(OUTPUT)libcpupower.so.$(LIB_VER)
 	@V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT)
 
 # we compile into subdirectories. if the target directory is not the
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 080678d9d74e..bd67c758b33a 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -121,6 +121,10 @@ out_dir:
 struct config *prepare_default_config()
 {
 	struct config *config = malloc(sizeof(struct config));
+	if (!config) {
+		perror("malloc");
+		return NULL;
+	}
 
 	dprintf("loading defaults\n");
 
diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile
index e1ebb1d60cd4..741f21477432 100644
--- a/tools/power/cpupower/bindings/python/Makefile
+++ b/tools/power/cpupower/bindings/python/Makefile
@@ -11,6 +11,7 @@ HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; el
 LIB_DIR := ../../lib
 PY_INCLUDE = $(firstword $(shell python-config --includes))
 OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o)
+INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])")
 
 all: _raw_pylibcpupower.so
 
@@ -28,6 +29,15 @@ else ifeq ($(HAVE_PYCONFIG),0)
 endif
 	swig -python raw_pylibcpupower.swg
 
+# Only installs the Python bindings
+install: _raw_pylibcpupower.so
+	install -D _raw_pylibcpupower.so $(INSTALL_DIR)/_raw_pylibcpupower.so
+	install -D raw_pylibcpupower.py $(INSTALL_DIR)/raw_pylibcpupower.py
+
+uninstall:
+	rm -f $(INSTALL_DIR)/_raw_pylibcpupower.so
+	rm -f $(INSTALL_DIR)/raw_pylibcpupower.py
+
 # Will only clean the bindings folder; will not clean the actual cpupower folder
 clean:
 	rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so
diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README
index 0a4bb2581e8a..952e2e02fd32 100644
--- a/tools/power/cpupower/bindings/python/README
+++ b/tools/power/cpupower/bindings/python/README
@@ -48,6 +48,31 @@ To run the test script:
 $ python test_raw_pylibcpupower.py
 
 
+developing/using the bindings directly
+--------------------------------------
+
+You need to add the Python bindings directory to your $PYTHONPATH.
+
+You would set the path in the Bash terminal or in the Bash profile:
+
+PYTHONPATH=~/linux/tools/power/cpupower/bindings/python:$PYTHONPATH
+
+This allows you to set a specific repo of the bindings to use.
+
+
+installing/uninstalling
+-----------------------
+
+Python uses a system specific site-packages folder to look up modules to import
+by default. You do not need to install cpupower to use the SWIG bindings.
+
+You can install and uninstall the bindings to the site-packages with:
+
+sudo make install
+
+sudo make uninstall
+
+
 credits
 -------
 
diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg b/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg
index 96556d87a745..d82af6fa93c3 100644
--- a/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg
+++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.swg
@@ -134,6 +134,9 @@ void cpufreq_put_stats(struct cpufreq_stats *stats);
 
 unsigned long cpufreq_get_transitions(unsigned int cpu);
 
+char *cpufreq_get_energy_performance_preference(unsigned int cpu);
+void cpufreq_put_energy_performance_preference(char *ptr);
+
 int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
 
 int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
@@ -160,6 +163,8 @@ int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
 				   unsigned int disable);
 unsigned long cpuidle_state_latency(unsigned int cpu,
 						unsigned int idlestate);
+unsigned long cpuidle_state_residency(unsigned int cpu,
+						unsigned int idlestate);
 unsigned long cpuidle_state_usage(unsigned int cpu,
 					unsigned int idlestate);
 unsigned long long cpuidle_state_time(unsigned int cpu,
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index 1516d23c17c9..8dda3db2dff0 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -102,6 +102,10 @@ unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
 	if (len == 0)
 		return 0;
 
+	if (!strcmp(linebuf, "enabled\n"))
+		return 1;
+	if (!strcmp(linebuf, "disabled\n"))
+		return 0;
 	value = strtoul(linebuf, &endp, 0);
 
 	if (endp == linebuf || errno == ERANGE)
@@ -123,12 +127,14 @@ static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
 enum cpufreq_string {
 	SCALING_DRIVER,
 	SCALING_GOVERNOR,
+	ENERGY_PERFORMANCE_PREFERENCE,
 	MAX_CPUFREQ_STRING_FILES
 };
 
 static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
 	[SCALING_DRIVER] = "scaling_driver",
 	[SCALING_GOVERNOR] = "scaling_governor",
+	[ENERGY_PERFORMANCE_PREFERENCE] = "energy_performance_preference",
 };
 
 
@@ -203,6 +209,18 @@ unsigned long cpufreq_get_transition_latency(unsigned int cpu)
 	return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY);
 }
 
+char *cpufreq_get_energy_performance_preference(unsigned int cpu)
+{
+	return sysfs_cpufreq_get_one_string(cpu, ENERGY_PERFORMANCE_PREFERENCE);
+}
+
+void cpufreq_put_energy_performance_preference(char *ptr)
+{
+	if (!ptr)
+		return;
+	free(ptr);
+}
+
 int cpufreq_get_hardware_limits(unsigned int cpu,
 				unsigned long *min,
 				unsigned long *max)
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
index 2f3c84035806..bfc617311ebd 100644
--- a/tools/power/cpupower/lib/cpufreq.h
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -68,6 +68,14 @@ unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
 unsigned long cpufreq_get_transition_latency(unsigned int cpu);
 
 
+/* determine energy performance preference
+ *
+ * returns NULL on failure, else the string that represents the energy performance
+ * preference requested.
+ */
+char *cpufreq_get_energy_performance_preference(unsigned int cpu);
+void cpufreq_put_energy_performance_preference(char *ptr);
+
 /* determine hardware CPU frequency limits
  *
  * These may be limited further by thermal, energy or other
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
index 7a2ef691b20e..ce8dfb8e46ab 100644
--- a/tools/power/cpupower/lib/cpupower.c
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -10,6 +10,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "cpupower.h"
 #include "cpupower_intern.h"
@@ -150,15 +151,25 @@ static int __compare(const void *t1, const void *t2)
 		return 0;
 }
 
+static int __compare_core_cpu_list(const void *t1, const void *t2)
+{
+	struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
+	struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
+
+	return strcmp(top1->core_cpu_list, top2->core_cpu_list);
+}
+
 /*
  * Returns amount of cpus, negative on error, cpu_top must be
  * passed to cpu_topology_release to free resources
  *
- * Array is sorted after ->pkg, ->core, then ->cpu
+ * Array is sorted after ->cpu_smt_list ->pkg, ->core
  */
 int get_cpu_topology(struct cpupower_topology *cpu_top)
 {
 	int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
+	char path[SYSFS_PATH_MAX];
+	char *last_cpu_list;
 
 	cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
 	if (cpu_top->core_info == NULL)
@@ -183,6 +194,34 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
 			cpu_top->core_info[cpu].core = -1;
 			continue;
 		}
+		if (cpu_top->core_info[cpu].core == -1) {
+			strncpy(cpu_top->core_info[cpu].core_cpu_list, "-1", CPULIST_BUFFER);
+			continue;
+		}
+		snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
+			 cpu, "core_cpus_list");
+		if (cpupower_read_sysfs(
+			path,
+			cpu_top->core_info[cpu].core_cpu_list,
+			CPULIST_BUFFER) < 1) {
+			printf("Warning CPU%u has a 0 size core_cpus_list string", cpu);
+		}
+	}
+
+	/* Count the number of distinct cpu lists to get the physical core
+	 * count.
+	 */
+	qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
+	      __compare_core_cpu_list);
+
+	last_cpu_list = cpu_top->core_info[0].core_cpu_list;
+	cpu_top->cores = 1;
+	for (cpu = 1; cpu < cpus; cpu++) {
+		if (strcmp(cpu_top->core_info[cpu].core_cpu_list, last_cpu_list) != 0 &&
+		    cpu_top->core_info[cpu].pkg != -1) {
+			last_cpu_list = cpu_top->core_info[cpu].core_cpu_list;
+			cpu_top->cores++;
+		}
 	}
 
 	qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
@@ -203,13 +242,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top)
 	if (!(cpu_top->core_info[0].pkg == -1))
 		cpu_top->pkgs++;
 
-	/* Intel's cores count is not consecutively numbered, there may
-	 * be a core_id of 3, but none of 2. Assume there always is 0
-	 * Get amount of cores by counting duplicates in a package
-	for (cpu = 0; cpu_top->core_info[cpu].pkg = 0 && cpu < cpus; cpu++) {
-		if (cpu_top->core_info[cpu].core == 0)
-	cpu_top->cores++;
-	*/
 	return cpus;
 }
 
diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h
index e4e4292eacec..2e67a080f203 100644
--- a/tools/power/cpupower/lib/cpupower.h
+++ b/tools/power/cpupower/lib/cpupower.h
@@ -2,6 +2,8 @@
 #ifndef __CPUPOWER_CPUPOWER_H__
 #define __CPUPOWER_CPUPOWER_H__
 
+#define CPULIST_BUFFER 5
+
 struct cpupower_topology {
 	/* Amount of CPU cores, packages and threads per core in the system */
 	unsigned int cores;
@@ -16,6 +18,7 @@ struct cpuid_core_info {
 	int pkg;
 	int core;
 	int cpu;
+	char core_cpu_list[CPULIST_BUFFER];
 
 	/* flags */
 	unsigned int is_online:1;
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index c96b77365c63..fc750e127404 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -120,7 +120,6 @@ static void print_duration(unsigned long duration)
 		} else
 			printf("%lu ns", duration);
 	}
-	return;
 }
 
 static int get_boost_mode_x86(unsigned int cpu)
@@ -255,7 +254,12 @@ static int get_freq_kernel(unsigned int cpu, unsigned int human)
 
 static int get_freq_hardware(unsigned int cpu, unsigned int human)
 {
-	unsigned long freq = cpufreq_get_freq_hardware(cpu);
+	unsigned long freq;
+
+	if (cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)
+		return -EINVAL;
+
+	freq = cpufreq_get_freq_hardware(cpu);
 	printf(_("  current CPU frequency: "));
 	if (!freq) {
 		printf("Unable to call hardware\n");
@@ -418,12 +422,32 @@ static int get_freq_stats(unsigned int cpu, unsigned int human)
 	return 0;
 }
 
+/* --epp / -z */
+
+static int get_epp(unsigned int cpu, bool interactive)
+{
+	char *epp;
+
+	epp = cpufreq_get_energy_performance_preference(cpu);
+	if (!epp)
+		return -EINVAL;
+	if (interactive)
+		printf(_("  energy performance preference: %s\n"), epp);
+
+	cpufreq_put_energy_performance_preference(epp);
+
+	return 0;
+}
+
 /* --latency / -y */
 
 static int get_latency(unsigned int cpu, unsigned int human)
 {
 	unsigned long latency = cpufreq_get_transition_latency(cpu);
 
+	if (!get_epp(cpu, false))
+		return -EINVAL;
+
 	printf(_("  maximum transition latency: "));
 	if (!latency || latency == UINT_MAX) {
 		printf(_(" Cannot determine or is not supported.\n"));
@@ -457,6 +481,7 @@ static void debug_output_one(unsigned int cpu)
 	get_related_cpus(cpu);
 	get_affected_cpus(cpu);
 	get_latency(cpu, 1);
+	get_epp(cpu, true);
 	get_hardware_limits(cpu, 1);
 
 	freqs = cpufreq_get_available_frequencies(cpu);
@@ -497,6 +522,7 @@ static struct option info_opts[] = {
 	{"human",	 no_argument,		 NULL,	 'm'},
 	{"no-rounding", no_argument,	 NULL,	 'n'},
 	{"performance", no_argument,	 NULL,	 'c'},
+	{"epp",		 no_argument,		 NULL,	 'z'},
 	{ },
 };
 
@@ -510,7 +536,7 @@ int cmd_freq_info(int argc, char **argv)
 	int output_param = 0;
 
 	do {
-		ret = getopt_long(argc, argv, "oefwldpgrasmybnc", info_opts,
+		ret = getopt_long(argc, argv, "oefwldpgrasmybncz", info_opts,
 				  NULL);
 		switch (ret) {
 		case '?':
@@ -534,6 +560,7 @@ int cmd_freq_info(int argc, char **argv)
 		case 's':
 		case 'y':
 		case 'c':
+		case 'z':
 			if (output_param) {
 				output_param = -1;
 				cont = 0;
@@ -643,6 +670,9 @@ int cmd_freq_info(int argc, char **argv)
 		case 'c':
 			ret = get_perf_cap(cpu);
 			break;
+		case 'z':
+			ret = get_epp(cpu, true);
+			break;
 		}
 		if (ret)
 			return ret;
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index 0a56e22240fc..795562e879de 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -177,6 +177,8 @@ enum amd_pstate_value {
 	AMD_PSTATE_HIGHEST_PERF,
 	AMD_PSTATE_MAX_FREQ,
 	AMD_PSTATE_LOWEST_NONLINEAR_FREQ,
+	AMD_PSTATE_HW_PREFCORE,
+	AMD_PSTATE_PREFCORE_RANKING,
 	MAX_AMD_PSTATE_VALUE_READ_FILES,
 };
 
@@ -184,6 +186,8 @@ static const char *amd_pstate_value_files[MAX_AMD_PSTATE_VALUE_READ_FILES] = {
 	[AMD_PSTATE_HIGHEST_PERF] = "amd_pstate_highest_perf",
 	[AMD_PSTATE_MAX_FREQ] = "amd_pstate_max_freq",
 	[AMD_PSTATE_LOWEST_NONLINEAR_FREQ] = "amd_pstate_lowest_nonlinear_freq",
+	[AMD_PSTATE_HW_PREFCORE] = "amd_pstate_hw_prefcore",
+	[AMD_PSTATE_PREFCORE_RANKING] = "amd_pstate_prefcore_ranking",
 };
 
 static unsigned long amd_pstate_get_data(unsigned int cpu,
@@ -215,7 +219,9 @@ void amd_pstate_boost_init(unsigned int cpu, int *support, int *active)
 
 void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding)
 {
-	printf(_("    AMD PSTATE Highest Performance: %lu. Maximum Frequency: "),
+
+	printf(_("  amd-pstate limits:\n"));
+	printf(_("    Highest Performance: %lu. Maximum Frequency: "),
 	       amd_pstate_get_data(cpu, AMD_PSTATE_HIGHEST_PERF));
 	/*
 	 * If boost isn't active, the cpuinfo_max doesn't indicate real max
@@ -224,22 +230,26 @@ void amd_pstate_show_perf_and_freq(unsigned int cpu, int no_rounding)
 	print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_MAX_FREQ), no_rounding);
 	printf(".\n");
 
-	printf(_("    AMD PSTATE Nominal Performance: %lu. Nominal Frequency: "),
+	printf(_("    Nominal Performance: %lu. Nominal Frequency: "),
 	       acpi_cppc_get_data(cpu, NOMINAL_PERF));
 	print_speed(acpi_cppc_get_data(cpu, NOMINAL_FREQ) * 1000,
 		    no_rounding);
 	printf(".\n");
 
-	printf(_("    AMD PSTATE Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "),
+	printf(_("    Lowest Non-linear Performance: %lu. Lowest Non-linear Frequency: "),
 	       acpi_cppc_get_data(cpu, LOWEST_NONLINEAR_PERF));
 	print_speed(amd_pstate_get_data(cpu, AMD_PSTATE_LOWEST_NONLINEAR_FREQ),
 		    no_rounding);
 	printf(".\n");
 
-	printf(_("    AMD PSTATE Lowest Performance: %lu. Lowest Frequency: "),
+	printf(_("    Lowest Performance: %lu. Lowest Frequency: "),
 	       acpi_cppc_get_data(cpu, LOWEST_PERF));
 	print_speed(acpi_cppc_get_data(cpu, LOWEST_FREQ) * 1000, no_rounding);
 	printf(".\n");
+
+	printf(_("    Preferred Core Support: %lu. Preferred Core Ranking: %lu.\n"),
+	       amd_pstate_get_data(cpu, AMD_PSTATE_HW_PREFCORE),
+	       amd_pstate_get_data(cpu, AMD_PSTATE_PREFCORE_RANKING));
 }
 
 /* AMD P-State Helper Functions ************************************/
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index f746099b5dac..ad493157f826 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -6,6 +6,7 @@
  */
 
 
+#include <errno.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
@@ -91,7 +92,11 @@ int fill_string_with_spaces(char *s, int n)
 	return 0;
 }
 
-#define MAX_COL_WIDTH 6
+#define MAX_COL_WIDTH		6
+#define TOPOLOGY_DEPTH_PKG	3
+#define TOPOLOGY_DEPTH_CORE	2
+#define TOPOLOGY_DEPTH_CPU	1
+
 void print_header(int topology_depth)
 {
 	int unsigned mon;
@@ -113,12 +118,19 @@ void print_header(int topology_depth)
 	}
 	printf("\n");
 
-	if (topology_depth > 2)
+	switch (topology_depth) {
+	case TOPOLOGY_DEPTH_PKG:
 		printf(" PKG|");
-	if (topology_depth > 1)
+		break;
+	case TOPOLOGY_DEPTH_CORE:
 		printf("CORE|");
-	if (topology_depth > 0)
+		break;
+	case	TOPOLOGY_DEPTH_CPU:
 		printf(" CPU|");
+		break;
+	default:
+		return;
+	}
 
 	for (mon = 0; mon < avail_monitors; mon++) {
 		if (mon != 0)
@@ -152,12 +164,19 @@ void print_results(int topology_depth, int cpu)
 	    cpu_top.core_info[cpu].pkg == -1)
 		return;
 
-	if (topology_depth > 2)
+	switch (topology_depth) {
+	case TOPOLOGY_DEPTH_PKG:
 		printf("%4d|", cpu_top.core_info[cpu].pkg);
-	if (topology_depth > 1)
+		break;
+	case TOPOLOGY_DEPTH_CORE:
 		printf("%4d|", cpu_top.core_info[cpu].core);
-	if (topology_depth > 0)
+		break;
+	case TOPOLOGY_DEPTH_CPU:
 		printf("%4d|", cpu_top.core_info[cpu].cpu);
+		break;
+	default:
+		return;
+	}
 
 	for (mon = 0; mon < avail_monitors; mon++) {
 		if (mon != 0)
@@ -294,7 +313,10 @@ int fork_it(char **argv)
 
 	if (!child_pid) {
 		/* child */
-		execvp(argv[0], argv);
+		if (execvp(argv[0], argv) == -1) {
+			printf("Invalid monitor command %s\n", argv[0]);
+			exit(errno);
+		}
 	} else {
 		/* parent */
 		if (child_pid == -1) {
@@ -423,11 +445,13 @@ int cmd_monitor(int argc, char **argv)
 
 	if (avail_monitors == 0) {
 		printf(_("No HW Cstate monitors found\n"));
+		cpu_topology_release(cpu_top);
 		return 1;
 	}
 
 	if (mode == list) {
 		list_monitors();
+		cpu_topology_release(cpu_top);
 		exit(EXIT_SUCCESS);
 	}
 
@@ -448,15 +472,15 @@ int cmd_monitor(int argc, char **argv)
 	/* ToDo: Topology parsing needs fixing first to do
 	   this more generically */
 	if (cpu_top.pkgs > 1)
-		print_header(3);
+		print_header(TOPOLOGY_DEPTH_PKG);
 	else
-		print_header(1);
+		print_header(TOPOLOGY_DEPTH_CPU);
 
 	for (cpu = 0; cpu < cpu_count; cpu++) {
 		if (cpu_top.pkgs > 1)
-			print_results(3, cpu);
+			print_results(TOPOLOGY_DEPTH_PKG, cpu);
 		else
-			print_results(1, cpu);
+			print_results(TOPOLOGY_DEPTH_CPU, cpu);
 	}
 
 	for (num = 0; num < avail_monitors; num++) {
diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
index 55e55b6b42f9..f5a2a326b1b7 100644
--- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
@@ -117,7 +117,7 @@ static int hsw_ext_start(void)
 
 	for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
 		for (cpu = 0; cpu < cpu_count; cpu++) {
-			hsw_ext_get_count(num, &val, cpu);
+			is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu);
 			previous_count[num][cpu] = val;
 		}
 	}
@@ -134,7 +134,7 @@ static int hsw_ext_stop(void)
 
 	for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
 		for (cpu = 0; cpu < cpu_count; cpu++) {
-			is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu);
+			is_valid[cpu] |= !hsw_ext_get_count(num, &val, cpu);
 			current_count[num][cpu] = val;
 		}
 	}
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
index ae6af354a81d..73b6b10cbdd2 100644
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -33,7 +33,7 @@ static int mperf_get_count_percent(unsigned int self_id, double *percent,
 				   unsigned int cpu);
 static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
 				unsigned int cpu);
-static struct timespec time_start, time_end;
+static struct timespec *time_start, *time_end;
 
 static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
 	{
@@ -148,7 +148,7 @@ static int mperf_measure_stats(unsigned int cpu)
 	ret = get_aperf_mperf(cpu, &aval, &mval);
 	aperf_current_count[cpu] = aval;
 	mperf_current_count[cpu] = mval;
-	is_valid[cpu] = !ret;
+	is_valid[cpu] |= !ret;
 
 	return 0;
 }
@@ -174,7 +174,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent,
 		dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
 		       mperf_cstates[id].name, mperf_diff, tsc_diff);
 	} else if (max_freq_mode == MAX_FREQ_SYSFS) {
-		timediff = max_frequency * timespec_diff_us(time_start, time_end);
+		timediff = max_frequency * timespec_diff_us(time_start[cpu], time_end[cpu]);
 		*percent = 100.0 * mperf_diff / timediff;
 		dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n",
 		       mperf_cstates[id].name, mperf_diff, timediff);
@@ -207,7 +207,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
 	if (max_freq_mode == MAX_FREQ_TSC_REF) {
 		/* Calculate max_freq from TSC count */
 		tsc_diff = tsc_at_measure_end[cpu] - tsc_at_measure_start[cpu];
-		time_diff = timespec_diff_us(time_start, time_end);
+		time_diff = timespec_diff_us(time_start[cpu], time_end[cpu]);
 		max_frequency = tsc_diff / time_diff;
 	}
 
@@ -226,9 +226,8 @@ static int mperf_start(void)
 {
 	int cpu;
 
-	clock_gettime(CLOCK_REALTIME, &time_start);
-
 	for (cpu = 0; cpu < cpu_count; cpu++) {
+		clock_gettime(CLOCK_REALTIME, &time_start[cpu]);
 		mperf_get_tsc(&tsc_at_measure_start[cpu]);
 		mperf_init_stats(cpu);
 	}
@@ -243,9 +242,9 @@ static int mperf_stop(void)
 	for (cpu = 0; cpu < cpu_count; cpu++) {
 		mperf_measure_stats(cpu);
 		mperf_get_tsc(&tsc_at_measure_end[cpu]);
+		clock_gettime(CLOCK_REALTIME, &time_end[cpu]);
 	}
 
-	clock_gettime(CLOCK_REALTIME, &time_end);
 	return 0;
 }
 
@@ -349,6 +348,8 @@ struct cpuidle_monitor *mperf_register(void)
 	aperf_current_count = calloc(cpu_count, sizeof(unsigned long long));
 	tsc_at_measure_start = calloc(cpu_count, sizeof(unsigned long long));
 	tsc_at_measure_end = calloc(cpu_count, sizeof(unsigned long long));
+	time_start = calloc(cpu_count, sizeof(struct timespec));
+	time_end = calloc(cpu_count, sizeof(struct timespec));
 	mperf_monitor.name_len = strlen(mperf_monitor.name);
 	return &mperf_monitor;
 }
@@ -361,6 +362,8 @@ void mperf_unregister(void)
 	free(aperf_current_count);
 	free(tsc_at_measure_start);
 	free(tsc_at_measure_end);
+	free(time_start);
+	free(time_end);
 	free(is_valid);
 }
 
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
index 16eaf006f61f..6b1733782ffa 100644
--- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -151,7 +151,7 @@ static int nhm_stop(void)
 
 	for (num = 0; num < NHM_CSTATE_COUNT; num++) {
 		for (cpu = 0; cpu < cpu_count; cpu++) {
-			is_valid[cpu] = !nhm_get_count(num, &val, cpu);
+			is_valid[cpu] |= !nhm_get_count(num, &val, cpu);
 			current_count[num][cpu] = val;
 		}
 	}
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
index 811d63ab17a7..5969b88a85b4 100644
--- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -115,7 +115,7 @@ static int snb_start(void)
 
 	for (num = 0; num < SNB_CSTATE_COUNT; num++) {
 		for (cpu = 0; cpu < cpu_count; cpu++) {
-			snb_get_count(num, &val, cpu);
+			is_valid[cpu] = !snb_get_count(num, &val, cpu);
 			previous_count[num][cpu] = val;
 		}
 	}
@@ -132,7 +132,7 @@ static int snb_stop(void)
 
 	for (num = 0; num < SNB_CSTATE_COUNT; num++) {
 		for (cpu = 0; cpu < cpu_count; cpu++) {
-			is_valid[cpu] = !snb_get_count(num, &val, cpu);
+			is_valid[cpu] |= !snb_get_count(num, &val, cpu);
 			current_count[num][cpu] = val;
 		}
 	}
diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile
index 7221f2f55e8b..8d3a02a20f3d 100644
--- a/tools/power/x86/intel-speed-select/Makefile
+++ b/tools/power/x86/intel-speed-select/Makefile
@@ -13,7 +13,7 @@ endif
 # Do not use make's built-in rules
 # (this improves performance and avoids hard-to-debug behaviour);
 MAKEFLAGS += -r
-override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(shell $(CC) -print-sysroot)/usr/include/libnl3
 override LDFLAGS += -lnl-genl-3 -lnl-3
 
 ALL_TARGETS := intel-speed-select
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 5127be34869e..eaa420ac848d 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -16,7 +16,7 @@ struct process_cmd_struct {
 	int arg;
 };
 
-static const char *version_str = "v1.20";
+static const char *version_str = "v1.22";
 
 static const int supported_api_ver = 3;
 static struct isst_if_platform_info isst_platform_info;
@@ -46,8 +46,9 @@ static int force_online_offline;
 static int auto_mode;
 static int fact_enable_fail;
 static int cgroupv2;
+static int max_pkg_id;
 static int max_die_id;
-static int max_punit_id;
+static int max_die_id_package_0;
 
 /* clos related */
 static int current_clos = -1;
@@ -557,6 +558,8 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
 		if (id.pkg < 0 || id.die < 0 || id.punit < 0)
 			continue;
 
+		id.die = id.die % (max_die_id_package_0 + 1);
+
 		valid_mask[id.pkg][id.die] = 1;
 
 		if (cpus[id.pkg][id.die][id.punit] == -1)
@@ -564,11 +567,11 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
 	}
 
 	for (i = 0; i < MAX_PACKAGE_COUNT; i++) {
-		if (max_die_id == max_punit_id) {
+		if (max_die_id > max_pkg_id) {
 			for (k = 0; k < MAX_PUNIT_PER_DIE && k < MAX_DIE_PER_PACKAGE; k++) {
 				id.cpu = cpus[i][k][k];
 				id.pkg = i;
-				id.die = k;
+				id.die = get_physical_die_id(id.cpu);
 				id.punit = k;
 				if (isst_is_punit_valid(&id))
 					callback(&id, arg1, arg2, arg3, arg4);
@@ -586,7 +589,10 @@ void for_each_online_power_domain_in_set(void (*callback)(struct isst_id *, void
 			for (k = 0; k < MAX_PUNIT_PER_DIE; k++) {
 				id.cpu = cpus[i][j][k];
 				id.pkg = i;
-				id.die = j;
+				if (id.cpu >= 0)
+					id.die = get_physical_die_id(id.cpu);
+				else
+					id.die = id.pkg;
 				id.punit = k;
 				if (isst_is_punit_valid(&id))
 					callback(&id, arg1, arg2, arg3, arg4);
@@ -788,6 +794,8 @@ static void create_cpu_map(void)
 		cpu_map[i].die_id = die_id;
 		cpu_map[i].core_id = core_id;
 
+		if (max_pkg_id < pkg_id)
+			max_pkg_id = pkg_id;
 
 		punit_id = 0;
 
@@ -812,8 +820,8 @@ static void create_cpu_map(void)
 		if (max_die_id < die_id)
 			max_die_id = die_id;
 
-		if (max_punit_id < cpu_map[i].punit_id)
-			max_punit_id = cpu_map[i].punit_id;
+		if (!pkg_id && max_die_id_package_0 < die_id)
+			max_die_id_package_0 = die_id;
 
 		debug_printf(
 			"map logical_cpu:%d core: %d die:%d pkg:%d punit:%d punit_cpu:%d punit_core:%d\n",
diff --git a/tools/power/x86/intel-speed-select/isst-core-tpmi.c b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
index 32ea70c7dbd8..da53aaa27fc9 100644
--- a/tools/power/x86/intel-speed-select/isst-core-tpmi.c
+++ b/tools/power/x86/intel-speed-select/isst-core-tpmi.c
@@ -329,7 +329,7 @@ static int tpmi_get_get_trls(struct isst_id *id, int config_index,
 	return 0;
 }
 
-static int tpmi_get_get_trl(struct isst_id *id, int level, int config_index,
+static int tpmi_get_get_trl(struct isst_id *id, int config_index, int level,
 			    int *trl)
 {
 	struct isst_pkg_ctdp_level_info ctdp_level;
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 07ebd08f3202..da5a59a4c545 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -173,7 +173,11 @@ static int print_package_info(struct isst_id *id, FILE *outf)
 
 	if (out_format_is_json()) {
 		if (api_version() > 1) {
-			if (id->cpu < 0)
+			if (id->die < 0 && id->cpu < 0)
+				snprintf(header, sizeof(header),
+					 "package-%d:die-IO:powerdomain-%d:cpu-None",
+					 id->pkg, id->punit);
+			else if (id->cpu < 0)
 				snprintf(header, sizeof(header),
 					 "package-%d:die-%d:powerdomain-%d:cpu-None",
 					 id->pkg, id->die, id->punit);
@@ -190,7 +194,10 @@ static int print_package_info(struct isst_id *id, FILE *outf)
 	}
 	snprintf(header, sizeof(header), "package-%d", id->pkg);
 	format_and_print(outf, level++, header, NULL);
-	snprintf(header, sizeof(header), "die-%d", id->die);
+	if (id->die < 0)
+		snprintf(header, sizeof(header), "die-IO");
+	else
+		snprintf(header, sizeof(header), "die-%d", id->die);
 	format_and_print(outf, level++, header, NULL);
 	if (api_version() > 1) {
 		snprintf(header, sizeof(header), "powerdomain-%d", id->punit);
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index a7f7ed01421c..99bf905ade81 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -136,7 +136,7 @@ displays the statistics gathered since it was forked.
 The system configuration dump (if --quiet is not used) is followed by statistics.  The first row of the statistics labels the content of each column (below).  The second row of statistics is the system summary line.  The system summary line has a '-' in the columns for the Package, Core, and CPU.  The contents of the system summary line depends on the type of column.  Columns that count items (eg. IRQ) show the sum across all CPUs in the system.  Columns that show a percentage show the average across all CPUs in the system.  Columns that dump raw MSR values simply show 0 in the summary.  After the system summary row, each row describes a specific Package/Core/CPU.  Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
 .SH COLUMN DESCRIPTIONS
 .PP
-\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any.  This counter is disabled by default, and is enabled with "--enable usec", or --debug.  On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
+\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any.  This counter is disabled by default, and is enabled with "--enable usec", or --debug.  On the summary row, usec refers to the total elapsed time to snapshot the procfs/sysfs and collect the counters on all cpus.
 .PP
 \fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected.  This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug".  On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
 .PP
@@ -190,7 +190,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics
 .PP
 \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
 .PP
-\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default.  Enable with --enable SysWatt.
+\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS).
 .PP
 \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package.  Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system.  Note that the meaning of this field is model specific.  For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits.  Comparing PkgWatt and PkgTmp to system limits is necessary.
 .PP
@@ -516,14 +516,40 @@ that they count at TSC rate, which is true on all processors tested to date.
 Volume 3B: System Programming Guide"
 https://www.intel.com/products/processor/manuals/
 
+.SH RUN THE LATEST VERSION
+If turbostat complains that it doesn't recognize your processor,
+please try the latest version.
+
+The latest version of turbostat does not require the latest version of the Linux kernel.
+However, some features, such as perf(1) counters, do require kernel support.
+
+The latest turbostat release is available in the upstream Linux Kernel source tree.
+eg. "git pull https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git"
+and run make in tools/power/x86/turbostat/.
+
+n.b. "make install" will update your system manually, but a distro update may subsequently downgrade your turbostat to an older version.
+For this reason, manually installing to /usr/local/bin may be what you want.
+
+Note that turbostat/Makefile has a "make snapshot" target, which will create a tar file
+that can build without a local kernel source tree.
+
+If the upstream version isn't new enough, the development tree can be found here:
+"git pull https://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat"
+
+If the development tree doesn't work, please contact the author via chat,
+or via email with the word "turbostat" on the Subject line.
+
 .SH FILES
 .ta
 .nf
+/sys/bus/event_source/devices/
 /dev/cpu/*/msr
+/sys/class/intel_pmt/
+/sys/devices/system/cpu/
 .fi
 
 .SH "SEE ALSO"
-msr(4), vmstat(8)
+perf(1), msr(4), vmstat(8)
 .PP
 .SH AUTHOR
 .nf
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 58a487c225a7..26057af6b5a1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
  * turbostat -- show CPU frequency and C-state residency
  * on modern Intel and AMD processors.
  *
- * Copyright (c) 2024 Intel Corporation.
+ * Copyright (c) 2025 Intel Corporation.
  * Len Brown <len.brown@intel.com>
  */
 
@@ -95,6 +95,8 @@
 #define INTEL_ECORE_TYPE	0x20
 #define INTEL_PCORE_TYPE	0x40
 
+#define ROUND_UP_TO_PAGE_SIZE(n) (((n) + 0x1000UL-1UL) & ~(0x1000UL-1UL))
+
 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M };
 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE };
@@ -202,6 +204,8 @@ struct msr_counter bic[] = {
 	{ 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 },
 	{ 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 },
 	{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
+	{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
+	{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
 };
 
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -266,14 +270,16 @@ struct msr_counter bic[] = {
 #define	BIC_Diec6		(1ULL << 58)
 #define	BIC_SysWatt		(1ULL << 59)
 #define	BIC_Sys_J		(1ULL << 60)
+#define	BIC_NMI			(1ULL << 61)
+#define	BIC_CPU_c1e		(1ULL << 62)
 
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
-#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
+#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
+#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
 #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
-#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
+#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
 
-#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J)
+#define BIC_DISABLED_BY_DEFAULT	(BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
 
 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
 unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
@@ -326,6 +332,7 @@ unsigned int rapl_joules;
 unsigned int summary_only;
 unsigned int list_header_only;
 unsigned int dump_only;
+unsigned int force_load;
 unsigned int has_aperf;
 unsigned int has_aperf_access;
 unsigned int has_epb;
@@ -353,7 +360,7 @@ unsigned long long cpuidle_cur_sys_lpi_us;
 unsigned int tj_max;
 unsigned int tj_max_override;
 double rapl_power_units, rapl_time_units;
-double rapl_dram_energy_units, rapl_energy_units;
+double rapl_dram_energy_units, rapl_energy_units, rapl_psys_energy_units;
 double rapl_joule_counter_range;
 unsigned int crystal_hz;
 unsigned long long tsc_hz;
@@ -365,6 +372,9 @@ unsigned int has_hwp_activity_window;	/* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;	/* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;	/* IA32_HWP_REQUEST_PKG */
 unsigned int first_counter_read = 1;
+
+static struct timeval procsysfs_tv_begin;
+
 int ignore_stdin;
 bool no_msr;
 bool no_perf;
@@ -419,6 +429,7 @@ struct platform_features {
 	bool has_per_core_rapl;	/* Indicates cores energy collection is per-core, not per-package. AMD specific for now */
 	bool has_rapl_divisor;	/* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */
 	bool has_fixed_rapl_unit;	/* Fixed Energy Unit used for DRAM RAPL Domain */
+	bool has_fixed_rapl_psys_unit;	/* Fixed Energy Unit used for PSYS RAPL Domain */
 	int rapl_quirk_tdp;	/* Hardcoded TDP value when cannot be retrieved from hardware */
 	int tcc_offset_bits;	/* TCC Offset bits in MSR_IA32_TEMPERATURE_TARGET */
 	bool enable_tsc_tweak;	/* Use CPU Base freq instead of TSC freq for aperf/mperf counter */
@@ -819,6 +830,7 @@ static const struct platform_features spr_features = {
 	.has_msr_core_c1_res = 1,
 	.has_irtl_msrs = 1,
 	.has_cst_prewake_bit = 1,
+	.has_fixed_rapl_psys_unit = 1,
 	.trl_msrs = TRL_BASE | TRL_CORECOUNT,
 	.rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS,
 };
@@ -1024,6 +1036,7 @@ static const struct platform_data turbostat_pdata[] = {
 	{ INTEL_ARROWLAKE_U, &adl_features },
 	{ INTEL_ARROWLAKE, &adl_features },
 	{ INTEL_LUNARLAKE_M, &lnl_features },
+	{ INTEL_PANTHERLAKE_L, &lnl_features },
 	{ INTEL_ATOM_SILVERMONT, &slv_features },
 	{ INTEL_ATOM_SILVERMONT_D, &slvd_features },
 	{ INTEL_ATOM_AIRMONT, &amt_features },
@@ -1036,13 +1049,14 @@ static const struct platform_data turbostat_pdata[] = {
 	{ INTEL_ATOM_GRACEMONT, &adl_features },
 	{ INTEL_ATOM_CRESTMONT_X, &srf_features },
 	{ INTEL_ATOM_CRESTMONT, &grr_features },
+	{ INTEL_ATOM_DARKMONT_X, &srf_features },
 	{ INTEL_XEON_PHI_KNL, &knl_features },
 	{ INTEL_XEON_PHI_KNM, &knl_features },
 	/*
 	 * Missing support for
 	 * INTEL_ICELAKE
 	 * INTEL_ATOM_SILVERMONT_MID
-	 * INTEL_ATOM_AIRMONT_MID
+	 * INTEL_ATOM_SILVERMONT_MID2
 	 * INTEL_ATOM_AIRMONT_NP
 	 */
 	{ 0, NULL },
@@ -1054,9 +1068,10 @@ void probe_platform_features(unsigned int family, unsigned int model)
 {
 	int i;
 
-	platform = &default_features;
 
 	if (authentic_amd || hygon_genuine) {
+		/* fallback to default features on unsupported models */
+		force_load++;
 		if (max_extended_level >= 0x80000007) {
 			unsigned int eax, ebx, ecx, edx;
 
@@ -1065,11 +1080,11 @@ void probe_platform_features(unsigned int family, unsigned int model)
 			if ((edx & (1 << 14)) && family >= 0x17)
 				platform = &amd_features_with_rapl;
 		}
-		return;
+		goto end;
 	}
 
 	if (!genuine_intel)
-		return;
+		goto end;
 
 	for (i = 0; turbostat_pdata[i].features; i++) {
 		if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) {
@@ -1077,6 +1092,19 @@ void probe_platform_features(unsigned int family, unsigned int model)
 			return;
 		}
 	}
+
+end:
+	if (force_load && !platform) {
+		fprintf(outf, "Forced to run on unsupported platform!\n");
+		platform = &default_features;
+	}
+
+	if (platform)
+		return;
+
+	fprintf(stderr, "Unsupported platform detected.\n"
+		"\tSee RUN THE LATEST VERSION on turbostat(8)\n");
+	exit(1);
 }
 
 /* Model specific support End */
@@ -1094,8 +1122,8 @@ int backwards_count;
 char *progname;
 
 #define CPU_SUBSET_MAXCPUS	1024	/* need to use before probe... */
-cpu_set_t *cpu_present_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
-size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
+cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
 #define MAX_ADDED_THREAD_COUNTERS 24
 #define MAX_ADDED_CORE_COUNTERS 8
 #define MAX_ADDED_PACKAGE_COUNTERS 16
@@ -1271,7 +1299,7 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {
 	 .msr = MSR_PLATFORM_ENERGY_STATUS,
 	 .msr_mask = 0x00000000FFFFFFFF,
 	 .msr_shift = 0,
-	 .platform_rapl_msr_scale = &rapl_energy_units,
+	 .platform_rapl_msr_scale = &rapl_psys_energy_units,
 	 .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM,
 	 .bic = BIC_SysWatt | BIC_Sys_J,
 	 .compat_scale = 1.0,
@@ -1510,6 +1538,17 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = {
 #define PMT_COUNTER_MTL_DC6_LSB    0
 #define PMT_COUNTER_MTL_DC6_MSB    63
 #define PMT_MTL_DC6_GUID           0x1a067102
+#define PMT_MTL_DC6_SEQ            0
+
+#define PMT_COUNTER_CWF_MC1E_OFFSET_BASE          20936
+#define PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT     24
+#define PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE 12
+#define PMT_COUNTER_CWF_CPUS_PER_MODULE           4
+#define PMT_COUNTER_CWF_MC1E_LSB                  0
+#define PMT_COUNTER_CWF_MC1E_MSB                  63
+#define PMT_CWF_MC1E_GUID                         0x14421519
+
+unsigned long long tcore_clock_freq_hz = 800000000;
 
 #define PMT_COUNTER_NAME_SIZE_BYTES      16
 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32
@@ -1533,6 +1572,7 @@ struct pmt_mmio {
 enum pmt_datatype {
 	PMT_TYPE_RAW,
 	PMT_TYPE_XTAL_TIME,
+	PMT_TYPE_TCORE_CLOCK,
 };
 
 struct pmt_domain_info {
@@ -1562,6 +1602,93 @@ struct pmt_counter {
 	struct pmt_domain_info *domains;
 };
 
+/*
+ * PMT telemetry directory iterator.
+ * Used to iterate telemetry files in sysfs in correct order.
+ */
+struct pmt_diriter_t {
+	DIR *dir;
+	struct dirent **namelist;
+	unsigned int num_names;
+	unsigned int current_name_idx;
+};
+
+int pmt_telemdir_filter(const struct dirent *e)
+{
+	unsigned int dummy;
+
+	return sscanf(e->d_name, "telem%u", &dummy);
+}
+
+int pmt_telemdir_sort(const struct dirent **a, const struct dirent **b)
+{
+	unsigned int aidx = 0, bidx = 0;
+
+	sscanf((*a)->d_name, "telem%u", &aidx);
+	sscanf((*b)->d_name, "telem%u", &bidx);
+
+	return aidx >= bidx;
+}
+
+const struct dirent *pmt_diriter_next(struct pmt_diriter_t *iter)
+{
+	const struct dirent *ret = NULL;
+
+	if (!iter->dir)
+		return NULL;
+
+	if (iter->current_name_idx >= iter->num_names)
+		return NULL;
+
+	ret = iter->namelist[iter->current_name_idx];
+	++iter->current_name_idx;
+
+	return ret;
+}
+
+const struct dirent *pmt_diriter_begin(struct pmt_diriter_t *iter, const char *pmt_root_path)
+{
+	int num_names = iter->num_names;
+
+	if (!iter->dir) {
+		iter->dir = opendir(pmt_root_path);
+		if (iter->dir == NULL)
+			return NULL;
+
+		num_names = scandir(pmt_root_path, &iter->namelist, pmt_telemdir_filter, pmt_telemdir_sort);
+		if (num_names == -1)
+			return NULL;
+	}
+
+	iter->current_name_idx = 0;
+	iter->num_names = num_names;
+
+	return pmt_diriter_next(iter);
+}
+
+void pmt_diriter_init(struct pmt_diriter_t *iter)
+{
+	memset(iter, 0, sizeof(*iter));
+}
+
+void pmt_diriter_remove(struct pmt_diriter_t *iter)
+{
+	if (iter->namelist) {
+		for (unsigned int i = 0; i < iter->num_names; i++) {
+			free(iter->namelist[i]);
+			iter->namelist[i] = NULL;
+		}
+	}
+
+	free(iter->namelist);
+	iter->namelist = NULL;
+	iter->num_names = 0;
+	iter->current_name_idx = 0;
+
+	closedir(iter->dir);
+	iter->dir = NULL;
+}
+
 unsigned int pmt_counter_get_width(const struct pmt_counter *p)
 {
 	return (p->msb - p->lsb) + 1;
@@ -1611,6 +1738,7 @@ struct thread_data {
 	unsigned long long c1;
 	unsigned long long instr_count;
 	unsigned long long irq_count;
+	unsigned long long nmi_count;
 	unsigned int smi_count;
 	unsigned int cpu_id;
 	unsigned int apic_id;
@@ -1917,6 +2045,7 @@ struct timeval tv_even, tv_odd, tv_delta;
 
 int *irq_column_2_cpu;		/* /proc/interrupts column numbers */
 int *irqs_per_cpu;		/* indexed by cpu_num */
+int *nmi_per_cpu;		/* indexed by cpu_num */
 
 void setup_all_buffers(bool startup);
 
@@ -1944,6 +2073,8 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
 {
 	int retval, pkg_no, core_no, thread_no, node_no;
 
+	retval = 0;
+
 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
 		for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
 			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
@@ -1959,14 +2090,12 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk
 					c = GET_CORE(core_base, core_no, node_no, pkg_no);
 					p = GET_PKG(pkg_base, pkg_no);
 
-					retval = func(t, c, p);
-					if (retval)
-						return retval;
+					retval |= func(t, c, p);
 				}
 			}
 		}
 	}
-	return 0;
+	return retval;
 }
 
 int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -2085,13 +2214,17 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 int probe_msr(int cpu, off_t offset)
 {
 	ssize_t retval;
-	unsigned long long dummy;
+	unsigned long long value;
 
 	assert(!no_msr);
 
-	retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+	retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
 
-	if (retval != sizeof(dummy))
+	/*
+	 * Expect MSRs to accumulate some non-zero value since the system was powered on.
+	 * Treat zero as a read failure.
+	 */
+	if (retval != sizeof(value) || value == 0)
 		return 1;
 
 	return 0;
@@ -2135,41 +2268,54 @@ void help(void)
 		"when COMMAND completes.\n"
 		"If no COMMAND is specified, turbostat wakes every 5-seconds\n"
 		"to print statistics, until interrupted.\n"
-		"  -a, --add	add a counter\n"
+		"  -a, --add counter\n"
+		"		add a counter\n"
 		"		  eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
 		"		  eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n"
 		"		  eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n"
-		"  -c, --cpu	cpu-set	limit output to summary plus cpu-set:\n"
+		"  -c, --cpu cpu-set\n"
+		"		limit output to summary plus cpu-set:\n"
 		"		  {core | package | j,k,l..m,n-p }\n"
-		"  -d, --debug	displays usec, Time_Of_Day_Seconds and more debugging\n"
+		"  -d, --debug\n"
+		"		displays usec, Time_Of_Day_Seconds and more debugging\n"
 		"		debug messages are printed to stderr\n"
-		"  -D, --Dump	displays the raw counter values\n"
-		"  -e, --enable	[all | column]\n"
+		"  -D, --Dump\n"
+		"		displays the raw counter values\n"
+		"  -e, --enable [all | column]\n"
 		"		shows all or the specified disabled column\n"
-		"  -H, --hide [column|column,column,...]\n"
+		"  -f, --force\n"
+		"		force load turbostat with minimum default features on unsupported platforms.\n"
+		"  -H, --hide [column | column,column,...]\n"
 		"		hide the specified column(s)\n"
 		"  -i, --interval sec.subsec\n"
-		"		Override default 5-second measurement interval\n"
-		"  -J, --Joules	displays energy in Joules instead of Watts\n"
-		"  -l, --list	list column headers only\n"
-		"  -M, --no-msr Disable all uses of the MSR driver\n"
-		"  -P, --no-perf Disable all uses of the perf API\n"
+		"		override default 5-second measurement interval\n"
+		"  -J, --Joules\n"
+		"		displays energy in Joules instead of Watts\n"
+		"  -l, --list\n"
+		"		list column headers only\n"
+		"  -M, --no-msr\n"
+		"		disable all uses of the MSR driver\n"
+		"  -P, --no-perf\n"
+		"		disable all uses of the perf API\n"
 		"  -n, --num_iterations num\n"
 		"		number of the measurement iterations\n"
 		"  -N, --header_iterations num\n"
 		"		print header every num iterations\n"
 		"  -o, --out file\n"
 		"		create or truncate \"file\" for all output\n"
-		"  -q, --quiet	skip decoding system configuration header\n"
-		"  -s, --show [column|column,column,...]\n"
+		"  -q, --quiet\n"
+		"		skip decoding system configuration header\n"
+		"  -s, --show [column | column,column,...]\n"
 		"		show only the specified column(s)\n"
 		"  -S, --Summary\n"
 		"		limits output to 1-line system summary per interval\n"
 		"  -T, --TCC temperature\n"
 		"		sets the Thermal Control Circuit temperature in\n"
 		"		  degrees Celsius\n"
-		"  -h, --help	print this help message\n"
-		"  -v, --version	print version information\n" "\n" "For more help, run \"man turbostat\"\n");
+		"  -h, --help\n"
+		"		print this help message\n"
+		"  -v, --version\n"
+		"		print version information\n\nFor more help, run \"man turbostat\"\n");
 }
 
 /*
@@ -2289,6 +2435,12 @@ void print_header(char *delim)
 		else
 			outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
 	}
+	if (DO_BIC(BIC_NMI)) {
+		if (sums_need_wide_columns)
+			outp += sprintf(outp, "%s     NMI", (printed++ ? delim : ""));
+		else
+			outp += sprintf(outp, "%sNMI", (printed++ ? delim : ""));
+	}
 
 	if (DO_BIC(BIC_SMI))
 		outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
@@ -2335,6 +2487,7 @@ void print_header(char *delim)
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+		case PMT_TYPE_TCORE_CLOCK:
 			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
 			break;
 		}
@@ -2409,6 +2562,7 @@ void print_header(char *delim)
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+		case PMT_TYPE_TCORE_CLOCK:
 			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
 			break;
 		}
@@ -2540,6 +2694,7 @@ void print_header(char *delim)
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+		case PMT_TYPE_TCORE_CLOCK:
 			outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name);
 			break;
 		}
@@ -2575,6 +2730,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
 
 		if (DO_BIC(BIC_IRQ))
 			outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
+		if (DO_BIC(BIC_NMI))
+			outp += sprintf(outp, "IRQ: %lld\n", t->nmi_count);
 		if (DO_BIC(BIC_SMI))
 			outp += sprintf(outp, "SMI: %d\n", t->smi_count);
 
@@ -2794,6 +2951,14 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
 	}
 
+	/* NMI */
+	if (DO_BIC(BIC_NMI)) {
+		if (sums_need_wide_columns)
+			outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->nmi_count);
+		else
+			outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->nmi_count);
+	}
+
 	/* SMI */
 	if (DO_BIC(BIC_SMI))
 		outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
@@ -2848,7 +3013,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) {
 		const unsigned long value_raw = t->pmt_counter[i];
-		const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+		double value_converted;
 		switch (ppmt->type) {
 		case PMT_TYPE_RAW:
 			if (pmt_counter_get_width(ppmt) <= 32)
@@ -2860,8 +3025,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 			break;
+
+		case PMT_TYPE_TCORE_CLOCK:
+			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 		}
 	}
 
@@ -2928,7 +3098,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) {
 		const unsigned long value_raw = c->pmt_counter[i];
-		const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+		double value_converted;
 		switch (ppmt->type) {
 		case PMT_TYPE_RAW:
 			if (pmt_counter_get_width(ppmt) <= 32)
@@ -2940,8 +3110,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 			break;
+
+		case PMT_TYPE_TCORE_CLOCK:
+			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 		}
 	}
 
@@ -3126,7 +3301,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) {
 		const unsigned long value_raw = p->pmt_counter[i];
-		const double value_converted = 100.0 * value_raw / crystal_hz / interval_float;
+		double value_converted;
 		switch (ppmt->type) {
 		case PMT_TYPE_RAW:
 			if (pmt_counter_get_width(ppmt) <= 32)
@@ -3138,8 +3313,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 			break;
 
 		case PMT_TYPE_XTAL_TIME:
+			value_converted = 100.0 * value_raw / crystal_hz / interval_float;
 			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 			break;
+
+		case PMT_TYPE_TCORE_CLOCK:
+			value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float;
+			outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted);
 		}
 	}
 
@@ -3409,6 +3589,9 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d
 	if (DO_BIC(BIC_IRQ))
 		old->irq_count = new->irq_count - old->irq_count;
 
+	if (DO_BIC(BIC_NMI))
+		old->nmi_count = new->nmi_count - old->nmi_count;
+
 	if (DO_BIC(BIC_SMI))
 		old->smi_count = new->smi_count - old->smi_count;
 
@@ -3447,12 +3630,10 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
 
 	/* always calculate thread delta */
 	retval = delta_thread(t, t2, c2);	/* c2 is core delta */
-	if (retval)
-		return retval;
 
 	/* calculate package delta only for 1st core in package */
 	if (is_cpu_first_core_in_package(t, c, p))
-		retval = delta_package(p, p2);
+		retval |= delta_package(p, p2);
 
 	return retval;
 }
@@ -3489,6 +3670,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	t->instr_count = 0;
 
 	t->irq_count = 0;
+	t->nmi_count = 0;
 	t->smi_count = 0;
 
 	c->c3 = 0;
@@ -3580,7 +3762,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	/* remember first tv_begin */
 	if (average.threads.tv_begin.tv_sec == 0)
-		average.threads.tv_begin = t->tv_begin;
+		average.threads.tv_begin = procsysfs_tv_begin;
 
 	/* remember last tv_end */
 	average.threads.tv_end = t->tv_end;
@@ -3593,6 +3775,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 	average.threads.instr_count += t->instr_count;
 
 	average.threads.irq_count += t->irq_count;
+	average.threads.nmi_count += t->nmi_count;
 	average.threads.smi_count += t->smi_count;
 
 	for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
@@ -3734,6 +3917,9 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data
 
 	if (average.threads.irq_count > 9999999)
 		sums_need_wide_columns = 1;
+	if (average.threads.nmi_count > 9999999)
+		sums_need_wide_columns = 1;
+
 
 	average.cores.c3 /= topo.allowed_cores;
 	average.cores.c6 /= topo.allowed_cores;
@@ -4546,7 +4732,8 @@ unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb)
 
 unsigned long pmt_read_counter(struct pmt_counter *ppmt, unsigned int domain_id)
 {
-	assert(domain_id < ppmt->num_domains);
+	if (domain_id >= ppmt->num_domains)
+		return 0;
 
 	const unsigned long *pmmio = ppmt->domains[domain_id].pcounter;
 	const unsigned long value = pmmio ? *pmmio : 0;
@@ -4590,6 +4777,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	if (DO_BIC(BIC_IRQ))
 		t->irq_count = irqs_per_cpu[cpu];
+	if (DO_BIC(BIC_NMI))
+		t->nmi_count = nmi_per_cpu[cpu];
 
 	get_cstate_counters(cpu, t, c, p);
 
@@ -5335,6 +5524,7 @@ void free_all_buffers(void)
 
 	free(irq_column_2_cpu);
 	free(irqs_per_cpu);
+	free(nmi_per_cpu);
 
 	for (i = 0; i <= topo.max_cpu_num; ++i) {
 		if (cpus[i].put_ids)
@@ -5566,6 +5756,8 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
 {
 	int retval, pkg_no, node_no, core_no, thread_no;
 
+	retval = 0;
+
 	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
 		for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
 			for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
@@ -5587,14 +5779,12 @@ int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
 					p = GET_PKG(pkg_base, pkg_no);
 					p2 = GET_PKG(pkg_base2, pkg_no);
 
-					retval = func(t, c, p, t2, c2, p2);
-					if (retval)
-						return retval;
+					retval |= func(t, c, p, t2, c2, p2);
 				}
 			}
 		}
 	}
-	return 0;
+	return retval;
 }
 
 /*
@@ -5791,31 +5981,37 @@ int snapshot_proc_interrupts(void)
 
 		irq_column_2_cpu[column] = cpu_number;
 		irqs_per_cpu[cpu_number] = 0;
+		nmi_per_cpu[cpu_number] = 0;
 	}
 
 	/* read /proc/interrupt count lines and sum up irqs per cpu */
 	while (1) {
 		int column;
 		char buf[64];
+		int this_row_is_nmi = 0;
 
-		retval = fscanf(fp, " %s:", buf);	/* flush irq# "N:" */
+		retval = fscanf(fp, " %s:", buf);	/* irq# "N:" */
 		if (retval != 1)
 			break;
 
+		if (strncmp(buf, "NMI", strlen("NMI")) == 0)
+			this_row_is_nmi = 1;
+
 		/* read the count per cpu */
 		for (column = 0; column < topo.num_cpus; ++column) {
 
 			int cpu_number, irq_count;
 
 			retval = fscanf(fp, " %d", &irq_count);
+
 			if (retval != 1)
 				break;
 
 			cpu_number = irq_column_2_cpu[column];
 			irqs_per_cpu[cpu_number] += irq_count;
-
+			if (this_row_is_nmi)
+				nmi_per_cpu[cpu_number] += irq_count;
 		}
-
 		while (getc(fp) != '\n') ;	/* flush interrupt description */
 
 	}
@@ -5912,7 +6108,9 @@ int snapshot_sys_lpi_us(void)
  */
 int snapshot_proc_sysfs_files(void)
 {
-	if (DO_BIC(BIC_IRQ))
+	gettimeofday(&procsysfs_tv_begin, (struct timezone *)NULL);
+
+	if (DO_BIC(BIC_IRQ) || DO_BIC(BIC_NMI))
 		if (snapshot_proc_interrupts())
 			return 1;
 
@@ -7043,6 +7241,11 @@ void rapl_probe_intel(void)
 	else
 		rapl_dram_energy_units = rapl_energy_units;
 
+	if (platform->has_fixed_rapl_psys_unit)
+		rapl_psys_energy_units = 1.0;
+	else
+		rapl_psys_energy_units = rapl_energy_units;
+
 	time_unit = msr >> 16 & 0xF;
 	if (time_unit == 0)
 		time_unit = 0xA;
@@ -8233,6 +8436,7 @@ void process_cpuid()
 		aperf_mperf_multiplier = platform->need_perf_multiplier ? 1024 : 1;
 
 	BIC_PRESENT(BIC_IRQ);
+	BIC_PRESENT(BIC_NMI);
 	BIC_PRESENT(BIC_TSC_MHz);
 }
 
@@ -8292,6 +8496,33 @@ int dir_filter(const struct dirent *dirp)
 		return 0;
 }
 
+char *possible_file = "/sys/devices/system/cpu/possible";
+char possible_buf[1024];
+
+int initialize_cpu_possible_set(void)
+{
+	FILE *fp;
+
+	fp = fopen(possible_file, "r");
+	if (!fp) {
+		warn("open %s", possible_file);
+		return -1;
+	}
+	if (fread(possible_buf, sizeof(char), 1024, fp) == 0) {
+		warn("read %s", possible_file);
+		goto err;
+	}
+	if (parse_cpu_str(possible_buf, cpu_possible_set, cpu_possible_setsize)) {
+		warnx("%s: cpu str malformat %s\n", possible_file, cpu_effective_str);
+		goto err;
+	}
+	return 0;
+
+err:
+	fclose(fp);
+	return -1;
+}
+
 void topology_probe(bool startup)
 {
 	int i;
@@ -8324,6 +8555,16 @@ void topology_probe(bool startup)
 	for_all_proc_cpus(mark_cpu_present);
 
 	/*
+	 * Allocate and initialize cpu_possible_set
+	 */
+	cpu_possible_set = CPU_ALLOC((topo.max_cpu_num + 1));
+	if (cpu_possible_set == NULL)
+		err(3, "CPU_ALLOC");
+	cpu_possible_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+	CPU_ZERO_S(cpu_possible_setsize, cpu_possible_set);
+	initialize_cpu_possible_set();
+
+	/*
 	 * Allocate and initialize cpu_effective_set
 	 */
 	cpu_effective_set = CPU_ALLOC((topo.max_cpu_num + 1));
@@ -8583,7 +8824,11 @@ void allocate_irq_buffers(void)
 
 	irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
 	if (irqs_per_cpu == NULL)
-		err(-1, "calloc %d", topo.max_cpu_num + 1);
+		err(-1, "calloc %d IRQ", topo.max_cpu_num + 1);
+
+	nmi_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+	if (nmi_per_cpu == NULL)
+		err(-1, "calloc %d NMI", topo.max_cpu_num + 1);
 }
 
 int update_topo(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -8854,49 +9099,36 @@ int parse_telem_info_file(int fd_dir, const char *info_filename, const char *for
 
 struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
 {
-	DIR *dirp;
-	struct dirent *entry;
+	struct pmt_diriter_t pmt_iter;
+	const struct dirent *entry;
 	struct stat st;
-	unsigned int telem_idx;
 	int fd_telem_dir, fd_pmt;
 	unsigned long guid, size, offset;
 	size_t mmap_size;
 	void *mmio;
-	struct pmt_mmio *ret = NULL;
+	struct pmt_mmio *head = NULL, *last = NULL;
+	struct pmt_mmio *new_pmt = NULL;
 
 	if (stat(SYSFS_TELEM_PATH, &st) == -1)
 		return NULL;
 
-	dirp = opendir(SYSFS_TELEM_PATH);
-	if (dirp == NULL)
+	pmt_diriter_init(&pmt_iter);
+	entry = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH);
+	if (!entry) {
+		pmt_diriter_remove(&pmt_iter);
 		return NULL;
+	}
 
-	for (;;) {
-		entry = readdir(dirp);
-
-		if (entry == NULL)
-			break;
-
-		if (strcmp(entry->d_name, ".") == 0)
-			continue;
-
-		if (strcmp(entry->d_name, "..") == 0)
-			continue;
-
-		if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1)
-			continue;
-
-		if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) {
+	for ( ; entry != NULL; entry = pmt_diriter_next(&pmt_iter)) {
+		if (fstatat(dirfd(pmt_iter.dir), entry->d_name, &st, 0) == -1)
 			break;
-		}
 
 		if (!S_ISDIR(st.st_mode))
 			continue;
 
-		fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY);
-		if (fd_telem_dir == -1) {
+		fd_telem_dir = openat(dirfd(pmt_iter.dir), entry->d_name, O_RDONLY);
+		if (fd_telem_dir == -1)
 			break;
-		}
 
 		if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
 			close(fd_telem_dir);
@@ -8924,38 +9156,54 @@ struct pmt_mmio *pmt_mmio_open(unsigned int target_guid)
 		if (fd_pmt == -1)
 			goto loop_cleanup_and_break;
 
-		mmap_size = (size + 0x1000UL) & (~0x1000UL);
+		mmap_size = ROUND_UP_TO_PAGE_SIZE(size);
 		mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0);
 		if (mmio != MAP_FAILED) {
-
 			if (debug)
 				fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio);
 
-			ret = calloc(1, sizeof(*ret));
+			new_pmt = calloc(1, sizeof(*new_pmt));
 
-			if (!ret) {
+			if (!new_pmt) {
 				fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__);
 				exit(1);
 			}
 
-			ret->guid = guid;
-			ret->mmio_base = mmio;
-			ret->pmt_offset = offset;
-			ret->size = size;
+			/*
+			 * Create linked list of mmaped regions,
+			 * but preserve the ordering from sysfs.
+			 * Ordering is important for the user to
+			 * use the seq=%u parameter when adding a counter.
+			 */
+			new_pmt->guid = guid;
+			new_pmt->mmio_base = mmio;
+			new_pmt->pmt_offset = offset;
+			new_pmt->size = size;
+			new_pmt->next = pmt_mmios;
+
+			if (last)
+				last->next = new_pmt;
+			else
+				head = new_pmt;
 
-			ret->next = pmt_mmios;
-			pmt_mmios = ret;
+			last = new_pmt;
 		}
 
 loop_cleanup_and_break:
 		close(fd_pmt);
 		close(fd_telem_dir);
-		break;
 	}
 
-	closedir(dirp);
+	pmt_diriter_remove(&pmt_iter);
 
-	return ret;
+	/*
+	 * If we found something, stick just
+	 * created linked list to the front.
+	 */
+	if (head)
+		pmt_mmios = head;
+
+	return head;
 }
 
 struct pmt_mmio *pmt_mmio_find(unsigned int guid)
@@ -8992,7 +9240,7 @@ void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offs
 	return ret;
 }
 
-struct pmt_mmio *pmt_add_guid(unsigned int guid)
+struct pmt_mmio *pmt_add_guid(unsigned int guid, unsigned int seq)
 {
 	struct pmt_mmio *ret;
 
@@ -9000,6 +9248,11 @@ struct pmt_mmio *pmt_add_guid(unsigned int guid)
 	if (!ret)
 		ret = pmt_mmio_open(guid);
 
+	while (ret && seq) {
+		ret = ret->next;
+		--seq;
+	}
+
 	return ret;
 }
 
@@ -9046,7 +9299,7 @@ void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio,
 	pcounter->domains[domain_id].pcounter = pmmio;
 }
 
-int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
+int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum pmt_datatype type,
 		    unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope,
 		    enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode)
 {
@@ -9066,10 +9319,10 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
 		exit(1);
 	}
 
-	mmio = pmt_add_guid(guid);
+	mmio = pmt_add_guid(guid, seq);
 	if (!mmio) {
 		if (mode != PMT_OPEN_TRY) {
-			fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid);
+			fprintf(stderr, "%s: failed to map PMT MMIO for guid %x, seq %u\n", __func__, guid, seq);
 			exit(1);
 		}
 
@@ -9124,10 +9377,68 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type,
 
 void pmt_init(void)
 {
+	int cpu_num;
+	unsigned long seq, offset, mod_num;
+
 	if (BIC_IS_ENABLED(BIC_Diec6)) {
-		pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB,
-				PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA,
-				0, PMT_OPEN_TRY);
+		pmt_add_counter(PMT_MTL_DC6_GUID, PMT_MTL_DC6_SEQ, "Die%c6", PMT_TYPE_XTAL_TIME,
+				PMT_COUNTER_MTL_DC6_LSB, PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET,
+				SCOPE_PACKAGE, FORMAT_DELTA, 0, PMT_OPEN_TRY);
+	}
+
+	if (BIC_IS_ENABLED(BIC_CPU_c1e)) {
+		seq = 0;
+		offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
+		mod_num = 0;	/* Relative module number for current PMT file. */
+
+		/* Open the counter for each CPU. */
+		for (cpu_num = 0; cpu_num < topo.max_cpu_num;) {
+
+			if (cpu_is_not_allowed(cpu_num))
+				goto next_loop_iter;
+
+			/*
+			 * Set the scope to CPU, even though CWF report the counter per module.
+			 * CPUs inside the same module will read from the same location, instead of reporting zeros.
+			 *
+			 * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK.
+			 */
+			pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK,
+					PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU,
+					FORMAT_DELTA, cpu_num, PMT_OPEN_TRY);
+
+			/*
+			 * Rather complex logic for each time we go to the next loop iteration,
+			 * so keep it as a label.
+			 */
+next_loop_iter:
+			/*
+			 * Advance the cpu number and check if we should also advance offset to
+			 * the next counter inside the PMT file.
+			 *
+			 * On Clearwater Forest platform, the counter is reported per module,
+			 * so open the same counter for all of the CPUs inside the module.
+			 * That way, reported table show the correct value for all of the CPUs inside the module,
+			 * instead of zeros.
+			 */
+			++cpu_num;
+			if (cpu_num % PMT_COUNTER_CWF_CPUS_PER_MODULE == 0) {
+				offset += PMT_COUNTER_CWF_MC1E_OFFSET_INCREMENT;
+				++mod_num;
+			}
+
+			/*
+			 * There are PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE in each PMT file.
+			 *
+			 * If that number is reached, seq must be incremented to advance to the next file in a sequence.
+			 * Offset inside that file and a module counter has to be reset.
+			 */
+			if (mod_num == PMT_COUNTER_CWF_MC1E_NUM_MODULES_PER_FILE) {
+				++seq;
+				offset = PMT_COUNTER_CWF_MC1E_OFFSET_BASE;
+				mod_num = 0;
+			}
+		}
 	}
 }
 
@@ -9163,6 +9474,18 @@ void turbostat_init()
 	}
 }
 
+void affinitize_child(void)
+{
+	/* Prefer cpu_possible_set, if available */
+	if (sched_setaffinity(0, cpu_possible_setsize, cpu_possible_set)) {
+		warn("sched_setaffinity cpu_possible_set");
+
+		/* Otherwise, allow child to run on same cpu set as turbostat */
+		if (sched_setaffinity(0, cpu_allowed_setsize, cpu_allowed_set))
+			warn("sched_setaffinity cpu_allowed_set");
+	}
+}
+
 int fork_it(char **argv)
 {
 	pid_t child_pid;
@@ -9178,6 +9501,7 @@ int fork_it(char **argv)
 	child_pid = fork();
 	if (!child_pid) {
 		/* child */
+		affinitize_child();
 		execvp(argv[0], argv);
 		err(errno, "exec %s", argv[0]);
 	} else {
@@ -9204,10 +9528,9 @@ int fork_it(char **argv)
 	timersub(&tv_odd, &tv_even, &tv_delta);
 	if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
 		fprintf(outf, "%s: Counter reset detected\n", progname);
-	else {
-		compute_average(EVEN_COUNTERS);
-		format_all_counters(EVEN_COUNTERS);
-	}
+
+	compute_average(EVEN_COUNTERS);
+	format_all_counters(EVEN_COUNTERS);
 
 	fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
 
@@ -9236,7 +9559,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-	fprintf(outf, "turbostat version 2024.11.30 - Len Brown <lenb@kernel.org>\n");
+	fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048
@@ -9561,7 +9884,7 @@ next:
 
 	}
 	if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) {
-		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n");
+		fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event) required\n");
 		fail++;
 	}
 
@@ -9599,15 +9922,100 @@ bool starts_with(const char *str, const char *prefix)
 	return strncmp(prefix, str, strlen(prefix)) == 0;
 }
 
+int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigned int *out_seq)
+{
+	struct pmt_diriter_t pmt_iter;
+	const struct dirent *dirname;
+	struct stat stat, target_stat;
+	int fd_telem_dir = -1;
+	int fd_target_dir;
+	unsigned int seq = 0;
+	unsigned long guid, target_guid;
+	int ret = -1;
+
+	fd_target_dir = open(target_path, O_RDONLY | O_DIRECTORY);
+	if (fd_target_dir == -1) {
+		return -1;
+	}
+
+	if (fstat(fd_target_dir, &target_stat) == -1) {
+		fprintf(stderr, "%s: Failed to stat the target: %s", __func__, strerror(errno));
+		exit(1);
+	}
+
+	if (parse_telem_info_file(fd_target_dir, "guid", "%lx", &target_guid)) {
+		fprintf(stderr, "%s: Failed to parse the target guid file: %s", __func__, strerror(errno));
+		exit(1);
+	}
+
+	close(fd_target_dir);
+
+	pmt_diriter_init(&pmt_iter);
+
+	for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL;
+	     dirname = pmt_diriter_next(&pmt_iter)) {
+
+		fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY);
+		if (fd_telem_dir == -1)
+			continue;
+
+		if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) {
+			fprintf(stderr, "%s: Failed to parse the guid file: %s", __func__, strerror(errno));
+			continue;
+		}
+
+		if (fstat(fd_telem_dir, &stat) == -1) {
+			fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__,
+				dirname->d_name, strerror(errno));
+			continue;
+		}
+
+		/*
+		 * If reached the same directory as target, exit the loop.
+		 * Seq has the correct value now.
+		 */
+		if (stat.st_dev == target_stat.st_dev && stat.st_ino == target_stat.st_ino) {
+			ret = 0;
+			break;
+		}
+
+		/*
+		 * If reached directory with the same guid,
+		 * but it's not the target directory yet,
+		 * increment seq and continue the search.
+		 */
+		if (guid == target_guid)
+			++seq;
+
+		close(fd_telem_dir);
+		fd_telem_dir = -1;
+	}
+
+	pmt_diriter_remove(&pmt_iter);
+
+	if (fd_telem_dir != -1)
+		close(fd_telem_dir);
+
+	if (!ret) {
+		*out_guid = target_guid;
+		*out_seq = seq;
+	}
+
+	return ret;
+}
+
 void parse_add_command_pmt(char *add_command)
 {
 	char *name = NULL;
 	char *type_name = NULL;
 	char *format_name = NULL;
+	char *direct_path = NULL;
+	static const char direct_path_prefix[] = "path=";
 	unsigned int offset;
 	unsigned int lsb;
 	unsigned int msb;
 	unsigned int guid;
+	unsigned int seq = 0; /* By default, pick first file in a sequence with a given GUID. */
 	unsigned int domain_id;
 	enum counter_scope scope = 0;
 	enum pmt_datatype type = PMT_TYPE_RAW;
@@ -9687,6 +10095,13 @@ void parse_add_command_pmt(char *add_command)
 			goto next;
 		}
 
+		if (sscanf(add_command, "seq=%x", &seq) == 1)
+			goto next;
+
+		if (strncmp(add_command, direct_path_prefix, strlen(direct_path_prefix)) == 0) {
+			direct_path = add_command + strlen(direct_path_prefix);
+			goto next;
+		}
 next:
 		add_command = strchr(add_command, ',');
 		if (add_command) {
@@ -9737,6 +10152,11 @@ next:
 			has_type = true;
 		}
 
+		if (strcmp("tcore_clock", type_name) == 0) {
+			type = PMT_TYPE_TCORE_CLOCK;
+			has_type = true;
+		}
+
 		if (!has_type) {
 			printf("%s: invalid %s: %s\n", __func__, "type", type_name);
 			exit(1);
@@ -9758,8 +10178,24 @@ next:
 		exit(1);
 	}
 
+	if (direct_path && has_guid) {
+		printf("%s: path and guid+seq parameters are mutually exclusive\n"
+		       "notice: passed guid=0x%x and path=%s\n", __func__, guid, direct_path);
+		exit(1);
+	}
+
+	if (direct_path) {
+		if (pmt_parse_from_path(direct_path, &guid, &seq)) {
+			printf("%s: failed to parse PMT file from %s\n", __func__, direct_path);
+			exit(1);
+		}
+
+		/* GUID was just infered from the direct path. */
+		has_guid = true;
+	}
+
 	if (!has_guid) {
-		printf("%s: missing %s\n", __func__, "guid");
+		printf("%s: missing %s\n", __func__, "guid or path");
 		exit(1);
 	}
 
@@ -9773,7 +10209,7 @@ next:
 		exit(1);
 	}
 
-	pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
+	pmt_add_counter(guid, seq, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED);
 }
 
 void parse_add_command(char *add_command)
@@ -9921,6 +10357,7 @@ void cmdline(int argc, char **argv)
 		{ "Dump", no_argument, 0, 'D' },
 		{ "debug", no_argument, 0, 'd' },	/* internal, not documented */
 		{ "enable", required_argument, 0, 'e' },
+		{ "force", no_argument, 0, 'f' },
 		{ "interval", required_argument, 0, 'i' },
 		{ "IPC", no_argument, 0, 'I' },
 		{ "num_iterations", required_argument, 0, 'n' },
@@ -9981,6 +10418,9 @@ void cmdline(int argc, char **argv)
 			/* --enable specified counter */
 			bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
 			break;
+		case 'f':
+			force_load++;
+			break;
 		case 'd':
 			debug++;
 			ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index 625f5b046776..dc4333d23189 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -7,9 +7,16 @@
 #ifndef __SCX_COMMON_BPF_H
 #define __SCX_COMMON_BPF_H
 
+/*
+ * The generated kfunc prototypes in vmlinux.h are missing address space
+ * attributes which cause build failures. For now, suppress the generated
+ * prototypes. See https://github.com/sched-ext/scx/issues/1111.
+ */
+#define BPF_NO_KFUNC_PROTOTYPES
+
 #ifdef LSP
 #define __bpf__
-#include "../vmlinux/vmlinux.h"
+#include "../vmlinux.h"
 #else
 #include "vmlinux.h"
 #endif
@@ -18,12 +25,17 @@
 #include <bpf/bpf_tracing.h>
 #include <asm-generic/errno.h>
 #include "user_exit_info.h"
+#include "enum_defs.autogen.h"
 
 #define PF_WQ_WORKER			0x00000020	/* I'm a workqueue worker */
 #define PF_KTHREAD			0x00200000	/* I am a kernel thread */
 #define PF_EXITING			0x00000004
 #define CLOCK_MONOTONIC			1
 
+extern int LINUX_KERNEL_VERSION __kconfig;
+extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
+extern const char CONFIG_LOCALVERSION[64] __kconfig __weak;
+
 /*
  * Earlier versions of clang/pahole lost upper 32bits in 64bit enums which can
  * lead to really confusing misbehaviors. Let's trigger a build failure.
@@ -58,20 +70,28 @@ void scx_bpf_dump_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym
 u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak;
 u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak;
 void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak;
+u32 scx_bpf_nr_node_ids(void) __ksym __weak;
 u32 scx_bpf_nr_cpu_ids(void) __ksym __weak;
+int scx_bpf_cpu_node(s32 cpu) __ksym __weak;
 const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak;
 const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak;
 void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak;
+const struct cpumask *scx_bpf_get_idle_cpumask_node(int node) __ksym __weak;
 const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
+const struct cpumask *scx_bpf_get_idle_smtmask_node(int node) __ksym __weak;
 const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
 void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
 bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
+s32 scx_bpf_pick_idle_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak;
 s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
+s32 scx_bpf_pick_any_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak;
 s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
 bool scx_bpf_task_running(const struct task_struct *p) __ksym;
 s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
 struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
 struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
+u64 scx_bpf_now(void) __ksym __weak;
+void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak;
 
 /*
  * Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from
@@ -79,6 +99,9 @@ struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
  */
 #define BPF_FOR_EACH_ITER	(&___it)
 
+#define scx_read_event(e, name)							\
+	(bpf_core_field_exists((e)->name) ? (e)->name : 0)
+
 static inline __attribute__((format(printf, 1, 2)))
 void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
 
@@ -98,7 +121,7 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
 	_Pragma("GCC diagnostic push")						\
 	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")			\
 	___bpf_fill(___param, args);						\
-	_Pragma("GCC diagnostic pop")						\
+	_Pragma("GCC diagnostic pop")
 
 /*
  * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
@@ -136,6 +159,20 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
 	___scx_bpf_bstr_format_checker(fmt, ##args);				\
 })
 
+/*
+ * scx_bpf_dump_header() is a wrapper around scx_bpf_dump that adds a header
+ * of system information for debugging.
+ */
+#define scx_bpf_dump_header()							\
+({										\
+	scx_bpf_dump("kernel: %d.%d.%d %s\ncc: %s\n",				\
+		     LINUX_KERNEL_VERSION >> 16,				\
+		     LINUX_KERNEL_VERSION >> 8 & 0xFF,				\
+		     LINUX_KERNEL_VERSION & 0xFF,				\
+		     CONFIG_LOCALVERSION,					\
+		     CONFIG_CC_VERSION_TEXT);					\
+})
+
 #define BPF_STRUCT_OPS(name, args...)						\
 SEC("struct_ops/"#name)								\
 BPF_PROG(name, ##args)
@@ -251,8 +288,16 @@ void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;
 #define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))
 #define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)
 
-void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
-void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
+int bpf_list_push_front_impl(struct bpf_list_head *head,
+				    struct bpf_list_node *node,
+				    void *meta, __u64 off) __ksym;
+#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0)
+
+int bpf_list_push_back_impl(struct bpf_list_head *head,
+				   struct bpf_list_node *node,
+				   void *meta, __u64 off) __ksym;
+#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0)
+
 struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym;
 struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
 struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
@@ -317,6 +362,66 @@ u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
 				   const struct cpumask *src2) __ksym;
 u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
 
+int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words) __ksym;
+int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym;
+void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym;
+
+#define def_iter_struct(name)							\
+struct bpf_iter_##name {							\
+    struct bpf_iter_bits it;							\
+    const struct cpumask *bitmap;						\
+};
+
+#define def_iter_new(name)							\
+static inline int bpf_iter_##name##_new(					\
+	struct bpf_iter_##name *it, const u64 *unsafe_ptr__ign, u32 nr_words)	\
+{										\
+	it->bitmap = scx_bpf_get_##name##_cpumask();				\
+	return bpf_iter_bits_new(&it->it, (const u64 *)it->bitmap,		\
+				 sizeof(struct cpumask) / 8);			\
+}
+
+#define def_iter_next(name)							\
+static inline int *bpf_iter_##name##_next(struct bpf_iter_##name *it) {		\
+	return bpf_iter_bits_next(&it->it);					\
+}
+
+#define def_iter_destroy(name)							\
+static inline void bpf_iter_##name##_destroy(struct bpf_iter_##name *it) {	\
+	scx_bpf_put_cpumask(it->bitmap);					\
+	bpf_iter_bits_destroy(&it->it);						\
+}
+#define def_for_each_cpu(cpu, name) for_each_##name##_cpu(cpu)
+
+/// Provides iterator for possible and online cpus.
+///
+/// # Example
+///
+/// ```
+/// static inline void example_use() {
+///     int *cpu;
+///
+///     for_each_possible_cpu(cpu){
+///         bpf_printk("CPU %d is possible", *cpu);
+///     }
+///
+///     for_each_online_cpu(cpu){
+///         bpf_printk("CPU %d is online", *cpu);
+///     }
+/// }
+/// ```
+def_iter_struct(possible);
+def_iter_new(possible);
+def_iter_next(possible);
+def_iter_destroy(possible);
+#define for_each_possible_cpu(cpu) bpf_for_each(possible, cpu, NULL, 0)
+
+def_iter_struct(online);
+def_iter_new(online);
+def_iter_next(online);
+def_iter_destroy(online);
+#define for_each_online_cpu(cpu) bpf_for_each(online, cpu, NULL, 0)
+
 /*
  * Access a cpumask in read-only mode (typically to check bits).
  */
@@ -325,10 +430,115 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
 	return (const struct cpumask *)mask;
 }
 
+/*
+ * Return true if task @p cannot migrate to a different CPU, false
+ * otherwise.
+ */
+static inline bool is_migration_disabled(const struct task_struct *p)
+{
+	if (bpf_core_field_exists(p->migration_disabled))
+		return p->migration_disabled;
+	return false;
+}
+
 /* rcu */
 void bpf_rcu_read_lock(void) __ksym;
 void bpf_rcu_read_unlock(void) __ksym;
 
+/*
+ * Time helpers, most of which are from jiffies.h.
+ */
+
+/**
+ * time_delta - Calculate the delta between new and old time stamp
+ * @after: first comparable as u64
+ * @before: second comparable as u64
+ *
+ * Return: the time difference, which is >= 0
+ */
+static inline s64 time_delta(u64 after, u64 before)
+{
+	return (s64)(after - before) > 0 ? (s64)(after - before) : 0;
+}
+
+/**
+ * time_after - returns true if the time a is after time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Do this with "<0" and ">=0" to only test the sign of the result. A
+ * good compiler would generate better code (and a really good compiler
+ * wouldn't care). Gcc is currently neither.
+ *
+ * Return: %true is time a is after time b, otherwise %false.
+ */
+static inline bool time_after(u64 a, u64 b)
+{
+	 return (s64)(b - a) < 0;
+}
+
+/**
+ * time_before - returns true if the time a is before time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Return: %true is time a is before time b, otherwise %false.
+ */
+static inline bool time_before(u64 a, u64 b)
+{
+	return time_after(b, a);
+}
+
+/**
+ * time_after_eq - returns true if the time a is after or the same as time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Return: %true is time a is after or the same as time b, otherwise %false.
+ */
+static inline bool time_after_eq(u64 a, u64 b)
+{
+	 return (s64)(a - b) >= 0;
+}
+
+/**
+ * time_before_eq - returns true if the time a is before or the same as time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Return: %true is time a is before or the same as time b, otherwise %false.
+ */
+static inline bool time_before_eq(u64 a, u64 b)
+{
+	return time_after_eq(b, a);
+}
+
+/**
+ * time_in_range - Calculate whether a is in the range of [b, c].
+ * @a: time to test
+ * @b: beginning of the range
+ * @c: end of the range
+ *
+ * Return: %true is time a is in the range [b, c], otherwise %false.
+ */
+static inline bool time_in_range(u64 a, u64 b, u64 c)
+{
+	return time_after_eq(a, b) && time_before_eq(a, c);
+}
+
+/**
+ * time_in_range_open - Calculate whether a is in the range of [b, c).
+ * @a: time to test
+ * @b: beginning of the range
+ * @c: end of the range
+ *
+ * Return: %true is time a is in the range [b, c), otherwise %false.
+ */
+static inline bool time_in_range_open(u64 a, u64 b, u64 c)
+{
+	return time_after_eq(a, b) && time_before(a, c);
+}
+
 
 /*
  * Other helpers
@@ -392,6 +602,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 	__u.__val;					\
 })
 
+#define READ_ONCE_ARENA(type, x)				\
+({								\
+	union { type __val; char __c[1]; } __u =		\
+		{ .__c = { 0 } };				\
+	__read_once_size((void *)&(x), __u.__c, sizeof(x));	\
+	__u.__val;						\
+})
+
+#define WRITE_ONCE_ARENA(type, x, val)				\
+({								\
+	union { type __val; char __c[1]; } __u =		\
+		{ .__val = (val) }; 				\
+	__write_once_size((void *)&(x), __u.__c, sizeof(x));	\
+	__u.__val;						\
+})
+
 /*
  * log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
  * @v: The value for which we're computing the base 2 logarithm.
@@ -423,5 +649,6 @@ static inline u32 log2_u64(u64 v)
 }
 
 #include "compat.bpf.h"
+#include "enums.bpf.h"
 
 #endif	/* __SCX_COMMON_BPF_H */
diff --git a/tools/sched_ext/include/scx/common.h b/tools/sched_ext/include/scx/common.h
index 5b0f90152152..1dc76bd84296 100644
--- a/tools/sched_ext/include/scx/common.h
+++ b/tools/sched_ext/include/scx/common.h
@@ -16,6 +16,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <errno.h>
+#include "enum_defs.autogen.h"
 
 typedef uint8_t u8;
 typedef uint16_t u16;
@@ -71,5 +72,11 @@ typedef int64_t s64;
 
 #include "user_exit_info.h"
 #include "compat.h"
+#include "enums.h"
+
+/* not available when building kernel tools/sched_ext */
+#if __has_include(<lib/sdt_task.h>)
+#include <lib/sdt_task.h>
+#endif
 
 #endif	/* __SCHED_EXT_COMMON_H */
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index d56520100a26..9252e1a00556 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -125,6 +125,106 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
 	false;									\
 })
 
+/**
+ * __COMPAT_is_enq_cpu_selected - Test if SCX_ENQ_CPU_SELECTED is on
+ * in a compatible way. We will preserve this __COMPAT helper until v6.16.
+ *
+ * @enq_flags: enqueue flags from ops.enqueue()
+ *
+ * Return: True if SCX_ENQ_CPU_SELECTED is turned on in @enq_flags
+ */
+static inline bool __COMPAT_is_enq_cpu_selected(u64 enq_flags)
+{
+#ifdef HAVE_SCX_ENQ_CPU_SELECTED
+	/*
+	 * This is the case that a BPF code compiled against vmlinux.h
+	 * where the enum SCX_ENQ_CPU_SELECTED exists.
+	 */
+
+	/*
+	 * We should temporarily suspend the macro expansion of
+	 * 'SCX_ENQ_CPU_SELECTED'. This avoids 'SCX_ENQ_CPU_SELECTED' being
+	 * rewritten to '__SCX_ENQ_CPU_SELECTED' when 'SCX_ENQ_CPU_SELECTED'
+	 * is defined in 'scripts/gen_enums.py'.
+	 */
+#pragma push_macro("SCX_ENQ_CPU_SELECTED")
+#undef SCX_ENQ_CPU_SELECTED
+	u64 flag;
+
+	/*
+	 * When the kernel did not have SCX_ENQ_CPU_SELECTED,
+	 * select_task_rq_scx() has never been skipped. Thus, this case
+	 * should be considered that the CPU has already been selected.
+	 */
+	if (!bpf_core_enum_value_exists(enum scx_enq_flags,
+					SCX_ENQ_CPU_SELECTED))
+		return true;
+
+	flag = bpf_core_enum_value(enum scx_enq_flags, SCX_ENQ_CPU_SELECTED);
+	return enq_flags & flag;
+
+	/*
+	 * Once done, resume the macro expansion of 'SCX_ENQ_CPU_SELECTED'.
+	 */
+#pragma pop_macro("SCX_ENQ_CPU_SELECTED")
+#else
+	/*
+	 * This is the case that a BPF code compiled against vmlinux.h
+	 * where the enum SCX_ENQ_CPU_SELECTED does NOT exist.
+	 */
+	return true;
+#endif /* HAVE_SCX_ENQ_CPU_SELECTED */
+}
+
+
+#define scx_bpf_now()								\
+	(bpf_ksym_exists(scx_bpf_now) ?						\
+	 scx_bpf_now() :							\
+	 bpf_ktime_get_ns())
+
+/*
+ * v6.15: Introduce event counters.
+ *
+ * Preserve the following macro until v6.17.
+ */
+#define __COMPAT_scx_bpf_events(events, size)					\
+	(bpf_ksym_exists(scx_bpf_events) ?					\
+	 scx_bpf_events(events, size) : ({}))
+
+/*
+ * v6.15: Introduce NUMA-aware kfuncs to operate with per-node idle
+ * cpumasks.
+ *
+ * Preserve the following __COMPAT_scx_*_node macros until v6.17.
+ */
+#define __COMPAT_scx_bpf_nr_node_ids()						\
+	(bpf_ksym_exists(scx_bpf_nr_node_ids) ?					\
+	 scx_bpf_nr_node_ids() : 1U)
+
+#define __COMPAT_scx_bpf_cpu_node(cpu)						\
+	(bpf_ksym_exists(scx_bpf_cpu_node) ?					\
+	 scx_bpf_cpu_node(cpu) : 0)
+
+#define __COMPAT_scx_bpf_get_idle_cpumask_node(node)				\
+	(bpf_ksym_exists(scx_bpf_get_idle_cpumask_node) ?			\
+	 scx_bpf_get_idle_cpumask_node(node) :					\
+	 scx_bpf_get_idle_cpumask())						\
+
+#define __COMPAT_scx_bpf_get_idle_smtmask_node(node)				\
+	(bpf_ksym_exists(scx_bpf_get_idle_smtmask_node) ?			\
+	 scx_bpf_get_idle_smtmask_node(node) :					\
+	 scx_bpf_get_idle_smtmask())
+
+#define __COMPAT_scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags)		\
+	(bpf_ksym_exists(scx_bpf_pick_idle_cpu_node) ?				\
+	 scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags) :		\
+	 scx_bpf_pick_idle_cpu(cpus_allowed, flags))
+
+#define __COMPAT_scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags)		\
+	(bpf_ksym_exists(scx_bpf_pick_any_cpu_node) ?				\
+	 scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags) :			\
+	 scx_bpf_pick_any_cpu(cpus_allowed, flags))
+
 /*
  * Define sched_ext_ops. This may be expanded to define multiple variants for
  * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h
index cc56ff9aa252..35c67c5174ac 100644
--- a/tools/sched_ext/include/scx/compat.h
+++ b/tools/sched_ext/include/scx/compat.h
@@ -106,8 +106,20 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
 	return false;
 }
 
-#define SCX_OPS_SWITCH_PARTIAL							\
-	__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
+#define SCX_OPS_FLAG(name) __COMPAT_ENUM_OR_ZERO("scx_ops_flags", #name)
+
+#define SCX_OPS_KEEP_BUILTIN_IDLE SCX_OPS_FLAG(SCX_OPS_KEEP_BUILTIN_IDLE)
+#define SCX_OPS_ENQ_LAST SCX_OPS_FLAG(SCX_OPS_ENQ_LAST)
+#define SCX_OPS_ENQ_EXITING  SCX_OPS_FLAG(SCX_OPS_ENQ_EXITING)
+#define SCX_OPS_SWITCH_PARTIAL SCX_OPS_FLAG(SCX_OPS_SWITCH_PARTIAL)
+#define SCX_OPS_ENQ_MIGRATION_DISABLED SCX_OPS_FLAG(SCX_OPS_ENQ_MIGRATION_DISABLED)
+#define SCX_OPS_ALLOW_QUEUED_WAKEUP SCX_OPS_FLAG(SCX_OPS_ALLOW_QUEUED_WAKEUP)
+#define SCX_OPS_BUILTIN_IDLE_PER_NODE SCX_OPS_FLAG(SCX_OPS_BUILTIN_IDLE_PER_NODE)
+
+#define SCX_PICK_IDLE_FLAG(name) __COMPAT_ENUM_OR_ZERO("scx_pick_idle_cpu_flags", #name)
+
+#define SCX_PICK_IDLE_CORE SCX_PICK_IDLE_FLAG(SCX_PICK_IDLE_CORE)
+#define SCX_PICK_IDLE_IN_NODE SCX_PICK_IDLE_FLAG(SCX_PICK_IDLE_IN_NODE)
 
 static inline long scx_hotplug_seq(void)
 {
@@ -149,6 +161,7 @@ static inline long scx_hotplug_seq(void)
 	__skel = __scx_name##__open();						\
 	SCX_BUG_ON(!__skel, "Could not open " #__scx_name);			\
 	__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq();		\
+	SCX_ENUM_INIT(__skel);							\
 	__skel; 								\
 })
 
diff --git a/tools/sched_ext/include/scx/enum_defs.autogen.h b/tools/sched_ext/include/scx/enum_defs.autogen.h
new file mode 100644
index 000000000000..6e6c45f14fe1
--- /dev/null
+++ b/tools/sched_ext/include/scx/enum_defs.autogen.h
@@ -0,0 +1,120 @@
+/*
+ * WARNING: This file is autogenerated from gen_enum_defs.py [1].
+ *
+ * [1] https://github.com/sched-ext/scx/blob/main/scripts/gen_enum_defs.py
+ */
+
+#ifndef __ENUM_DEFS_AUTOGEN_H__
+#define __ENUM_DEFS_AUTOGEN_H__
+
+#define HAVE_SCX_DSP_DFL_MAX_BATCH
+#define HAVE_SCX_DSP_MAX_LOOPS
+#define HAVE_SCX_WATCHDOG_MAX_TIMEOUT
+#define HAVE_SCX_EXIT_BT_LEN
+#define HAVE_SCX_EXIT_MSG_LEN
+#define HAVE_SCX_EXIT_DUMP_DFL_LEN
+#define HAVE_SCX_CPUPERF_ONE
+#define HAVE_SCX_OPS_TASK_ITER_BATCH
+#define HAVE_SCX_CPU_PREEMPT_RT
+#define HAVE_SCX_CPU_PREEMPT_DL
+#define HAVE_SCX_CPU_PREEMPT_STOP
+#define HAVE_SCX_CPU_PREEMPT_UNKNOWN
+#define HAVE_SCX_DEQ_SLEEP
+#define HAVE_SCX_DEQ_CORE_SCHED_EXEC
+#define HAVE_SCX_DSQ_FLAG_BUILTIN
+#define HAVE_SCX_DSQ_FLAG_LOCAL_ON
+#define HAVE_SCX_DSQ_INVALID
+#define HAVE_SCX_DSQ_GLOBAL
+#define HAVE_SCX_DSQ_LOCAL
+#define HAVE_SCX_DSQ_LOCAL_ON
+#define HAVE_SCX_DSQ_LOCAL_CPU_MASK
+#define HAVE_SCX_DSQ_ITER_REV
+#define HAVE___SCX_DSQ_ITER_HAS_SLICE
+#define HAVE___SCX_DSQ_ITER_HAS_VTIME
+#define HAVE___SCX_DSQ_ITER_USER_FLAGS
+#define HAVE___SCX_DSQ_ITER_ALL_FLAGS
+#define HAVE_SCX_DSQ_LNODE_ITER_CURSOR
+#define HAVE___SCX_DSQ_LNODE_PRIV_SHIFT
+#define HAVE_SCX_ENQ_WAKEUP
+#define HAVE_SCX_ENQ_HEAD
+#define HAVE_SCX_ENQ_CPU_SELECTED
+#define HAVE_SCX_ENQ_PREEMPT
+#define HAVE_SCX_ENQ_REENQ
+#define HAVE_SCX_ENQ_LAST
+#define HAVE___SCX_ENQ_INTERNAL_MASK
+#define HAVE_SCX_ENQ_CLEAR_OPSS
+#define HAVE_SCX_ENQ_DSQ_PRIQ
+#define HAVE_SCX_TASK_DSQ_ON_PRIQ
+#define HAVE_SCX_TASK_QUEUED
+#define HAVE_SCX_TASK_RESET_RUNNABLE_AT
+#define HAVE_SCX_TASK_DEQD_FOR_SLEEP
+#define HAVE_SCX_TASK_STATE_SHIFT
+#define HAVE_SCX_TASK_STATE_BITS
+#define HAVE_SCX_TASK_STATE_MASK
+#define HAVE_SCX_TASK_CURSOR
+#define HAVE_SCX_ECODE_RSN_HOTPLUG
+#define HAVE_SCX_ECODE_ACT_RESTART
+#define HAVE_SCX_EXIT_NONE
+#define HAVE_SCX_EXIT_DONE
+#define HAVE_SCX_EXIT_UNREG
+#define HAVE_SCX_EXIT_UNREG_BPF
+#define HAVE_SCX_EXIT_UNREG_KERN
+#define HAVE_SCX_EXIT_SYSRQ
+#define HAVE_SCX_EXIT_ERROR
+#define HAVE_SCX_EXIT_ERROR_BPF
+#define HAVE_SCX_EXIT_ERROR_STALL
+#define HAVE_SCX_KF_UNLOCKED
+#define HAVE_SCX_KF_CPU_RELEASE
+#define HAVE_SCX_KF_DISPATCH
+#define HAVE_SCX_KF_ENQUEUE
+#define HAVE_SCX_KF_SELECT_CPU
+#define HAVE_SCX_KF_REST
+#define HAVE___SCX_KF_RQ_LOCKED
+#define HAVE___SCX_KF_TERMINAL
+#define HAVE_SCX_KICK_IDLE
+#define HAVE_SCX_KICK_PREEMPT
+#define HAVE_SCX_KICK_WAIT
+#define HAVE_SCX_OPI_BEGIN
+#define HAVE_SCX_OPI_NORMAL_BEGIN
+#define HAVE_SCX_OPI_NORMAL_END
+#define HAVE_SCX_OPI_CPU_HOTPLUG_BEGIN
+#define HAVE_SCX_OPI_CPU_HOTPLUG_END
+#define HAVE_SCX_OPI_END
+#define HAVE_SCX_OPS_ENABLING
+#define HAVE_SCX_OPS_ENABLED
+#define HAVE_SCX_OPS_DISABLING
+#define HAVE_SCX_OPS_DISABLED
+#define HAVE_SCX_OPS_KEEP_BUILTIN_IDLE
+#define HAVE_SCX_OPS_ENQ_LAST
+#define HAVE_SCX_OPS_ENQ_EXITING
+#define HAVE_SCX_OPS_SWITCH_PARTIAL
+#define HAVE_SCX_OPS_HAS_CGROUP_WEIGHT
+#define HAVE_SCX_OPS_ALL_FLAGS
+#define HAVE_SCX_OPSS_NONE
+#define HAVE_SCX_OPSS_QUEUEING
+#define HAVE_SCX_OPSS_QUEUED
+#define HAVE_SCX_OPSS_DISPATCHING
+#define HAVE_SCX_OPSS_QSEQ_SHIFT
+#define HAVE_SCX_PICK_IDLE_CORE
+#define HAVE_SCX_OPS_NAME_LEN
+#define HAVE_SCX_SLICE_DFL
+#define HAVE_SCX_SLICE_INF
+#define HAVE_SCX_RQ_ONLINE
+#define HAVE_SCX_RQ_CAN_STOP_TICK
+#define HAVE_SCX_RQ_BAL_PENDING
+#define HAVE_SCX_RQ_BAL_KEEP
+#define HAVE_SCX_RQ_BYPASSING
+#define HAVE_SCX_RQ_IN_WAKEUP
+#define HAVE_SCX_RQ_IN_BALANCE
+#define HAVE_SCX_TASK_NONE
+#define HAVE_SCX_TASK_INIT
+#define HAVE_SCX_TASK_READY
+#define HAVE_SCX_TASK_ENABLED
+#define HAVE_SCX_TASK_NR_STATES
+#define HAVE_SCX_TG_ONLINE
+#define HAVE_SCX_TG_INITED
+#define HAVE_SCX_WAKE_FORK
+#define HAVE_SCX_WAKE_TTWU
+#define HAVE_SCX_WAKE_SYNC
+
+#endif /* __ENUM_DEFS_AUTOGEN_H__ */
diff --git a/tools/sched_ext/include/scx/enums.autogen.bpf.h b/tools/sched_ext/include/scx/enums.autogen.bpf.h
new file mode 100644
index 000000000000..0e941a0d6f88
--- /dev/null
+++ b/tools/sched_ext/include/scx/enums.autogen.bpf.h
@@ -0,0 +1,105 @@
+/*
+ * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would
+ * like to access an enum that is currently missing, add it to the script
+ * and run it from the root directory to update this file.
+ */
+
+const volatile u64 __SCX_OPS_NAME_LEN __weak;
+#define SCX_OPS_NAME_LEN __SCX_OPS_NAME_LEN
+
+const volatile u64 __SCX_SLICE_DFL __weak;
+#define SCX_SLICE_DFL __SCX_SLICE_DFL
+
+const volatile u64 __SCX_SLICE_INF __weak;
+#define SCX_SLICE_INF __SCX_SLICE_INF
+
+const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak;
+#define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN
+
+const volatile u64 __SCX_DSQ_FLAG_LOCAL_ON __weak;
+#define SCX_DSQ_FLAG_LOCAL_ON __SCX_DSQ_FLAG_LOCAL_ON
+
+const volatile u64 __SCX_DSQ_INVALID __weak;
+#define SCX_DSQ_INVALID __SCX_DSQ_INVALID
+
+const volatile u64 __SCX_DSQ_GLOBAL __weak;
+#define SCX_DSQ_GLOBAL __SCX_DSQ_GLOBAL
+
+const volatile u64 __SCX_DSQ_LOCAL __weak;
+#define SCX_DSQ_LOCAL __SCX_DSQ_LOCAL
+
+const volatile u64 __SCX_DSQ_LOCAL_ON __weak;
+#define SCX_DSQ_LOCAL_ON __SCX_DSQ_LOCAL_ON
+
+const volatile u64 __SCX_DSQ_LOCAL_CPU_MASK __weak;
+#define SCX_DSQ_LOCAL_CPU_MASK __SCX_DSQ_LOCAL_CPU_MASK
+
+const volatile u64 __SCX_TASK_QUEUED __weak;
+#define SCX_TASK_QUEUED __SCX_TASK_QUEUED
+
+const volatile u64 __SCX_TASK_RESET_RUNNABLE_AT __weak;
+#define SCX_TASK_RESET_RUNNABLE_AT __SCX_TASK_RESET_RUNNABLE_AT
+
+const volatile u64 __SCX_TASK_DEQD_FOR_SLEEP __weak;
+#define SCX_TASK_DEQD_FOR_SLEEP __SCX_TASK_DEQD_FOR_SLEEP
+
+const volatile u64 __SCX_TASK_STATE_SHIFT __weak;
+#define SCX_TASK_STATE_SHIFT __SCX_TASK_STATE_SHIFT
+
+const volatile u64 __SCX_TASK_STATE_BITS __weak;
+#define SCX_TASK_STATE_BITS __SCX_TASK_STATE_BITS
+
+const volatile u64 __SCX_TASK_STATE_MASK __weak;
+#define SCX_TASK_STATE_MASK __SCX_TASK_STATE_MASK
+
+const volatile u64 __SCX_TASK_CURSOR __weak;
+#define SCX_TASK_CURSOR __SCX_TASK_CURSOR
+
+const volatile u64 __SCX_TASK_NONE __weak;
+#define SCX_TASK_NONE __SCX_TASK_NONE
+
+const volatile u64 __SCX_TASK_INIT __weak;
+#define SCX_TASK_INIT __SCX_TASK_INIT
+
+const volatile u64 __SCX_TASK_READY __weak;
+#define SCX_TASK_READY __SCX_TASK_READY
+
+const volatile u64 __SCX_TASK_ENABLED __weak;
+#define SCX_TASK_ENABLED __SCX_TASK_ENABLED
+
+const volatile u64 __SCX_TASK_NR_STATES __weak;
+#define SCX_TASK_NR_STATES __SCX_TASK_NR_STATES
+
+const volatile u64 __SCX_TASK_DSQ_ON_PRIQ __weak;
+#define SCX_TASK_DSQ_ON_PRIQ __SCX_TASK_DSQ_ON_PRIQ
+
+const volatile u64 __SCX_KICK_IDLE __weak;
+#define SCX_KICK_IDLE __SCX_KICK_IDLE
+
+const volatile u64 __SCX_KICK_PREEMPT __weak;
+#define SCX_KICK_PREEMPT __SCX_KICK_PREEMPT
+
+const volatile u64 __SCX_KICK_WAIT __weak;
+#define SCX_KICK_WAIT __SCX_KICK_WAIT
+
+const volatile u64 __SCX_ENQ_WAKEUP __weak;
+#define SCX_ENQ_WAKEUP __SCX_ENQ_WAKEUP
+
+const volatile u64 __SCX_ENQ_HEAD __weak;
+#define SCX_ENQ_HEAD __SCX_ENQ_HEAD
+
+const volatile u64 __SCX_ENQ_PREEMPT __weak;
+#define SCX_ENQ_PREEMPT __SCX_ENQ_PREEMPT
+
+const volatile u64 __SCX_ENQ_REENQ __weak;
+#define SCX_ENQ_REENQ __SCX_ENQ_REENQ
+
+const volatile u64 __SCX_ENQ_LAST __weak;
+#define SCX_ENQ_LAST __SCX_ENQ_LAST
+
+const volatile u64 __SCX_ENQ_CLEAR_OPSS __weak;
+#define SCX_ENQ_CLEAR_OPSS __SCX_ENQ_CLEAR_OPSS
+
+const volatile u64 __SCX_ENQ_DSQ_PRIQ __weak;
+#define SCX_ENQ_DSQ_PRIQ __SCX_ENQ_DSQ_PRIQ
+
diff --git a/tools/sched_ext/include/scx/enums.autogen.h b/tools/sched_ext/include/scx/enums.autogen.h
new file mode 100644
index 000000000000..88137a140e72
--- /dev/null
+++ b/tools/sched_ext/include/scx/enums.autogen.h
@@ -0,0 +1,41 @@
+/*
+ * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would
+ * like to access an enum that is currently missing, add it to the script
+ * and run it from the root directory to update this file.
+ */
+
+#define SCX_ENUM_INIT(skel) do { \
+	SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN); \
+	SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL); \
+	SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF); \
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN); \
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON); \
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID); \
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_GLOBAL); \
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL); \
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_ON); \
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_CPU_MASK); \
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_QUEUED); \
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_RESET_RUNNABLE_AT); \
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_DEQD_FOR_SLEEP); \
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_SHIFT); \
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_BITS); \
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_MASK); \
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_CURSOR); \
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NONE); \
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_INIT); \
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_READY); \
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_ENABLED); \
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NR_STATES); \
+	SCX_ENUM_SET(skel, scx_ent_dsq_flags, SCX_TASK_DSQ_ON_PRIQ); \
+	SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_IDLE); \
+	SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_PREEMPT); \
+	SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_WAIT); \
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_WAKEUP); \
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_HEAD); \
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_PREEMPT); \
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_REENQ); \
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_LAST); \
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_CLEAR_OPSS); \
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_DSQ_PRIQ); \
+} while (0)
diff --git a/tools/sched_ext/include/scx/enums.bpf.h b/tools/sched_ext/include/scx/enums.bpf.h
new file mode 100644
index 000000000000..af704c5d6334
--- /dev/null
+++ b/tools/sched_ext/include/scx/enums.bpf.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Convenience macros for getting/setting struct scx_enums instances.
+ *
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ */
+#ifndef __SCX_ENUMS_BPF_H
+#define __SCX_ENUMS_BPF_H
+
+#include "enums.autogen.bpf.h"
+
+#endif /* __SCX_ENUMS_BPF_H */
diff --git a/tools/sched_ext/include/scx/enums.h b/tools/sched_ext/include/scx/enums.h
new file mode 100644
index 000000000000..34cbebe974b7
--- /dev/null
+++ b/tools/sched_ext/include/scx/enums.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Define struct scx_enums that stores the load-time values of enums
+ * used by the BPF program.
+ *
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ */
+
+#ifndef __SCX_ENUMS_H
+#define __SCX_ENUMS_H
+
+static inline void __ENUM_set(u64 *val, char *type, char *name)
+{
+	bool res;
+
+	res = __COMPAT_read_enum(type, name, val);
+	SCX_BUG_ON(!res, "enum not found(%s)", name);
+}
+
+#define SCX_ENUM_SET(skel, type, name) do {			\
+	__ENUM_set(&skel->rodata->__##name, #type, #name);	\
+	} while (0)
+
+
+#include "enums.autogen.h"
+
+#endif /* __SCX_ENUMS_H */
diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h
index 8ce2734402e1..66f856640ee7 100644
--- a/tools/sched_ext/include/scx/user_exit_info.h
+++ b/tools/sched_ext/include/scx/user_exit_info.h
@@ -10,6 +10,11 @@
 #ifndef __USER_EXIT_INFO_H
 #define __USER_EXIT_INFO_H
 
+#ifdef LSP
+#define __bpf__
+#include "../vmlinux.h"
+#endif
+
 enum uei_sizes {
 	UEI_REASON_LEN		= 128,
 	UEI_MSG_LEN		= 1024,
@@ -25,9 +30,7 @@ struct user_exit_info {
 
 #ifdef __bpf__
 
-#ifdef LSP
-#include "../vmlinux/vmlinux.h"
-#else
+#ifndef LSP
 #include "vmlinux.h"
 #endif
 #include <bpf/bpf_core_read.h>
diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c
index e6fad6211f6c..50bc1737c167 100644
--- a/tools/sched_ext/scx_central.bpf.c
+++ b/tools/sched_ext/scx_central.bpf.c
@@ -57,7 +57,7 @@ enum {
 
 const volatile s32 central_cpu;
 const volatile u32 nr_cpu_ids = 1;	/* !0 for veristat, set during init */
-const volatile u64 slice_ns = SCX_SLICE_DFL;
+const volatile u64 slice_ns;
 
 bool timer_pinned = true;
 u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
@@ -87,11 +87,6 @@ struct {
 	__type(value, struct central_timer);
 } central_timer SEC(".maps");
 
-static bool vtime_before(u64 a, u64 b)
-{
-	return (s64)(a - b) < 0;
-}
-
 s32 BPF_STRUCT_OPS(central_select_cpu, struct task_struct *p,
 		   s32 prev_cpu, u64 wake_flags)
 {
@@ -245,7 +240,7 @@ void BPF_STRUCT_OPS(central_running, struct task_struct *p)
 	s32 cpu = scx_bpf_task_cpu(p);
 	u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
 	if (started_at)
-		*started_at = bpf_ktime_get_ns() ?: 1;	/* 0 indicates idle */
+		*started_at = scx_bpf_now() ?: 1;	/* 0 indicates idle */
 }
 
 void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
@@ -258,7 +253,7 @@ void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
 
 static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
 {
-	u64 now = bpf_ktime_get_ns();
+	u64 now = scx_bpf_now();
 	u64 nr_to_kick = nr_queued;
 	s32 i, curr_cpu;
 
@@ -279,7 +274,7 @@ static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
 		/* kick iff the current one exhausted its slice */
 		started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
 		if (started_at && *started_at &&
-		    vtime_before(now, *started_at + slice_ns))
+		    time_before(now, *started_at + slice_ns))
 			continue;
 
 		/* and there's something pending */
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index e938156ed0a0..6ba6e610eeaa 100644
--- a/tools/sched_ext/scx_central.c
+++ b/tools/sched_ext/scx_central.c
@@ -10,6 +10,7 @@
 #include <unistd.h>
 #include <inttypes.h>
 #include <signal.h>
+#include <assert.h>
 #include <libgen.h>
 #include <bpf/bpf.h>
 #include <scx/common.h>
@@ -58,15 +59,24 @@ restart:
 
 	skel->rodata->central_cpu = 0;
 	skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
+	skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
+
+	assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
 
 	while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
 		switch (opt) {
 		case 's':
 			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
 			break;
-		case 'c':
-			skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
+		case 'c': {
+			u32 central_cpu = strtoul(optarg, NULL, 0);
+			if (central_cpu >= skel->rodata->nr_cpu_ids) {
+				fprintf(stderr, "invalid central CPU id value, %u given (%u max)\n", central_cpu, skel->rodata->nr_cpu_ids);
+				return -1;
+			}
+			skel->rodata->central_cpu = (s32)central_cpu;
 			break;
+		}
 		case 'v':
 			verbose = true;
 			break;
@@ -95,7 +105,7 @@ restart:
 	 */
 	cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
 	SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
-	CPU_ZERO(cpuset);
+	CPU_ZERO_S(CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset);
 	CPU_SET(skel->rodata->central_cpu, cpuset);
 	SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset),
 		   "Failed to affinitize to central CPU %d (max %d)",
diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
index 4e3afcd260bf..2c720e3ecad5 100644
--- a/tools/sched_ext/scx_flatcg.bpf.c
+++ b/tools/sched_ext/scx_flatcg.bpf.c
@@ -57,7 +57,7 @@ enum {
 char _license[] SEC("license") = "GPL";
 
 const volatile u32 nr_cpus = 32;	/* !0 for veristat, set during init */
-const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL;
+const volatile u64 cgrp_slice_ns;
 const volatile bool fifo_sched;
 
 u64 cvtime_now;
@@ -137,11 +137,6 @@ static u64 div_round_up(u64 dividend, u64 divisor)
 	return (dividend + divisor - 1) / divisor;
 }
 
-static bool vtime_before(u64 a, u64 b)
-{
-	return (s64)(a - b) < 0;
-}
-
 static bool cgv_node_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 {
 	struct cgv_node *cgc_a, *cgc_b;
@@ -271,7 +266,7 @@ static void cgrp_cap_budget(struct cgv_node *cgv_node, struct fcg_cgrp_ctx *cgc)
 	 */
 	max_budget = (cgrp_slice_ns * nr_cpus * cgc->hweight) /
 		(2 * FCG_HWEIGHT_ONE);
-	if (vtime_before(cvtime, cvtime_now - max_budget))
+	if (time_before(cvtime, cvtime_now - max_budget))
 		cvtime = cvtime_now - max_budget;
 
 	cgv_node->cvtime = cvtime;
@@ -401,7 +396,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags)
 		 * Limit the amount of budget that an idling task can accumulate
 		 * to one slice.
 		 */
-		if (vtime_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
+		if (time_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
 			tvtime = cgc->tvtime_now - SCX_SLICE_DFL;
 
 		scx_bpf_dsq_insert_vtime(p, cgrp->kn->id, SCX_SLICE_DFL,
@@ -535,7 +530,7 @@ void BPF_STRUCT_OPS(fcg_running, struct task_struct *p)
 		 * from multiple CPUs and thus racy. Any error should be
 		 * contained and temporary. Let's just live with it.
 		 */
-		if (vtime_before(cgc->tvtime_now, p->scx.dsq_vtime))
+		if (time_before(cgc->tvtime_now, p->scx.dsq_vtime))
 			cgc->tvtime_now = p->scx.dsq_vtime;
 	}
 	bpf_cgroup_release(cgrp);
@@ -645,7 +640,7 @@ static bool try_pick_next_cgroup(u64 *cgidp)
 	cgv_node = container_of(rb_node, struct cgv_node, rb_node);
 	cgid = cgv_node->cgid;
 
-	if (vtime_before(cvtime_now, cgv_node->cvtime))
+	if (time_before(cvtime_now, cgv_node->cvtime))
 		cvtime_now = cgv_node->cvtime;
 
 	/*
@@ -734,7 +729,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
 	struct fcg_cpu_ctx *cpuc;
 	struct fcg_cgrp_ctx *cgc;
 	struct cgroup *cgrp;
-	u64 now = bpf_ktime_get_ns();
+	u64 now = scx_bpf_now();
 	bool picked_next = false;
 
 	cpuc = find_cpu_ctx();
@@ -744,7 +739,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
 	if (!cpuc->cur_cgid)
 		goto pick_next_cgroup;
 
-	if (vtime_before(now, cpuc->cur_at + cgrp_slice_ns)) {
+	if (time_before(now, cpuc->cur_at + cgrp_slice_ns)) {
 		if (scx_bpf_dsq_move_to_local(cpuc->cur_cgid)) {
 			stat_inc(FCG_STAT_CNS_KEEP);
 			return;
@@ -920,14 +915,14 @@ void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p,
 		    struct cgroup *from, struct cgroup *to)
 {
 	struct fcg_cgrp_ctx *from_cgc, *to_cgc;
-	s64 vtime_delta;
+	s64 delta;
 
 	/* find_cgrp_ctx() triggers scx_ops_error() on lookup failures */
 	if (!(from_cgc = find_cgrp_ctx(from)) || !(to_cgc = find_cgrp_ctx(to)))
 		return;
 
-	vtime_delta = p->scx.dsq_vtime - from_cgc->tvtime_now;
-	p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta;
+	delta = time_delta(p->scx.dsq_vtime, from_cgc->tvtime_now);
+	p->scx.dsq_vtime = to_cgc->tvtime_now + delta;
 }
 
 s32 BPF_STRUCT_OPS_SLEEPABLE(fcg_init)
diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c
index 5d24ca9c29d9..6dd423eeb4ff 100644
--- a/tools/sched_ext/scx_flatcg.c
+++ b/tools/sched_ext/scx_flatcg.c
@@ -137,6 +137,7 @@ restart:
 	skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
 
 	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+	skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
 	while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
 		double v;
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index ee264947e0c3..26c40ca4f36c 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -33,7 +33,7 @@ enum consts {
 
 char _license[] SEC("license") = "GPL";
 
-const volatile u64 slice_ns = SCX_SLICE_DFL;
+const volatile u64 slice_ns;
 const volatile u32 stall_user_nth;
 const volatile u32 stall_kernel_nth;
 const volatile u32 dsp_inf_loop_after;
@@ -231,7 +231,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 	}
 
 	/* if select_cpu() wasn't called, try direct dispatch */
-	if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
+	if (!__COMPAT_is_enq_cpu_selected(enq_flags) &&
 	    (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
 		__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
 		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
@@ -763,6 +763,8 @@ static void dump_shared_dsq(void)
 
 static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
 {
+	struct scx_event_stats events;
+
 	bpf_rcu_read_lock();
 	dispatch_highpri(true);
 	bpf_rcu_read_unlock();
@@ -772,6 +774,25 @@ static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
 	if (print_shared_dsq)
 		dump_shared_dsq();
 
+	__COMPAT_scx_bpf_events(&events, sizeof(events));
+
+	bpf_printk("%35s: %lld", "SCX_EV_SELECT_CPU_FALLBACK",
+		   scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK));
+	bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE",
+		   scx_read_event(&events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE));
+	bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_KEEP_LAST",
+		   scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
+	bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
+		   scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
+	bpf_printk("%35s: %lld", "SCX_EV_ENQ_SLICE_DFL",
+		   scx_read_event(&events, SCX_EV_ENQ_SLICE_DFL));
+	bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
+		   scx_read_event(&events, SCX_EV_BYPASS_DURATION));
+	bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
+		   scx_read_event(&events, SCX_EV_BYPASS_DISPATCH));
+	bpf_printk("%35s: %lld", "SCX_EV_BYPASS_ACTIVATE",
+		   scx_read_event(&events, SCX_EV_BYPASS_ACTIVATE));
+
 	bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
 	return 0;
 }
diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c
index ac45a02b4055..c4912ab2e76f 100644
--- a/tools/sched_ext/scx_qmap.c
+++ b/tools/sched_ext/scx_qmap.c
@@ -64,6 +64,8 @@ int main(int argc, char **argv)
 
 	skel = SCX_OPS_OPEN(qmap_ops, scx_qmap);
 
+	skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
+
 	while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) {
 		switch (opt) {
 		case 's':
diff --git a/tools/sched_ext/scx_simple.bpf.c b/tools/sched_ext/scx_simple.bpf.c
index 31f915b286c6..e6de99dba7db 100644
--- a/tools/sched_ext/scx_simple.bpf.c
+++ b/tools/sched_ext/scx_simple.bpf.c
@@ -52,11 +52,6 @@ static void stat_inc(u32 idx)
 		(*cnt_p)++;
 }
 
-static inline bool vtime_before(u64 a, u64 b)
-{
-	return (s64)(a - b) < 0;
-}
-
 s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 {
 	bool is_idle = false;
@@ -84,7 +79,7 @@ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
 		 * Limit the amount of budget that an idling task can accumulate
 		 * to one slice.
 		 */
-		if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
+		if (time_before(vtime, vtime_now - SCX_SLICE_DFL))
 			vtime = vtime_now - SCX_SLICE_DFL;
 
 		scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime,
@@ -108,7 +103,7 @@ void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
 	 * thus racy. Any error should be contained and temporary. Let's just
 	 * live with it.
 	 */
-	if (vtime_before(vtime_now, p->scx.dsq_vtime))
+	if (time_before(vtime_now, p->scx.dsq_vtime))
 		vtime_now = p->scx.dsq_vtime;
 }
 
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 0aa4005017c7..45f4abef7064 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -136,6 +136,33 @@ else
 NO_SUBDIR = :
 endif
 
+# Beautify output
+# ---------------------------------------------------------------------------
+#
+# Most of build commands in Kbuild start with "cmd_". You can optionally define
+# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from
+# that command is printed by default.
+#
+# e.g.)
+#    quiet_cmd_depmod = DEPMOD  $(MODLIB)
+#          cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE)
+#
+# A simple variant is to prefix commands with $(Q) - that's useful
+# for commands that shall be hidden in non-verbose mode.
+#
+#    $(Q)$(MAKE) $(build)=scripts/basic
+#
+# To put more focus on warnings, be less verbose as default
+# Use 'make V=1' to see the full commands
+
+ifeq ($(V),1)
+  quiet =
+  Q =
+else
+  quiet = quiet_
+  Q = @
+endif
+
 # If the user is running make -s (silent mode), suppress echoing of commands
 # make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS.
 ifeq ($(filter 3.%,$(MAKE_VERSION)),)
@@ -146,8 +173,11 @@ endif
 
 ifneq ($(findstring s,$(short-opts)),)
   silent=1
+  quiet=silent_
 endif
 
+export quiet Q
+
 #
 # Define a callable command for descending to a new directory
 #
diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl
new file mode 100644
index 000000000000..ebbdb3c42e9f
--- /dev/null
+++ b/tools/scripts/syscall.tbl
@@ -0,0 +1,409 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# This file contains the system call numbers for all of the
+# more recently added architectures.
+#
+# As a basic principle, no duplication of functionality
+# should be added, e.g. we don't use lseek when llseek
+# is present. New architectures should use this file
+# and implement the less feature-full calls in user space.
+#
+0	common	io_setup			sys_io_setup			compat_sys_io_setup
+1	common	io_destroy			sys_io_destroy
+2	common	io_submit			sys_io_submit			compat_sys_io_submit
+3	common	io_cancel			sys_io_cancel
+4	time32	io_getevents			sys_io_getevents_time32
+4	64	io_getevents			sys_io_getevents
+5	common	setxattr			sys_setxattr
+6	common	lsetxattr			sys_lsetxattr
+7	common	fsetxattr			sys_fsetxattr
+8	common	getxattr			sys_getxattr
+9	common	lgetxattr			sys_lgetxattr
+10	common	fgetxattr			sys_fgetxattr
+11	common	listxattr			sys_listxattr
+12	common	llistxattr			sys_llistxattr
+13	common	flistxattr			sys_flistxattr
+14	common	removexattr			sys_removexattr
+15	common	lremovexattr			sys_lremovexattr
+16	common	fremovexattr			sys_fremovexattr
+17	common	getcwd				sys_getcwd
+18	common	lookup_dcookie			sys_ni_syscall
+19	common	eventfd2			sys_eventfd2
+20	common	epoll_create1			sys_epoll_create1
+21	common	epoll_ctl			sys_epoll_ctl
+22	common	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
+23	common	dup				sys_dup
+24	common	dup3				sys_dup3
+25	32	fcntl64				sys_fcntl64			compat_sys_fcntl64
+25	64	fcntl				sys_fcntl
+26	common	inotify_init1			sys_inotify_init1
+27	common	inotify_add_watch		sys_inotify_add_watch
+28	common	inotify_rm_watch		sys_inotify_rm_watch
+29	common	ioctl				sys_ioctl			compat_sys_ioctl
+30	common	ioprio_set			sys_ioprio_set
+31	common	ioprio_get			sys_ioprio_get
+32	common	flock				sys_flock
+33	common	mknodat				sys_mknodat
+34	common	mkdirat				sys_mkdirat
+35	common	unlinkat			sys_unlinkat
+36	common	symlinkat			sys_symlinkat
+37	common	linkat				sys_linkat
+# renameat is superseded with flags by renameat2
+38	renameat renameat			sys_renameat
+39	common	umount2				sys_umount
+40	common	mount				sys_mount
+41	common	pivot_root			sys_pivot_root
+42	common	nfsservctl			sys_ni_syscall
+43	32	statfs64			sys_statfs64			compat_sys_statfs64
+43	64	statfs				sys_statfs
+44	32	fstatfs64			sys_fstatfs64			compat_sys_fstatfs64
+44	64	fstatfs				sys_fstatfs
+45	32	truncate64			sys_truncate64			compat_sys_truncate64
+45	64	truncate			sys_truncate
+46	32	ftruncate64			sys_ftruncate64			compat_sys_ftruncate64
+46	64	ftruncate			sys_ftruncate
+47	common	fallocate			sys_fallocate			compat_sys_fallocate
+48	common	faccessat			sys_faccessat
+49	common	chdir				sys_chdir
+50	common	fchdir				sys_fchdir
+51	common	chroot				sys_chroot
+52	common	fchmod				sys_fchmod
+53	common	fchmodat			sys_fchmodat
+54	common	fchownat			sys_fchownat
+55	common	fchown				sys_fchown
+56	common	openat				sys_openat
+57	common	close				sys_close
+58	common	vhangup				sys_vhangup
+59	common	pipe2				sys_pipe2
+60	common	quotactl			sys_quotactl
+61	common	getdents64			sys_getdents64
+62	32	llseek				sys_llseek
+62	64	lseek				sys_lseek
+63	common	read				sys_read
+64	common	write				sys_write
+65	common	readv				sys_readv			sys_readv
+66	common	writev				sys_writev			sys_writev
+67	common	pread64				sys_pread64			compat_sys_pread64
+68	common	pwrite64			sys_pwrite64			compat_sys_pwrite64
+69	common	preadv				sys_preadv			compat_sys_preadv
+70	common	pwritev				sys_pwritev			compat_sys_pwritev
+71	32	sendfile64			sys_sendfile64
+71	64	sendfile			sys_sendfile64
+72	time32	pselect6			sys_pselect6_time32		compat_sys_pselect6_time32
+72	64	pselect6			sys_pselect6
+73	time32	ppoll				sys_ppoll_time32		compat_sys_ppoll_time32
+73	64	ppoll				sys_ppoll
+74	common	signalfd4			sys_signalfd4			compat_sys_signalfd4
+75	common	vmsplice			sys_vmsplice
+76	common	splice				sys_splice
+77	common	tee				sys_tee
+78	common	readlinkat			sys_readlinkat
+79	stat64	fstatat64			sys_fstatat64
+79	64	newfstatat			sys_newfstatat
+80	stat64	fstat64				sys_fstat64
+80	64	fstat				sys_newfstat
+81	common	sync				sys_sync
+82	common	fsync				sys_fsync
+83	common	fdatasync			sys_fdatasync
+84	common	sync_file_range			sys_sync_file_range		compat_sys_sync_file_range
+85	common	timerfd_create			sys_timerfd_create
+86	time32	timerfd_settime			sys_timerfd_settime32
+86	64	timerfd_settime			sys_timerfd_settime
+87	time32	timerfd_gettime			sys_timerfd_gettime32
+87	64	timerfd_gettime			sys_timerfd_gettime
+88	time32	utimensat			sys_utimensat_time32
+88	64	utimensat			sys_utimensat
+89	common	acct				sys_acct
+90	common	capget				sys_capget
+91	common	capset				sys_capset
+92	common	personality			sys_personality
+93	common	exit				sys_exit
+94	common	exit_group			sys_exit_group
+95	common	waitid				sys_waitid			compat_sys_waitid
+96	common	set_tid_address			sys_set_tid_address
+97	common	unshare				sys_unshare
+98	time32	futex				sys_futex_time32
+98	64	futex				sys_futex
+99	common	set_robust_list			sys_set_robust_list		compat_sys_set_robust_list
+100	common	get_robust_list			sys_get_robust_list		compat_sys_get_robust_list
+101	time32	nanosleep			sys_nanosleep_time32
+101	64	nanosleep			sys_nanosleep
+102	common	getitimer			sys_getitimer			compat_sys_getitimer
+103	common	setitimer			sys_setitimer			compat_sys_setitimer
+104	common	kexec_load			sys_kexec_load			compat_sys_kexec_load
+105	common	init_module			sys_init_module
+106	common	delete_module			sys_delete_module
+107	common	timer_create			sys_timer_create		compat_sys_timer_create
+108	time32	timer_gettime			sys_timer_gettime32
+108	64	timer_gettime			sys_timer_gettime
+109	common	timer_getoverrun		sys_timer_getoverrun
+110	time32	timer_settime			sys_timer_settime32
+110	64	timer_settime			sys_timer_settime
+111	common	timer_delete			sys_timer_delete
+112	time32	clock_settime			sys_clock_settime32
+112	64	clock_settime			sys_clock_settime
+113	time32	clock_gettime			sys_clock_gettime32
+113	64	clock_gettime			sys_clock_gettime
+114	time32	clock_getres			sys_clock_getres_time32
+114	64	clock_getres			sys_clock_getres
+115	time32	clock_nanosleep			sys_clock_nanosleep_time32
+115	64	clock_nanosleep			sys_clock_nanosleep
+116	common	syslog				sys_syslog
+117	common	ptrace				sys_ptrace			compat_sys_ptrace
+118	common	sched_setparam			sys_sched_setparam
+119	common	sched_setscheduler		sys_sched_setscheduler
+120	common	sched_getscheduler		sys_sched_getscheduler
+121	common	sched_getparam			sys_sched_getparam
+122	common	sched_setaffinity		sys_sched_setaffinity		compat_sys_sched_setaffinity
+123	common	sched_getaffinity		sys_sched_getaffinity		compat_sys_sched_getaffinity
+124	common	sched_yield			sys_sched_yield
+125	common	sched_get_priority_max		sys_sched_get_priority_max
+126	common	sched_get_priority_min		sys_sched_get_priority_min
+127	time32	sched_rr_get_interval		sys_sched_rr_get_interval_time32
+127	64	sched_rr_get_interval		sys_sched_rr_get_interval
+128	common	restart_syscall			sys_restart_syscall
+129	common	kill				sys_kill
+130	common	tkill				sys_tkill
+131	common	tgkill				sys_tgkill
+132	common	sigaltstack			sys_sigaltstack			compat_sys_sigaltstack
+133	common	rt_sigsuspend			sys_rt_sigsuspend		compat_sys_rt_sigsuspend
+134	common	rt_sigaction			sys_rt_sigaction		compat_sys_rt_sigaction
+135	common	rt_sigprocmask			sys_rt_sigprocmask		compat_sys_rt_sigprocmask
+136	common	rt_sigpending			sys_rt_sigpending		compat_sys_rt_sigpending
+137	time32	rt_sigtimedwait			sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
+137	64	rt_sigtimedwait			sys_rt_sigtimedwait
+138	common	rt_sigqueueinfo			sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
+139	common	rt_sigreturn			sys_rt_sigreturn		compat_sys_rt_sigreturn
+140	common	setpriority			sys_setpriority
+141	common	getpriority			sys_getpriority
+142	common	reboot				sys_reboot
+143	common	setregid			sys_setregid
+144	common	setgid				sys_setgid
+145	common	setreuid			sys_setreuid
+146	common	setuid				sys_setuid
+147	common	setresuid			sys_setresuid
+148	common	getresuid			sys_getresuid
+149	common	setresgid			sys_setresgid
+150	common	getresgid			sys_getresgid
+151	common	setfsuid			sys_setfsuid
+152	common	setfsgid			sys_setfsgid
+153	common	times				sys_times			compat_sys_times
+154	common	setpgid				sys_setpgid
+155	common	getpgid				sys_getpgid
+156	common	getsid				sys_getsid
+157	common	setsid				sys_setsid
+158	common	getgroups			sys_getgroups
+159	common	setgroups			sys_setgroups
+160	common	uname				sys_newuname
+161	common	sethostname			sys_sethostname
+162	common	setdomainname			sys_setdomainname
+# getrlimit and setrlimit are superseded with prlimit64
+163	rlimit	getrlimit			sys_getrlimit			compat_sys_getrlimit
+164	rlimit	setrlimit			sys_setrlimit			compat_sys_setrlimit
+165	common	getrusage			sys_getrusage			compat_sys_getrusage
+166	common	umask				sys_umask
+167	common	prctl				sys_prctl
+168	common	getcpu				sys_getcpu
+169	time32	gettimeofday			sys_gettimeofday		compat_sys_gettimeofday
+169	64	gettimeofday			sys_gettimeofday
+170	time32	settimeofday			sys_settimeofday		compat_sys_settimeofday
+170	64	settimeofday			sys_settimeofday
+171	time32	adjtimex			sys_adjtimex_time32
+171	64	adjtimex			sys_adjtimex
+172	common	getpid				sys_getpid
+173	common	getppid				sys_getppid
+174	common	getuid				sys_getuid
+175	common	geteuid				sys_geteuid
+176	common	getgid				sys_getgid
+177	common	getegid				sys_getegid
+178	common	gettid				sys_gettid
+179	common	sysinfo				sys_sysinfo			compat_sys_sysinfo
+180	common	mq_open				sys_mq_open			compat_sys_mq_open
+181	common	mq_unlink			sys_mq_unlink
+182	time32	mq_timedsend			sys_mq_timedsend_time32
+182	64	mq_timedsend			sys_mq_timedsend
+183	time32	mq_timedreceive			sys_mq_timedreceive_time32
+183	64	mq_timedreceive			sys_mq_timedreceive
+184	common	mq_notify			sys_mq_notify			compat_sys_mq_notify
+185	common	mq_getsetattr			sys_mq_getsetattr		compat_sys_mq_getsetattr
+186	common	msgget				sys_msgget
+187	common	msgctl				sys_msgctl			compat_sys_msgctl
+188	common	msgrcv				sys_msgrcv			compat_sys_msgrcv
+189	common	msgsnd				sys_msgsnd			compat_sys_msgsnd
+190	common	semget				sys_semget
+191	common	semctl				sys_semctl			compat_sys_semctl
+192	time32	semtimedop			sys_semtimedop_time32
+192	64	semtimedop			sys_semtimedop
+193	common	semop				sys_semop
+194	common	shmget				sys_shmget
+195	common	shmctl				sys_shmctl			compat_sys_shmctl
+196	common	shmat				sys_shmat			compat_sys_shmat
+197	common	shmdt				sys_shmdt
+198	common	socket				sys_socket
+199	common	socketpair			sys_socketpair
+200	common	bind				sys_bind
+201	common	listen				sys_listen
+202	common	accept				sys_accept
+203	common	connect				sys_connect
+204	common	getsockname			sys_getsockname
+205	common	getpeername			sys_getpeername
+206	common	sendto				sys_sendto
+207	common	recvfrom			sys_recvfrom			compat_sys_recvfrom
+208	common	setsockopt			sys_setsockopt			sys_setsockopt
+209	common	getsockopt			sys_getsockopt			sys_getsockopt
+210	common	shutdown			sys_shutdown
+211	common	sendmsg				sys_sendmsg			compat_sys_sendmsg
+212	common	recvmsg				sys_recvmsg			compat_sys_recvmsg
+213	common	readahead			sys_readahead			compat_sys_readahead
+214	common	brk				sys_brk
+215	common	munmap				sys_munmap
+216	common	mremap				sys_mremap
+217	common	add_key				sys_add_key
+218	common	request_key			sys_request_key
+219	common	keyctl				sys_keyctl			compat_sys_keyctl
+220	common	clone				sys_clone
+221	common	execve				sys_execve			compat_sys_execve
+222	32	mmap2				sys_mmap2
+222	64	mmap				sys_mmap
+223	32	fadvise64_64			sys_fadvise64_64		compat_sys_fadvise64_64
+223	64	fadvise64			sys_fadvise64_64
+224	common	swapon				sys_swapon
+225	common	swapoff				sys_swapoff
+226	common	mprotect			sys_mprotect
+227	common	msync				sys_msync
+228	common	mlock				sys_mlock
+229	common	munlock				sys_munlock
+230	common	mlockall			sys_mlockall
+231	common	munlockall			sys_munlockall
+232	common	mincore				sys_mincore
+233	common	madvise				sys_madvise
+234	common	remap_file_pages		sys_remap_file_pages
+235	common	mbind				sys_mbind
+236	common	get_mempolicy			sys_get_mempolicy
+237	common	set_mempolicy			sys_set_mempolicy
+238	common	migrate_pages			sys_migrate_pages
+239	common	move_pages			sys_move_pages
+240	common	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
+241	common	perf_event_open			sys_perf_event_open
+242	common	accept4				sys_accept4
+243	time32	recvmmsg			sys_recvmmsg_time32		compat_sys_recvmmsg_time32
+243	64	recvmmsg			sys_recvmmsg
+# Architectures may provide up to 16 syscalls of their own between 244 and 259
+244	arc	cacheflush			sys_cacheflush
+245	arc	arc_settls			sys_arc_settls
+246	arc	arc_gettls			sys_arc_gettls
+247	arc	sysfs				sys_sysfs
+248	arc	arc_usr_cmpxchg			sys_arc_usr_cmpxchg
+
+244	csky	set_thread_area			sys_set_thread_area
+245	csky	cacheflush			sys_cacheflush
+
+244	nios2	cacheflush			sys_cacheflush
+
+244	or1k	or1k_atomic			sys_or1k_atomic
+
+258	riscv	riscv_hwprobe			sys_riscv_hwprobe
+259	riscv	riscv_flush_icache		sys_riscv_flush_icache
+
+260	time32	wait4				sys_wait4			compat_sys_wait4
+260	64	wait4				sys_wait4
+261	common	prlimit64			sys_prlimit64
+262	common	fanotify_init			sys_fanotify_init
+263	common	fanotify_mark			sys_fanotify_mark
+264	common	name_to_handle_at		sys_name_to_handle_at
+265	common	open_by_handle_at		sys_open_by_handle_at
+266	time32	clock_adjtime			sys_clock_adjtime32
+266	64	clock_adjtime			sys_clock_adjtime
+267	common	syncfs				sys_syncfs
+268	common	setns				sys_setns
+269	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
+270	common	process_vm_readv		sys_process_vm_readv
+271	common	process_vm_writev		sys_process_vm_writev
+272	common	kcmp				sys_kcmp
+273	common	finit_module			sys_finit_module
+274	common	sched_setattr			sys_sched_setattr
+275	common	sched_getattr			sys_sched_getattr
+276	common	renameat2			sys_renameat2
+277	common	seccomp				sys_seccomp
+278	common	getrandom			sys_getrandom
+279	common	memfd_create			sys_memfd_create
+280	common	bpf				sys_bpf
+281	common	execveat			sys_execveat			compat_sys_execveat
+282	common	userfaultfd			sys_userfaultfd
+283	common	membarrier			sys_membarrier
+284	common	mlock2				sys_mlock2
+285	common	copy_file_range			sys_copy_file_range
+286	common	preadv2				sys_preadv2			compat_sys_preadv2
+287	common	pwritev2			sys_pwritev2			compat_sys_pwritev2
+288	common	pkey_mprotect			sys_pkey_mprotect
+289	common	pkey_alloc			sys_pkey_alloc
+290	common	pkey_free			sys_pkey_free
+291	common	statx				sys_statx
+292	time32	io_pgetevents			sys_io_pgetevents_time32	compat_sys_io_pgetevents
+292	64	io_pgetevents			sys_io_pgetevents
+293	common	rseq				sys_rseq
+294	common	kexec_file_load			sys_kexec_file_load
+# 295 through 402 are unassigned to sync up with generic numbers don't use
+403	32	clock_gettime64			sys_clock_gettime
+404	32	clock_settime64			sys_clock_settime
+405	32	clock_adjtime64			sys_clock_adjtime
+406	32	clock_getres_time64		sys_clock_getres
+407	32	clock_nanosleep_time64		sys_clock_nanosleep
+408	32	timer_gettime64			sys_timer_gettime
+409	32	timer_settime64			sys_timer_settime
+410	32	timerfd_gettime64		sys_timerfd_gettime
+411	32	timerfd_settime64		sys_timerfd_settime
+412	32	utimensat_time64		sys_utimensat
+413	32	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
+414	32	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
+416	32	io_pgetevents_time64		sys_io_pgetevents		compat_sys_io_pgetevents_time64
+417	32	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64		sys_mq_timedsend
+419	32	mq_timedreceive_time64		sys_mq_timedreceive
+420	32	semtimedop_time64		sys_semtimedop
+421	32	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64			sys_futex
+423	32	sched_rr_get_interval_time64	sys_sched_rr_get_interval
+424	common	pidfd_send_signal		sys_pidfd_send_signal
+425	common	io_uring_setup			sys_io_uring_setup
+426	common	io_uring_enter			sys_io_uring_enter
+427	common	io_uring_register		sys_io_uring_register
+428	common	open_tree			sys_open_tree
+429	common	move_mount			sys_move_mount
+430	common	fsopen				sys_fsopen
+431	common	fsconfig			sys_fsconfig
+432	common	fsmount				sys_fsmount
+433	common	fspick				sys_fspick
+434	common	pidfd_open			sys_pidfd_open
+435	common	clone3				sys_clone3
+436	common	close_range			sys_close_range
+437	common	openat2				sys_openat2
+438	common	pidfd_getfd			sys_pidfd_getfd
+439	common	faccessat2			sys_faccessat2
+440	common	process_madvise			sys_process_madvise
+441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
+442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_fd			sys_quotactl_fd
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
+447	memfd_secret	memfd_secret		sys_memfd_secret
+448	common	process_mrelease		sys_process_mrelease
+449	common	futex_waitv			sys_futex_waitv
+450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
+451	common	cachestat			sys_cachestat
+452	common	fchmodat2			sys_fchmodat2
+453	common	map_shadow_stack		sys_map_shadow_stack
+454	common	futex_wake			sys_futex_wake
+455	common	futex_wait			sys_futex_wait
+456	common	futex_requeue			sys_futex_requeue
+457	common	statmount			sys_statmount
+458	common	listmount			sys_listmount
+459	common	lsm_get_self_attr		sys_lsm_get_self_attr
+460	common	lsm_set_self_attr		sys_lsm_set_self_attr
+461	common	lsm_list_modules		sys_lsm_list_modules
+462	common	mseal				sys_mseal
+463	common	setxattrat			sys_setxattrat
+464	common	getxattrat			sys_getxattrat
+465	common	listxattrat			sys_listxattrat
+466	common	removexattrat			sys_removexattrat
diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph
index f14bdfedee8f..b6196ee5065a 100755
--- a/tools/sound/dapm-graph
+++ b/tools/sound/dapm-graph
@@ -10,7 +10,7 @@ set -eu
 
 STYLE_COMPONENT_ON="color=dodgerblue;style=bold"
 STYLE_COMPONENT_OFF="color=gray40;style=filled;fillcolor=gray90"
-STYLE_NODE_ON="shape=box,style=bold,color=green4"
+STYLE_NODE_ON="shape=box,style=bold,color=green4,fillcolor=white"
 STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95"
 
 # Print usage and exit
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index d0337c11f9ee..cc8948f49117 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -725,7 +725,7 @@ static void default_mock_decoder(struct cxl_decoder *cxld)
 	cxld->reset = mock_decoder_reset;
 }
 
-static int first_decoder(struct device *dev, void *data)
+static int first_decoder(struct device *dev, const void *data)
 {
 	struct cxl_decoder *cxld;
 
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 347c1e7b37bd..8d731bd63988 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -401,6 +401,10 @@ struct cxl_test_gen_media gen_media = {
 			.channel = 1,
 			.rank = 30,
 		},
+		.component_id = { 0x3, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d },
+		.cme_threshold_ev_flags = 3,
+		.cme_count = { 33, 0, 0 },
+		.sub_type = 0x2,
 	},
 };
 
@@ -429,6 +433,11 @@ struct cxl_test_dram dram = {
 		.bank_group = 5,
 		.bank = 2,
 		.column = {0xDE, 0xAD},
+		.component_id = { 0x1, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d },
+		.sub_channel = 8,
+		.cme_threshold_ev_flags = 2,
+		.cvme_count = { 14, 0, 0 },
+		.sub_type = 0x5,
 	},
 };
 
@@ -456,7 +465,10 @@ struct cxl_test_mem_module mem_module = {
 			.dirty_shutdown_cnt = { 0xde, 0xad, 0xbe, 0xef },
 			.cor_vol_err_cnt = { 0xde, 0xad, 0xbe, 0xef },
 			.cor_per_err_cnt = { 0xde, 0xad, 0xbe, 0xef },
-		}
+		},
+		/* .validity_flags = <set below> */
+		.component_id = { 0x2, 0x74, 0xc5, 0x8, 0x9a, 0x1a, 0xb, 0xfc, 0xd2, 0x7e, 0x2f, 0x31, 0x9b, 0x3c, 0x81, 0x4d },
+		.event_sub_type = 0x3,
 	},
 };
 
@@ -478,13 +490,18 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds,
 
 static void cxl_mock_add_event_logs(struct mock_event_store *mes)
 {
-	put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK,
+	put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK |
+			   CXL_GMER_VALID_COMPONENT | CXL_GMER_VALID_COMPONENT_ID_FORMAT,
 			   &gen_media.rec.media_hdr.validity_flags);
 
 	put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP |
-			   CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN,
+			   CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN | CXL_DER_VALID_SUB_CHANNEL |
+			   CXL_DER_VALID_COMPONENT | CXL_DER_VALID_COMPONENT_ID_FORMAT,
 			   &dram.rec.media_hdr.validity_flags);
 
+	put_unaligned_le16(CXL_MMER_VALID_COMPONENT | CXL_MMER_VALID_COMPONENT_ID_FORMAT,
+			   &mem_module.rec.validity_flags);
+
 	mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed);
 	mes_add_event(mes, CXL_EVENT_TYPE_INFO,
 		      (struct cxl_event_record_raw *)&gen_media);
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 450c7566c33f..af2594e4f35d 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -228,16 +228,16 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL");
 
-int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port,
+int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
 			       struct cxl_endpoint_dvsec_info *info)
 {
 	int rc = 0, index;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
 
-	if (ops && ops->is_mock_dev(dev))
+	if (ops && ops->is_mock_dev(cxlds->dev))
 		rc = 0;
 	else
-		rc = cxl_dvsec_rr_decode(dev, port, info);
+		rc = cxl_dvsec_rr_decode(cxlds, info);
 	put_cxl_mock_ops(index);
 
 	return rc;
diff --git a/tools/testing/ktest/examples/include/defaults.conf b/tools/testing/ktest/examples/include/defaults.conf
index 63a1a83f4f0b..f6d8517a471e 100644
--- a/tools/testing/ktest/examples/include/defaults.conf
+++ b/tools/testing/ktest/examples/include/defaults.conf
@@ -46,7 +46,7 @@ CLEAR_LOG = 1
 
 SSH_USER = root
 
-# For accesing the machine, we will ssh to root@machine.
+# For accessing the machine, we will ssh to root@machine.
 SSH := ssh ${SSH_USER}@${MACHINE}
 
 # Update this. The default here is ktest will ssh to the target box
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index dacad94e2be4..8c8da966c641 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1245,7 +1245,7 @@ sub __read_config {
 	    # Config variables are only active while reading the
 	    # config and can be defined anywhere. They also ignore
 	    # TEST_START and DEFAULTS, but are skipped if they are in
-	    # on of these sections that have SKIP defined.
+	    # one of these sections that have SKIP defined.
 	    # The save variable can be
 	    # defined multiple times and the new one simply overrides
 	    # the previous one.
@@ -2419,6 +2419,11 @@ sub get_version {
     return if ($have_version);
     doprint "$make kernelrelease ... ";
     $version = `$make -s kernelrelease | tail -1`;
+    if (!length($version)) {
+	run_command "$make allnoconfig" or return 0;
+	doprint "$make kernelrelease ... ";
+	$version = `$make -s kernelrelease | tail -1`;
+    }
     chomp($version);
     doprint "$version\n";
     $have_version = 1;
@@ -2960,8 +2965,6 @@ sub run_bisect_test {
 
     my $failed = 0;
     my $result;
-    my $output;
-    my $ret;
 
     $in_bisect = 1;
 
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index b3b00269a52a..b0049be00c70 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -38,9 +38,6 @@ CONFIG_IWLWIFI=y
 CONFIG_DAMON=y
 CONFIG_DAMON_VADDR=y
 CONFIG_DAMON_PADDR=y
-CONFIG_DEBUG_FS=y
-CONFIG_DAMON_DBGFS=y
-CONFIG_DAMON_DBGFS_DEPRECATED=y
 
 CONFIG_REGMAP_BUILD=y
 
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 676fa99a8b19..7f9ae55fd6d5 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -312,7 +312,16 @@ def massage_argv(argv: Sequence[str]) -> Sequence[str]:
 	return list(map(massage_arg, argv))
 
 def get_default_jobs() -> int:
-	return len(os.sched_getaffinity(0))
+	if sys.version_info >= (3, 13):
+		if (ncpu := os.process_cpu_count()) is not None:
+			return ncpu
+		raise RuntimeError("os.process_cpu_count() returned None")
+	 # See https://github.com/python/cpython/blob/b61fece/Lib/os.py#L1175-L1186.
+	if sys.platform != "darwin":
+		return len(os.sched_getaffinity(0))
+	if (ncpu := os.cpu_count()) is not None:
+		return ncpu
+	raise RuntimeError("os.cpu_count() returned None")
 
 def add_common_opts(parser: argparse.ArgumentParser) -> None:
 	parser.add_argument('--build_dir',
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index e76d7894b6c5..d30f90eae9a4 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -125,6 +125,9 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations):
 				'-append', ' '.join(params + [self._kernel_command_line]),
 				'-no-reboot',
 				'-nographic',
+				'-accel', 'kvm',
+				'-accel', 'hvf',
+				'-accel', 'tcg',
 				'-serial', self._serial] + self._extra_qemu_params
 		# Note: shlex.join() does what we want, but requires python 3.8+.
 		print('Running tests with:\n$', ' '.join(shlex.quote(arg) for arg in qemu_command))
diff --git a/tools/testing/kunit/qemu_configs/arm64.py b/tools/testing/kunit/qemu_configs/arm64.py
index d3ff27024755..5c44d3a87e6d 100644
--- a/tools/testing/kunit/qemu_configs/arm64.py
+++ b/tools/testing/kunit/qemu_configs/arm64.py
@@ -9,4 +9,4 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''',
 			   qemu_arch='aarch64',
 			   kernel_path='arch/arm64/boot/Image.gz',
 			   kernel_command_line='console=ttyAMA0',
-			   extra_qemu_params=['-machine', 'virt', '-cpu', 'max,pauth-impdef=on'])
+			   extra_qemu_params=['-machine', 'virt', '-cpu', 'max'])
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index cffaf2245d4f..eaff1b036989 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -227,6 +227,7 @@ static void *load_creator(void *ptr)
 			unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) -
 						(1 << order);
 			item_insert_order(tree, index, order);
+			xa_set_mark(tree, index, XA_MARK_1);
 			item_delete_rcu(tree, index);
 		}
 	}
@@ -242,8 +243,11 @@ static void *load_worker(void *ptr)
 
 	rcu_register_thread();
 	while (!stop_iteration) {
+		unsigned long find_index = (2 << RADIX_TREE_MAP_SHIFT) + 1;
 		struct item *item = xa_load(ptr, index);
 		assert(!xa_is_internal(item));
+		item = xa_find(ptr, &find_index, index, XA_MARK_1);
+		assert(!xa_is_internal(item));
 	}
 	rcu_unregister_thread();
 
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2401e973c359..2ebaf5e6942e 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -18,6 +18,7 @@ TARGETS += devices/error_logs
 TARGETS += devices/probe
 TARGETS += dmabuf-heaps
 TARGETS += drivers/dma-buf
+TARGETS += drivers/ntsync
 TARGETS += drivers/s390x/uvdevice
 TARGETS += drivers/net
 TARGETS += drivers/net/bonding
@@ -34,6 +35,7 @@ TARGETS += filesystems/epoll
 TARGETS += filesystems/fat
 TARGETS += filesystems/overlayfs
 TARGETS += filesystems/statmount
+TARGETS += filesystems/mount-notify
 TARGETS += firmware
 TARGETS += fpu
 TARGETS += ftrace
@@ -72,6 +74,7 @@ TARGETS += net/packetdrill
 TARGETS += net/rds
 TARGETS += net/tcp_ao
 TARGETS += nsfs
+TARGETS += pci_endpoint
 TARGETS += pcie_bwctrl
 TARGETS += perf_events
 TARGETS += pidfd
diff --git a/tools/testing/selftests/acct/acct_syscall.c b/tools/testing/selftests/acct/acct_syscall.c
index e44e8fe1f4a3..87c044fb9293 100644
--- a/tools/testing/selftests/acct/acct_syscall.c
+++ b/tools/testing/selftests/acct/acct_syscall.c
@@ -24,7 +24,7 @@ int main(void)
 
 	// Check if test is run a root
 	if (geteuid()) {
-		ksft_test_result_skip("This test needs root to run!\n");
+		ksft_exit_skip("This test needs root to run!\n");
 		return 1;
 	}
 
diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c
index 859345379044..e3cec3723ffa 100644
--- a/tools/testing/selftests/arm64/fp/kernel-test.c
+++ b/tools/testing/selftests/arm64/fp/kernel-test.c
@@ -46,8 +46,6 @@ static void handle_kick_signal(int sig, siginfo_t *info, void *context)
 }
 
 static char *drivers[] = {
-	"crct10dif-arm64-ce",
-	/* "crct10dif-arm64-neon", - Same priority as generic */
 	"sha1-ce",
 	"sha224-arm64",
 	"sha224-arm64-neon",
diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
index 303260a6dc65..3bfcd3848432 100644
--- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
+++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c
@@ -227,6 +227,8 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin
 int main(int argc, char *argv[])
 {
 	int err;
+	void *map_ptr;
+	unsigned long map_size;
 
 	err = mte_default_setup();
 	if (err)
@@ -243,6 +245,15 @@ int main(int argc, char *argv[])
 		return KSFT_FAIL;
 	}
 
+	/* Check if MTE supports hugetlb mappings */
+	map_size = default_huge_page_size();
+	map_ptr = mmap(NULL, map_size, PROT_READ | PROT_MTE,
+		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+	if (map_ptr == MAP_FAILED)
+		ksft_exit_skip("PROT_MTE not supported with MAP_HUGETLB mappings\n");
+	else
+		munmap(map_ptr, map_size);
+
 	/* Set test plan */
 	ksft_set_plan(12);
 
@@ -270,13 +281,13 @@ int main(int argc, char *argv[])
 	"Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
 
 	evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
-		"Check child hugetlb memory with private mapping, precise mode and mmap memory\n");
+		"Check child hugetlb memory with private mapping, sync error mode and mmap memory\n");
 	evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
-		"Check child hugetlb memory with private mapping, precise mode and mmap memory\n");
+		"Check child hugetlb memory with private mapping, async error mode and mmap memory\n");
 	evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
-		"Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n");
+		"Check child hugetlb memory with private mapping, sync error mode and mmap/mprotect memory\n");
 	evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB),
-		"Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n");
+		"Check child hugetlb memory with private mapping, async error mode and mmap/mprotect memory\n");
 
 	mte_restore_setup();
 	free_hugetlb();
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index e9c377001f93..e2a2c46c008b 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -18,7 +18,6 @@ feature
 urandom_read
 test_sockmap
 test_lirc_mode2_user
-test_flow_dissector
 flow_dissector_load
 test_tcpnotify_user
 test_libbpf
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 7eeb3cbe18c7..87551628e112 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -41,7 +41,7 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
 srctree := $(patsubst %/,%,$(dir $(srctree)))
 endif
 
-CFLAGS += -g $(OPT_FLAGS) -rdynamic					\
+CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11				\
 	  -Wall -Werror -fno-omit-frame-pointer				\
 	  $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS)			\
 	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
@@ -54,21 +54,6 @@ PCAP_LIBS	:= $(shell $(PKG_CONFIG) --libs libpcap 2>/dev/null)
 LDLIBS += $(PCAP_LIBS)
 CFLAGS += $(PCAP_CFLAGS)
 
-# The following tests perform type punning and they may break strict
-# aliasing rules, which are exploited by both GCC and clang by default
-# while optimizing.  This can lead to broken programs.
-progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing
-progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing
-progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing
-progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing
-progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing
-progs/syscall.c-CFLAGS := -fno-strict-aliasing
-progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing
-progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing
-progs/timer_crash.c-CFLAGS := -fno-strict-aliasing
-progs/test_global_func9.c-CFLAGS := -fno-strict-aliasing
-progs/verifier_nocsr.c-CFLAGS := -fno-strict-aliasing
-
 # Some utility functions use LLVM libraries
 jit_disasm_helpers.c-CFLAGS = $(LLVM_CFLAGS)
 
@@ -103,18 +88,6 @@ progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error
 progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error
 progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error
 
-# The following tests do type-punning, via the __imm_insn macro, from
-# `struct bpf_insn' to long and then uses the value.  This triggers an
-# "is used uninitialized" warning in GCC due to strict-aliasing
-# rules.
-progs/verifier_ref_tracking.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_unpriv.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_cgroup_storage.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_ld_ind.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_map_ret_val.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_spill_fill.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_subprog_precision.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_uninit.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
 endif
 
 ifneq ($(CLANG_CPUV4),)
@@ -127,13 +100,10 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
-	test_xdp_redirect.sh \
 	test_xdp_redirect_multi.sh \
-	test_xdp_meta.sh \
 	test_tunnel.sh \
 	test_lwt_seg6local.sh \
 	test_lirc_mode2.sh \
-	test_flow_dissector.sh \
 	test_xdp_vlan_mode_generic.sh \
 	test_xdp_vlan_mode_native.sh \
 	test_lwt_ip_encap.sh \
@@ -151,17 +121,16 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh ima_setup.sh verify_sig_setup.sh \
 	test_xdp_vlan.sh test_bpftool.py
 
+TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
+	bpf_test_modorder_y.ko
+TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))
+
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = \
 	bench \
-	bpf_testmod.ko \
-	bpf_test_modorder_x.ko \
-	bpf_test_modorder_y.ko \
-	bpf_test_no_cfi.ko \
 	flow_dissector_load \
 	runqslower \
 	test_cpp \
-	test_flow_dissector \
 	test_lirc_mode2_user \
 	veristat \
 	xdp_features \
@@ -184,8 +153,9 @@ override define CLEAN
 	$(Q)$(RM) -r $(TEST_GEN_PROGS)
 	$(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED)
 	$(Q)$(RM) -r $(TEST_GEN_FILES)
+	$(Q)$(RM) -r $(TEST_KMODS)
 	$(Q)$(RM) -r $(EXTRA_CLEAN)
-	$(Q)$(MAKE) -C bpf_testmod clean
+	$(Q)$(MAKE) -C test_kmods clean
 	$(Q)$(MAKE) docs-clean
 endef
 
@@ -203,9 +173,9 @@ ifeq ($(shell expr $(MAKE_VERSION) \>= 4.4), 1)
 $(let OUTPUT,$(OUTPUT)/,\
 	$(eval include ../../../build/Makefile.feature))
 else
-OUTPUT := $(OUTPUT)/
+override OUTPUT := $(OUTPUT)/
 $(eval include ../../../build/Makefile.feature)
-OUTPUT := $(patsubst %/,%,$(OUTPUT))
+override OUTPUT := $(patsubst %/,%,$(OUTPUT))
 endif
 endif
 
@@ -251,7 +221,7 @@ endif
 # to build individual tests.
 # NOTE: Semicolon at the end is critical to override lib.mk's default static
 # rule for binaries.
-$(notdir $(TEST_GEN_PROGS)						\
+$(notdir $(TEST_GEN_PROGS) $(TEST_KMODS)				\
 	 $(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ;
 
 # sort removes libbpf duplicates when not cross-building
@@ -305,37 +275,19 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
 		  $< -o $@ \
 		  $(shell $(PKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
 
-$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
-	$(call msg,MOD,,$@)
-	$(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
-	$(Q)$(MAKE) $(submake_extras) -C bpf_testmod \
-		RESOLVE_BTFIDS=$(RESOLVE_BTFIDS)     \
-		EXTRA_CFLAGS='' EXTRA_LDFLAGS=''
-	$(Q)cp bpf_testmod/bpf_testmod.ko $@
-
-$(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch])
-	$(call msg,MOD,,$@)
-	$(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation
-	$(Q)$(MAKE) $(submake_extras) -C bpf_test_no_cfi \
-		RESOLVE_BTFIDS=$(RESOLVE_BTFIDS)	 \
+# This should really be a grouped target, but make versions before 4.3 don't
+# support that for regular rules. However, pattern matching rules are implicitly
+# treated as grouped even with older versions of make, so as a workaround, the
+# subst() turns the rule into a pattern matching rule
+$(addprefix test_kmods/,$(subst .ko,%ko,$(TEST_KMODS))): $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard test_kmods/Makefile test_kmods/*.[ch])
+	$(Q)$(RM) test_kmods/*.ko test_kmods/*.mod.o # force re-compilation
+	$(Q)$(MAKE) $(submake_extras) -C test_kmods	\
+		RESOLVE_BTFIDS=$(RESOLVE_BTFIDS)	\
 		EXTRA_CFLAGS='' EXTRA_LDFLAGS=''
-	$(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@
 
-$(OUTPUT)/bpf_test_modorder_x.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_x/Makefile bpf_test_modorder_x/*.[ch])
+$(TEST_KMOD_TARGETS): $(addprefix test_kmods/,$(TEST_KMODS))
 	$(call msg,MOD,,$@)
-	$(Q)$(RM) bpf_test_modorder_x/bpf_test_modorder_x.ko # force re-compilation
-	$(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_x \
-		RESOLVE_BTFIDS=$(RESOLVE_BTFIDS)	     \
-		EXTRA_CFLAGS='' EXTRA_LDFLAGS=''
-	$(Q)cp bpf_test_modorder_x/bpf_test_modorder_x.ko $@
-
-$(OUTPUT)/bpf_test_modorder_y.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_y/Makefile bpf_test_modorder_y/*.[ch])
-	$(call msg,MOD,,$@)
-	$(Q)$(RM) bpf_test_modorder_y/bpf_test_modorder_y.ko # force re-compilation
-	$(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_y \
-		RESOLVE_BTFIDS=$(RESOLVE_BTFIDS)	     \
-		EXTRA_CFLAGS='' EXTRA_LDFLAGS=''
-	$(Q)cp bpf_test_modorder_y/bpf_test_modorder_y.ko $@
+	$(Q)cp test_kmods/$(@F) $@
 
 
 DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
@@ -480,10 +432,10 @@ $(shell $(1) $(2) -dM -E - </dev/null | grep -E 'MIPS(EL|EB)|_MIPS_SZ(PTR|LONG)
 endef
 
 # Determine target endianness.
-IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
+IS_LITTLE_ENDIAN := $(shell $(CC) -dM -E - </dev/null | \
 			grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
-MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
-BPF_TARGET_ENDIAN=$(if $(IS_LITTLE_ENDIAN),--target=bpfel,--target=bpfeb)
+MENDIAN:=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
+BPF_TARGET_ENDIAN:=$(if $(IS_LITTLE_ENDIAN),--target=bpfel,--target=bpfeb)
 
 ifneq ($(CROSS_COMPILE),)
 CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
@@ -493,6 +445,8 @@ CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
 BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN)	\
 	     -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR)			\
 	     -I$(abspath $(OUTPUT)/../usr/include)			\
+	     -std=gnu11		 					\
+	     -fno-strict-aliasing 					\
 	     -Wno-compare-distinct-pointer-types
 # TODO: enable me -Wsign-compare
 
@@ -760,14 +714,12 @@ TRUNNER_EXTRA_SOURCES := test_progs.c		\
 			 json_writer.c 		\
 			 flow_dissector_load.h	\
 			 ip_check_defrag_frags.h
-TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
-		       $(OUTPUT)/bpf_test_no_cfi.ko			\
-		       $(OUTPUT)/bpf_test_modorder_x.ko		\
-		       $(OUTPUT)/bpf_test_modorder_y.ko		\
+TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
 		       $(OUTPUT)/liburandom_read.so			\
 		       $(OUTPUT)/xdp_synproxy				\
 		       $(OUTPUT)/sign-file				\
 		       $(OUTPUT)/uprobe_multi				\
+		       $(TEST_KMOD_TARGETS)				\
 		       ima_setup.sh 					\
 		       verify_sig_setup.sh				\
 		       $(wildcard progs/btf_dump_test_case_*.c)		\
@@ -834,9 +786,12 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp
 	$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
 
 # Make sure we are able to include and link libbpf against c++.
+CXXFLAGS += $(CFLAGS)
+CXXFLAGS := $(subst -D_GNU_SOURCE=,,$(CXXFLAGS))
+CXXFLAGS := $(subst -std=gnu11,-std=gnu++11,$(CXXFLAGS))
 $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(call msg,CXX,,$@)
-	$(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
+	$(Q)$(CXX) $(CXXFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
 
 # Benchmark runner
 $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ)
@@ -894,12 +849,9 @@ $(OUTPUT)/uprobe_multi: uprobe_multi.c uprobe_multi.ld
 
 EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)			\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
-	feature bpftool 						\
+	feature bpftool $(TEST_KMOD_TARGETS)				\
 	$(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h	\
-			       no_alu32 cpuv4 bpf_gcc bpf_testmod.ko	\
-			       bpf_test_no_cfi.ko			\
-			       bpf_test_modorder_x.ko			\
-			       bpf_test_modorder_y.ko			\
+			       no_alu32 cpuv4 bpf_gcc			\
 			       liburandom_read.so)			\
 	$(OUTPUT)/FEATURE-DUMP.selftests
 
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index eb6a4fea8c79..f7f9e7088bb3 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -7,12 +7,6 @@ INSTALL ?= install
 RM ?= rm -f
 RMDIR ?= rmdir --ignore-fail-on-non-empty
 
-ifeq ($(V),1)
-  Q =
-else
-  Q = @
-endif
-
 prefix ?= /usr/local
 mandir ?= $(prefix)/man
 man2dir = $(mandir)/man2
diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile
deleted file mode 100644
index 40b25b98ad1b..000000000000
--- a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
-
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
-MODULES = bpf_test_modorder_x.ko
-
-obj-m += bpf_test_modorder_x.o
-
-all:
-	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
-
-clean:
-	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
-
diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile
deleted file mode 100644
index 52c3ab9d84e2..000000000000
--- a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
-
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
-MODULES = bpf_test_modorder_y.ko
-
-obj-m += bpf_test_modorder_y.o
-
-all:
-	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
-
-clean:
-	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
-
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
deleted file mode 100644
index ed5143b79edf..000000000000
--- a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-BPF_TEST_NO_CFI_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= $(abspath $(BPF_TEST_NO_CFI_DIR)/../../../../..)
-
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
-MODULES = bpf_test_no_cfi.ko
-
-obj-m += bpf_test_no_cfi.o
-
-all:
-	+$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) modules
-
-clean:
-	+$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) clean
-
diff --git a/tools/testing/selftests/bpf/bpf_testmod/Makefile b/tools/testing/selftests/bpf/bpf_testmod/Makefile
deleted file mode 100644
index 15cb36c4483a..000000000000
--- a/tools/testing/selftests/bpf/bpf_testmod/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
-
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
-MODULES = bpf_testmod.ko
-
-obj-m += bpf_testmod.o
-CFLAGS_bpf_testmod.o = -I$(src)
-
-all:
-	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
-
-clean:
-	+$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
-
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 4ca84c8d9116..c378d5d07e02 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -58,6 +58,7 @@ CONFIG_MPLS=y
 CONFIG_MPLS_IPTUNNEL=y
 CONFIG_MPLS_ROUTING=y
 CONFIG_MPTCP=y
+CONFIG_NET_ACT_GACT=y
 CONFIG_NET_ACT_SKBMOD=y
 CONFIG_NET_CLS=y
 CONFIG_NET_CLS_ACT=y
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
index 66191ae9863c..79c3ccadb962 100644
--- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -120,11 +120,12 @@ static void validate_fetch_results(int outer_map_fd,
 
 static void fetch_and_validate(int outer_map_fd,
 			       struct bpf_map_batch_opts *opts,
-			       __u32 batch_size, bool delete_entries)
+			       __u32 batch_size, bool delete_entries,
+			       bool has_holes)
 {
-	__u32 *fetched_keys, *fetched_values, total_fetched = 0;
-	__u32 batch_key = 0, fetch_count, step_size;
-	int err, max_entries = OUTER_MAP_ENTRIES;
+	int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes;
+	__u32 *fetched_keys, *fetched_values, total_fetched = 0, i;
+	__u32 batch_key = 0, fetch_count, step_size = batch_size;
 	__u32 value_size = sizeof(__u32);
 
 	/* Total entries needs to be fetched */
@@ -134,9 +135,8 @@ static void fetch_and_validate(int outer_map_fd,
 	      "Memory allocation failed for fetched_keys or fetched_values",
 	      "error=%s\n", strerror(errno));
 
-	for (step_size = batch_size;
-	     step_size <= max_entries;
-	     step_size += batch_size) {
+	/* hash map may not always return full batch */
+	for (i = 0; i < OUTER_MAP_ENTRIES; i++) {
 		fetch_count = step_size;
 		err = delete_entries
 		      ? bpf_map_lookup_and_delete_batch(outer_map_fd,
@@ -155,6 +155,7 @@ static void fetch_and_validate(int outer_map_fd,
 		if (err && errno == ENOSPC) {
 			/* Fetch again with higher batch size */
 			total_fetched = 0;
+			step_size += batch_size;
 			continue;
 		}
 
@@ -184,18 +185,19 @@ static void fetch_and_validate(int outer_map_fd,
 }
 
 static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
-				  enum bpf_map_type inner_map_type)
+				  enum bpf_map_type inner_map_type,
+				  bool has_holes)
 {
+	__u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes;
 	__u32 *outer_map_keys, *inner_map_fds;
-	__u32 max_entries = OUTER_MAP_ENTRIES;
 	LIBBPF_OPTS(bpf_map_batch_opts, opts);
 	__u32 value_size = sizeof(__u32);
 	int batch_size[2] = {5, 10};
 	__u32 map_index, op_index;
 	int outer_map_fd, ret;
 
-	outer_map_keys = calloc(max_entries, value_size);
-	inner_map_fds = calloc(max_entries, value_size);
+	outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size);
+	inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size);
 	CHECK((!outer_map_keys || !inner_map_fds),
 	      "Memory allocation failed for outer_map_keys or inner_map_fds",
 	      "error=%s\n", strerror(errno));
@@ -209,6 +211,24 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
 			((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
 			 ? 9 : 1000) - map_index;
 
+	/* This condition is only meaningful for array of maps.
+	 *
+	 * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say
+	 * max_entries is short for n, then outer_map_keys looks like:
+	 *
+	 *   [n, n-1, ... 2, 1]
+	 *
+	 * We change it to
+	 *
+	 *   [n, n-1, ... 2, 0]
+	 *
+	 * So it will leave key 1 as a hole. It will serve to test the
+	 * correctness when batch on an array: a "non-exist" key might be
+	 * actually allocated and returned from key iteration.
+	 */
+	if (has_holes)
+		outer_map_keys[max_entries - 1]--;
+
 	/* batch operation - map_update */
 	ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
 				   inner_map_fds, &max_entries, &opts);
@@ -219,15 +239,17 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
 	/* batch operation - map_lookup */
 	for (op_index = 0; op_index < 2; ++op_index)
 		fetch_and_validate(outer_map_fd, &opts,
-				   batch_size[op_index], false);
+				   batch_size[op_index], false,
+				   has_holes);
 
 	/* batch operation - map_lookup_delete */
 	if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
 		fetch_and_validate(outer_map_fd, &opts,
-				   max_entries, true /*delete*/);
+				   max_entries, true /*delete*/,
+				   has_holes);
 
 	/* close all map fds */
-	for (map_index = 0; map_index < max_entries; map_index++)
+	for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++)
 		close(inner_map_fds[map_index]);
 	close(outer_map_fd);
 
@@ -237,16 +259,20 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
 
 void test_map_in_map_batch_ops_array(void)
 {
-	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
 	printf("%s:PASS with inner ARRAY map\n", __func__);
-	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false);
 	printf("%s:PASS with inner HASH map\n", __func__);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true);
+	printf("%s:PASS with inner ARRAY map with holes\n", __func__);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true);
+	printf("%s:PASS with inner HASH map with holes\n", __func__);
 }
 
 void test_map_in_map_batch_ops_hash(void)
 {
-	_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
 	printf("%s:PASS with inner ARRAY map\n", __func__);
-	_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false);
 	printf("%s:PASS with inner HASH map\n", __func__);
 }
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 27784946b01b..80844a5fb1fe 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -21,7 +21,7 @@
 #include <linux/limits.h>
 
 #include <linux/ip.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
 #include <netinet/tcp.h>
 #include <net/if.h>
 
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 5764155b6d25..ebec8a8d6f81 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -14,6 +14,7 @@ typedef __u16 __sum16;
 #include <linux/sockios.h>
 #include <linux/err.h>
 #include <netinet/tcp.h>
+#include <netinet/udp.h>
 #include <bpf/bpf_endian.h>
 #include <net/if.h>
 
@@ -105,6 +106,45 @@ static __u16 csum_fold(__u32 csum)
 	return (__u16)~csum;
 }
 
+static __wsum csum_partial(const void *buf, int len, __wsum sum)
+{
+	__u16 *p = (__u16 *)buf;
+	int num_u16 = len >> 1;
+	int i;
+
+	for (i = 0; i < num_u16; i++)
+		sum += p[i];
+
+	return sum;
+}
+
+static inline __sum16 build_ip_csum(struct iphdr *iph)
+{
+	__u32 sum = 0;
+	__u16 *p;
+
+	iph->check = 0;
+	p = (void *)iph;
+	sum = csum_partial(p, iph->ihl << 2, 0);
+
+	return csum_fold(sum);
+}
+
+/**
+ * csum_tcpudp_magic - compute IP pseudo-header checksum
+ *
+ * Compute the IPv4 pseudo header checksum. The helper can take a
+ * accumulated sum from the transport layer to accumulate it and directly
+ * return the transport layer
+ *
+ * @saddr: IP source address
+ * @daddr: IP dest address
+ * @len: IP data size
+ * @proto: transport layer protocol
+ * @csum: The accumulated partial sum to add to the computation
+ *
+ * Returns the folded sum
+ */
 static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
 					__u32 len, __u8 proto,
 					__wsum csum)
@@ -120,6 +160,21 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
 	return csum_fold((__u32)s);
 }
 
+/**
+ * csum_ipv6_magic - compute IPv6 pseudo-header checksum
+ *
+ * Compute the ipv6 pseudo header checksum. The helper can take a
+ * accumulated sum from the transport layer to accumulate it and directly
+ * return the transport layer
+ *
+ * @saddr: IPv6 source address
+ * @daddr: IPv6 dest address
+ * @len: IPv6 data size
+ * @proto: transport layer protocol
+ * @csum: The accumulated partial sum to add to the computation
+ *
+ * Returns the folded sum
+ */
 static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 				      const struct in6_addr *daddr,
 					__u32 len, __u8 proto,
@@ -139,6 +194,47 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 	return csum_fold((__u32)s);
 }
 
+/**
+ * build_udp_v4_csum - compute UDP checksum for UDP over IPv4
+ *
+ * Compute the checksum to embed in UDP header, composed of the sum of IP
+ * pseudo-header checksum, UDP header checksum and UDP data checksum
+ * @iph IP header
+ * @udph UDP header, which must be immediately followed by UDP data
+ *
+ * Returns the total checksum
+ */
+
+static inline __sum16 build_udp_v4_csum(const struct iphdr *iph,
+					const struct udphdr *udph)
+{
+	unsigned long sum;
+
+	sum = csum_partial(udph, ntohs(udph->len), 0);
+	return csum_tcpudp_magic(iph->saddr, iph->daddr, ntohs(udph->len),
+				 IPPROTO_UDP, sum);
+}
+
+/**
+ * build_udp_v6_csum - compute UDP checksum for UDP over IPv6
+ *
+ * Compute the checksum to embed in UDP header, composed of the sum of IPv6
+ * pseudo-header checksum, UDP header checksum and UDP data checksum
+ * @ip6h IPv6 header
+ * @udph UDP header, which must be immediately followed by UDP data
+ *
+ * Returns the total checksum
+ */
+static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h,
+					const struct udphdr *udph)
+{
+	unsigned long sum;
+
+	sum = csum_partial(udph, ntohs(udph->len), 0);
+	return csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ntohs(udph->len),
+			       IPPROTO_UDP, sum);
+}
+
 struct tmonitor_ctx;
 
 #ifdef TRAFFIC_MONITOR
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_distill.c b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
index ca84726d5ac1..fb67ae195a73 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_distill.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
@@ -385,7 +385,7 @@ static void test_distilled_base_missing_err(void)
 		"[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
 	btf5 = btf__new_empty();
 	if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
-		return;
+		goto cleanup;
 	btf__add_int(btf5, "int", 4, BTF_INT_SIGNED);   /* [1] int */
 	VALIDATE_RAW_BTF(
 		btf5,
@@ -478,7 +478,7 @@ static void test_distilled_base_multi_err2(void)
 		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
 	btf5 = btf__new_empty();
 	if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
-		return;
+		goto cleanup;
 	btf__add_int(btf5, "int", 4, BTF_INT_SIGNED);   /* [1] int */
 	btf__add_int(btf5, "int", 4, BTF_INT_SIGNED);   /* [2] int */
 	VALIDATE_RAW_BTF(
@@ -601,6 +601,76 @@ cleanup:
 	btf__free(base);
 }
 
+/* If a needed composite type, which is the member of composite type
+ * in the split BTF, has a different size in the base BTF we wish to
+ * relocate with, btf__relocate() should error out.
+ */
+static void test_distilled_base_embedded_err(void)
+{
+	struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+		return;
+
+	btf__add_int(btf1, "int", 4, BTF_INT_SIGNED);   /* [1] int */
+	btf__add_struct(btf1, "s1", 4);                 /* [2] struct s1 { */
+	btf__add_field(btf1, "f1", 1, 0, 0);            /*      int f1; */
+							/* } */
+	VALIDATE_RAW_BTF(
+		btf1,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0");
+
+	btf2 = btf__new_empty_split(btf1);
+	if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+		goto cleanup;
+
+	btf__add_struct(btf2, "with_embedded", 8);      /* [3] struct with_embedded { */
+	btf__add_field(btf2, "e1", 2, 0, 0);		/*      struct s1 e1; */
+							/* } */
+
+	VALIDATE_RAW_BTF(
+		btf2,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] STRUCT 's1' size=4 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0",
+		"[3] STRUCT 'with_embedded' size=8 vlen=1\n"
+		"\t'e1' type_id=2 bits_offset=0");
+
+	if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+		       "distilled_base") ||
+	    !ASSERT_OK_PTR(btf3, "distilled_base") ||
+	    !ASSERT_OK_PTR(btf4, "distilled_split") ||
+	    !ASSERT_EQ(2, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+		goto cleanup;
+
+	VALIDATE_RAW_BTF(
+		btf4,
+		"[1] STRUCT 's1' size=4 vlen=0",
+		"[2] STRUCT 'with_embedded' size=8 vlen=1\n"
+		"\t'e1' type_id=1 bits_offset=0");
+
+	btf5 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
+		goto cleanup;
+
+	btf__add_int(btf5, "int", 4, BTF_INT_SIGNED);   /* [1] int */
+	/* struct with the same name but different size */
+	btf__add_struct(btf5, "s1", 8);                 /* [2] struct s1 { */
+	btf__add_field(btf5, "f1", 1, 0, 0);            /*      int f1; */
+							/* } */
+
+	ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split");
+cleanup:
+	btf__free(btf5);
+	btf__free(btf4);
+	btf__free(btf3);
+	btf__free(btf2);
+	btf__free(btf1);
+}
+
 void test_btf_distill(void)
 {
 	if (test__start_subtest("distilled_base"))
@@ -613,6 +683,8 @@ void test_btf_distill(void)
 		test_distilled_base_multi_err();
 	if (test__start_subtest("distilled_base_multi_err2"))
 		test_distilled_base_multi_err2();
+	if (test__start_subtest("distilled_base_embedded_err"))
+		test_distilled_base_embedded_err();
 	if (test__start_subtest("distilled_base_vmlinux"))
 		test_distilled_base_vmlinux();
 	if (test__start_subtest("distilled_endianness"))
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c
new file mode 100644
index 000000000000..e1a90c10db8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_skb_direct_packet_access.skel.h"
+
+void test_cgroup_skb_prog_run_direct_packet_access(void)
+{
+	int err;
+	struct cgroup_skb_direct_packet_access *skel;
+	char test_skb[64] = {};
+
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = test_skb,
+		.data_size_in = sizeof(test_skb),
+	);
+
+	skel = cgroup_skb_direct_packet_access__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "cgroup_skb_direct_packet_access__open_and_load"))
+		return;
+
+	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.direct_packet_access), &topts);
+	ASSERT_OK(err, "bpf_prog_test_run_opts err");
+	ASSERT_EQ(topts.retval, 1, "retval");
+
+	ASSERT_NEQ(skel->bss->data_end, 0, "data_end");
+
+	cgroup_skb_direct_packet_access__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 1c682550e0e7..e10ea92c3fe2 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -2,7 +2,7 @@
 #define _GNU_SOURCE
 #include <test_progs.h>
 #include "progs/core_reloc_types.h"
-#include "bpf_testmod/bpf_testmod.h"
+#include "test_kmods/bpf_testmod.h"
 #include <linux/limits.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
new file mode 100644
index 000000000000..a1d52e73fb16
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include <linux/btf.h>
+#include <bpf/bpf.h>
+
+#include "../test_btf.h"
+
+static inline int new_map(void)
+{
+	const char *name = NULL;
+	__u32 max_entries = 1;
+	__u32 value_size = 8;
+	__u32 key_size = 4;
+
+	return bpf_map_create(BPF_MAP_TYPE_ARRAY, name,
+			      key_size, value_size,
+			      max_entries, NULL);
+}
+
+static int new_btf(void)
+{
+	struct btf_blob {
+		struct btf_header btf_hdr;
+		__u32 types[8];
+		__u32 str;
+	} raw_btf = {
+		.btf_hdr = {
+			.magic = BTF_MAGIC,
+			.version = BTF_VERSION,
+			.hdr_len = sizeof(struct btf_header),
+			.type_len = sizeof(raw_btf.types),
+			.str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types),
+			.str_len = sizeof(raw_btf.str),
+		},
+		.types = {
+			/* long */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8),  /* [1] */
+			/* unsigned long */
+			BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),  /* [2] */
+		},
+	};
+
+	return bpf_btf_load(&raw_btf, sizeof(raw_btf), NULL);
+}
+
+#define Close(FD) do {		\
+	if ((FD) >= 0) {	\
+		close(FD);	\
+		FD = -1;	\
+	}			\
+} while(0)
+
+static bool map_exists(__u32 id)
+{
+	int fd;
+
+	fd = bpf_map_get_fd_by_id(id);
+	if (fd >= 0) {
+		close(fd);
+		return true;
+	}
+	return false;
+}
+
+static bool btf_exists(__u32 id)
+{
+	int fd;
+
+	fd = bpf_btf_get_fd_by_id(id);
+	if (fd >= 0) {
+		close(fd);
+		return true;
+	}
+	return false;
+}
+
+static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *map_ids)
+{
+	__u32 len = sizeof(struct bpf_prog_info);
+	struct bpf_prog_info info;
+	int err;
+
+	memset(&info, 0, len);
+	info.nr_map_ids = *nr_map_ids,
+	info.map_ids = ptr_to_u64(map_ids),
+
+	err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
+	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+		return -1;
+
+	*nr_map_ids = info.nr_map_ids;
+
+	return 0;
+}
+
+static int __load_test_prog(int map_fd, const int *fd_array, int fd_array_cnt)
+{
+	/* A trivial program which uses one map */
+	struct bpf_insn insns[] = {
+		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+	opts.fd_array = fd_array;
+	opts.fd_array_cnt = fd_array_cnt;
+
+	return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, ARRAY_SIZE(insns), &opts);
+}
+
+static int load_test_prog(const int *fd_array, int fd_array_cnt)
+{
+	int map_fd;
+	int ret;
+
+	map_fd = new_map();
+	if (!ASSERT_GE(map_fd, 0, "new_map"))
+		return map_fd;
+
+	ret = __load_test_prog(map_fd, fd_array, fd_array_cnt);
+	close(map_fd);
+	return ret;
+}
+
+static bool check_expected_map_ids(int prog_fd, int expected, __u32 *map_ids, __u32 *nr_map_ids)
+{
+	int err;
+
+	err = bpf_prog_get_map_ids(prog_fd, nr_map_ids, map_ids);
+	if (!ASSERT_OK(err, "bpf_prog_get_map_ids"))
+		return false;
+	if (!ASSERT_EQ(*nr_map_ids, expected, "unexpected nr_map_ids"))
+		return false;
+
+	return true;
+}
+
+/*
+ * Load a program, which uses one map. No fd_array maps are present.
+ * On return only one map is expected to be bound to prog.
+ */
+static void check_fd_array_cnt__no_fd_array(void)
+{
+	__u32 map_ids[16];
+	__u32 nr_map_ids;
+	int prog_fd = -1;
+
+	prog_fd = load_test_prog(NULL, 0);
+	if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+		return;
+	nr_map_ids = ARRAY_SIZE(map_ids);
+	check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids);
+	close(prog_fd);
+}
+
+/*
+ * Load a program, which uses one map, and pass two extra, non-equal, maps in
+ * fd_array with fd_array_cnt=2. On return three maps are expected to be bound
+ * to the program.
+ */
+static void check_fd_array_cnt__fd_array_ok(void)
+{
+	int extra_fds[2] = { -1, -1 };
+	__u32 map_ids[16];
+	__u32 nr_map_ids;
+	int prog_fd = -1;
+
+	extra_fds[0] = new_map();
+	if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+		goto cleanup;
+	extra_fds[1] = new_map();
+	if (!ASSERT_GE(extra_fds[1], 0, "new_map"))
+		goto cleanup;
+	prog_fd = load_test_prog(extra_fds, 2);
+	if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+		goto cleanup;
+	nr_map_ids = ARRAY_SIZE(map_ids);
+	if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids))
+		goto cleanup;
+
+	/* maps should still exist when original file descriptors are closed */
+	Close(extra_fds[0]);
+	Close(extra_fds[1]);
+	if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map_ids[0] should exist"))
+		goto cleanup;
+	if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map_ids[1] should exist"))
+		goto cleanup;
+
+	/* some fds might be invalid, so ignore return codes */
+cleanup:
+	Close(extra_fds[1]);
+	Close(extra_fds[0]);
+	Close(prog_fd);
+}
+
+/*
+ * Load a program with a few extra maps duplicated in the fd_array.
+ * After the load maps should only be referenced once.
+ */
+static void check_fd_array_cnt__duplicated_maps(void)
+{
+	int extra_fds[4] = { -1, -1, -1, -1 };
+	__u32 map_ids[16];
+	__u32 nr_map_ids;
+	int prog_fd = -1;
+
+	extra_fds[0] = extra_fds[2] = new_map();
+	if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+		goto cleanup;
+	extra_fds[1] = extra_fds[3] = new_map();
+	if (!ASSERT_GE(extra_fds[1], 0, "new_map"))
+		goto cleanup;
+	prog_fd = load_test_prog(extra_fds, 4);
+	if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+		goto cleanup;
+	nr_map_ids = ARRAY_SIZE(map_ids);
+	if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids))
+		goto cleanup;
+
+	/* maps should still exist when original file descriptors are closed */
+	Close(extra_fds[0]);
+	Close(extra_fds[1]);
+	if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist"))
+		goto cleanup;
+	if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map should exist"))
+		goto cleanup;
+
+	/* some fds might be invalid, so ignore return codes */
+cleanup:
+	Close(extra_fds[1]);
+	Close(extra_fds[0]);
+	Close(prog_fd);
+}
+
+/*
+ * Check that if maps which are referenced by a program are
+ * passed in fd_array, then they will be referenced only once
+ */
+static void check_fd_array_cnt__referenced_maps_in_fd_array(void)
+{
+	int extra_fds[1] = { -1 };
+	__u32 map_ids[16];
+	__u32 nr_map_ids;
+	int prog_fd = -1;
+
+	extra_fds[0] = new_map();
+	if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+		goto cleanup;
+	prog_fd = __load_test_prog(extra_fds[0], extra_fds, 1);
+	if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+		goto cleanup;
+	nr_map_ids = ARRAY_SIZE(map_ids);
+	if (!check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids))
+		goto cleanup;
+
+	/* map should still exist when original file descriptor is closed */
+	Close(extra_fds[0]);
+	if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist"))
+		goto cleanup;
+
+	/* some fds might be invalid, so ignore return codes */
+cleanup:
+	Close(extra_fds[0]);
+	Close(prog_fd);
+}
+
+static int get_btf_id_by_fd(int btf_fd, __u32 *id)
+{
+	struct bpf_btf_info info;
+	__u32 info_len = sizeof(info);
+	int err;
+
+	memset(&info, 0, info_len);
+	err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len);
+	if (err)
+		return err;
+	if (id)
+		*id = info.id;
+	return 0;
+}
+
+/*
+ * Check that fd_array operates properly for btfs. Namely, to check that
+ * passing a btf fd in fd_array increases its reference count, do the
+ * following:
+ *  1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1
+ *  2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2
+ *  3) Close the btf_fd, now refcnt=1
+ * Wait and check that BTF stil exists.
+ */
+static void check_fd_array_cnt__referenced_btfs(void)
+{
+	int extra_fds[1] = { -1 };
+	int prog_fd = -1;
+	__u32 btf_id;
+	int tries;
+	int err;
+
+	extra_fds[0] = new_btf();
+	if (!ASSERT_GE(extra_fds[0], 0, "new_btf"))
+		goto cleanup;
+	prog_fd = load_test_prog(extra_fds, 1);
+	if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+		goto cleanup;
+
+	/* btf should still exist when original file descriptor is closed */
+	err = get_btf_id_by_fd(extra_fds[0], &btf_id);
+	if (!ASSERT_GE(err, 0, "get_btf_id_by_fd"))
+		goto cleanup;
+
+	Close(extra_fds[0]);
+
+	if (!ASSERT_GE(kern_sync_rcu(), 0, "kern_sync_rcu 1"))
+		goto cleanup;
+
+	if (!ASSERT_EQ(btf_exists(btf_id), true, "btf should exist"))
+		goto cleanup;
+
+	Close(prog_fd);
+
+	/* The program is freed by a workqueue, so no reliable
+	 * way to sync, so just wait a bit (max ~1 second). */
+	for (tries = 100; tries >= 0; tries--) {
+		usleep(1000);
+
+		if (!btf_exists(btf_id))
+			break;
+
+		if (tries)
+			continue;
+
+		PRINT_FAIL("btf should have been freed");
+	}
+
+	/* some fds might be invalid, so ignore return codes */
+cleanup:
+	Close(extra_fds[0]);
+	Close(prog_fd);
+}
+
+/*
+ * Test that a program with trash in fd_array can't be loaded:
+ * only map and BTF file descriptors should be accepted.
+ */
+static void check_fd_array_cnt__fd_array_with_trash(void)
+{
+	int extra_fds[3] = { -1, -1, -1 };
+	int prog_fd = -1;
+
+	extra_fds[0] = new_map();
+	if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+		goto cleanup;
+	extra_fds[1] = new_btf();
+	if (!ASSERT_GE(extra_fds[1], 0, "new_btf"))
+		goto cleanup;
+
+	/* trash 1: not a file descriptor */
+	extra_fds[2] = 0xbeef;
+	prog_fd = load_test_prog(extra_fds, 3);
+	if (!ASSERT_EQ(prog_fd, -EBADF, "prog should have been rejected with -EBADF"))
+		goto cleanup;
+
+	/* trash 2: not a map or btf */
+	extra_fds[2] = socket(AF_INET, SOCK_STREAM, 0);
+	if (!ASSERT_GE(extra_fds[2], 0, "socket"))
+		goto cleanup;
+
+	prog_fd = load_test_prog(extra_fds, 3);
+	if (!ASSERT_EQ(prog_fd, -EINVAL, "prog should have been rejected with -EINVAL"))
+		goto cleanup;
+
+	/* Validate that the prog is ok if trash is removed */
+	Close(extra_fds[2]);
+	extra_fds[2] = new_btf();
+	if (!ASSERT_GE(extra_fds[2], 0, "new_btf"))
+		goto cleanup;
+
+	prog_fd = load_test_prog(extra_fds, 3);
+	if (!ASSERT_GE(prog_fd, 0, "prog should have been loaded"))
+		goto cleanup;
+
+	/* some fds might be invalid, so ignore return codes */
+cleanup:
+	Close(extra_fds[2]);
+	Close(extra_fds[1]);
+	Close(extra_fds[0]);
+}
+
+/*
+ * Test that a program with too big fd_array can't be loaded.
+ */
+static void check_fd_array_cnt__fd_array_too_big(void)
+{
+	int extra_fds[65];
+	int prog_fd = -1;
+	int i;
+
+	for (i = 0; i < 65; i++) {
+		extra_fds[i] = new_map();
+		if (!ASSERT_GE(extra_fds[i], 0, "new_map"))
+			goto cleanup_fds;
+	}
+
+	prog_fd = load_test_prog(extra_fds, 65);
+	ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG");
+
+cleanup_fds:
+	while (i > 0)
+		Close(extra_fds[--i]);
+}
+
+void test_fd_array_cnt(void)
+{
+	if (test__start_subtest("no-fd-array"))
+		check_fd_array_cnt__no_fd_array();
+
+	if (test__start_subtest("fd-array-ok"))
+		check_fd_array_cnt__fd_array_ok();
+
+	if (test__start_subtest("fd-array-dup-input"))
+		check_fd_array_cnt__duplicated_maps();
+
+	if (test__start_subtest("fd-array-ref-maps-in-array"))
+		check_fd_array_cnt__referenced_maps_in_fd_array();
+
+	if (test__start_subtest("fd-array-ref-btfs"))
+		check_fd_array_cnt__referenced_btfs();
+
+	if (test__start_subtest("fd-array-trash-input"))
+		check_fd_array_cnt__fd_array_with_trash();
+
+	if (test__start_subtest("fd-array-2big"))
+		check_fd_array_cnt__fd_array_too_big();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index d50cbd8040d4..e59af2aa6601 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -171,6 +171,10 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
 		/* See also arch_adjust_kprobe_addr(). */
 		if (skel->kconfig->CONFIG_X86_KERNEL_IBT)
 			entry_offset = 4;
+		if (skel->kconfig->CONFIG_PPC64 &&
+		    skel->kconfig->CONFIG_KPROBES_ON_FTRACE &&
+		    !skel->kconfig->CONFIG_PPC_FTRACE_OUT_OF_LINE)
+			entry_offset = 4;
 		err = verify_perf_link_info(link_fd, type, kprobe_addr, 0, entry_offset);
 		ASSERT_OK(err, "verify_perf_link_info");
 	} else {
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index cfcc90cb7ffb..08bae13248c4 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -7,39 +7,14 @@
 
 #include "bpf_flow.skel.h"
 
+#define TEST_NS	"flow_dissector_ns"
 #define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
+#define TEST_NAME_MAX_LEN	64
 
 #ifndef IP_MF
 #define IP_MF 0x2000
 #endif
 
-#define CHECK_FLOW_KEYS(desc, got, expected)				\
-	_CHECK(memcmp(&got, &expected, sizeof(got)) != 0,		\
-	      desc,							\
-	      topts.duration,						\
-	      "nhoff=%u/%u "						\
-	      "thoff=%u/%u "						\
-	      "addr_proto=0x%x/0x%x "					\
-	      "is_frag=%u/%u "						\
-	      "is_first_frag=%u/%u "					\
-	      "is_encap=%u/%u "						\
-	      "ip_proto=0x%x/0x%x "					\
-	      "n_proto=0x%x/0x%x "					\
-	      "flow_label=0x%x/0x%x "					\
-	      "sport=%u/%u "						\
-	      "dport=%u/%u\n",						\
-	      got.nhoff, expected.nhoff,				\
-	      got.thoff, expected.thoff,				\
-	      got.addr_proto, expected.addr_proto,			\
-	      got.is_frag, expected.is_frag,				\
-	      got.is_first_frag, expected.is_first_frag,		\
-	      got.is_encap, expected.is_encap,				\
-	      got.ip_proto, expected.ip_proto,				\
-	      got.n_proto, expected.n_proto,				\
-	      got.flow_label, expected.flow_label,			\
-	      got.sport, expected.sport,				\
-	      got.dport, expected.dport)
-
 struct ipv4_pkt {
 	struct ethhdr eth;
 	struct iphdr iph;
@@ -89,6 +64,19 @@ struct dvlan_ipv6_pkt {
 	struct tcphdr tcp;
 } __packed;
 
+struct gre_base_hdr {
+	__be16 flags;
+	__be16 protocol;
+} gre_base_hdr;
+
+struct gre_minimal_pkt {
+	struct ethhdr eth;
+	struct iphdr iph;
+	struct gre_base_hdr gre_hdr;
+	struct iphdr iph_inner;
+	struct tcphdr tcp;
+} __packed;
+
 struct test {
 	const char *name;
 	union {
@@ -98,6 +86,7 @@ struct test {
 		struct ipv6_pkt ipv6;
 		struct ipv6_frag_pkt ipv6_frag;
 		struct dvlan_ipv6_pkt dvlan_ipv6;
+		struct gre_minimal_pkt gre_minimal;
 	} pkt;
 	struct bpf_flow_keys keys;
 	__u32 flags;
@@ -106,7 +95,6 @@ struct test {
 
 #define VLAN_HLEN	4
 
-static __u32 duration;
 struct test tests[] = {
 	{
 		.name = "ipv4",
@@ -444,8 +432,137 @@ struct test tests[] = {
 		},
 		.retval = BPF_FLOW_DISSECTOR_CONTINUE,
 	},
+	{
+		.name = "ip-gre",
+		.pkt.gre_minimal = {
+			.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+			.iph.ihl = 5,
+			.iph.protocol = IPPROTO_GRE,
+			.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+			.gre_hdr = {
+				.flags = 0,
+				.protocol = __bpf_constant_htons(ETH_P_IP),
+			},
+			.iph_inner.ihl = 5,
+			.iph_inner.protocol = IPPROTO_TCP,
+			.iph_inner.tot_len =
+				__bpf_constant_htons(MAGIC_BYTES -
+				sizeof(struct iphdr)),
+			.tcp.doff = 5,
+			.tcp.source = 80,
+			.tcp.dest = 8080,
+		},
+		.keys = {
+			.nhoff = ETH_HLEN,
+			.thoff = ETH_HLEN + sizeof(struct iphdr) * 2 +
+				 sizeof(struct gre_base_hdr),
+			.addr_proto = ETH_P_IP,
+			.ip_proto = IPPROTO_TCP,
+			.n_proto = __bpf_constant_htons(ETH_P_IP),
+			.is_encap = true,
+			.sport = 80,
+			.dport = 8080,
+		},
+		.retval = BPF_OK,
+	},
+	{
+		.name = "ip-gre-no-encap",
+		.pkt.ipip = {
+			.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+			.iph.ihl = 5,
+			.iph.protocol = IPPROTO_GRE,
+			.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+			.iph_inner.ihl = 5,
+			.iph_inner.protocol = IPPROTO_TCP,
+			.iph_inner.tot_len =
+				__bpf_constant_htons(MAGIC_BYTES -
+				sizeof(struct iphdr)),
+			.tcp.doff = 5,
+			.tcp.source = 80,
+			.tcp.dest = 8080,
+		},
+		.keys = {
+			.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+			.nhoff = ETH_HLEN,
+			.thoff = ETH_HLEN + sizeof(struct iphdr)
+				 + sizeof(struct gre_base_hdr),
+			.addr_proto = ETH_P_IP,
+			.ip_proto = IPPROTO_GRE,
+			.n_proto = __bpf_constant_htons(ETH_P_IP),
+			.is_encap = true,
+		},
+		.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+		.retval = BPF_OK,
+	},
 };
 
+void serial_test_flow_dissector_namespace(void)
+{
+	struct bpf_flow *skel;
+	struct nstoken *ns;
+	int err, prog_fd;
+
+	skel = bpf_flow__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
+		return;
+
+	prog_fd = bpf_program__fd(skel->progs._dissect);
+	if (!ASSERT_OK_FD(prog_fd, "get dissector fd"))
+		goto out_destroy_skel;
+
+	/* We must be able to attach a flow dissector to root namespace */
+	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+	if (!ASSERT_OK(err, "attach on root namespace ok"))
+		goto out_destroy_skel;
+
+	err = make_netns(TEST_NS);
+	if (!ASSERT_OK(err, "create non-root net namespace"))
+		goto out_destroy_skel;
+
+	/* We must not be able to additionally attach a flow dissector to a
+	 * non-root net namespace
+	 */
+	ns = open_netns(TEST_NS);
+	if (!ASSERT_OK_PTR(ns, "enter non-root net namespace"))
+		goto out_clean_ns;
+	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+	if (!ASSERT_ERR(err,
+			"refuse new flow dissector in non-root net namespace"))
+		bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+	else
+		ASSERT_EQ(errno, EEXIST,
+			  "refused because of already attached prog");
+	close_netns(ns);
+
+	/* If no flow dissector is attached to the root namespace, we must
+	 * be able to attach one to a non-root net namespace
+	 */
+	bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+	ns = open_netns(TEST_NS);
+	ASSERT_OK_PTR(ns, "enter non-root net namespace");
+	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+	close_netns(ns);
+	ASSERT_OK(err, "accept new flow dissector in non-root net namespace");
+
+	/* If a flow dissector is attached to non-root net namespace, attaching
+	 * a flow dissector to root namespace must fail
+	 */
+	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+	if (!ASSERT_ERR(err, "refuse new flow dissector on root namespace"))
+		bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+	else
+		ASSERT_EQ(errno, EEXIST,
+			  "refused because of already attached prog");
+
+	ns = open_netns(TEST_NS);
+	bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+	close_netns(ns);
+out_clean_ns:
+	remove_netns(TEST_NS);
+out_destroy_skel:
+	bpf_flow__destroy(skel);
+}
+
 static int create_tap(const char *ifname)
 {
 	struct ifreq ifr = {
@@ -533,22 +650,27 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
 	return 0;
 }
 
-static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
+static void run_tests_skb_less(int tap_fd, struct bpf_map *keys,
+			       char *test_suffix)
 {
+	char test_name[TEST_NAME_MAX_LEN];
 	int i, err, keys_fd;
 
 	keys_fd = bpf_map__fd(keys);
-	if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+	if (!ASSERT_OK_FD(keys_fd, "bpf_map__fd"))
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		/* Keep in sync with 'flags' from eth_get_headlen. */
 		__u32 eth_get_headlen_flags =
 			BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
-		LIBBPF_OPTS(bpf_test_run_opts, topts);
 		struct bpf_flow_keys flow_keys = {};
 		__u32 key = (__u32)(tests[i].keys.sport) << 16 |
 			    tests[i].keys.dport;
+		snprintf(test_name, TEST_NAME_MAX_LEN, "%s-%s", tests[i].name,
+			 test_suffix);
+		if (!test__start_subtest(test_name))
+			continue;
 
 		/* For skb-less case we can't pass input flags; run
 		 * only the tests that have a matching set of flags.
@@ -558,78 +680,139 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
 			continue;
 
 		err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
-		CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
+		if (!ASSERT_EQ(err, sizeof(tests[i].pkt), "tx_tap"))
+			continue;
 
 		/* check the stored flow_keys only if BPF_OK expected */
 		if (tests[i].retval != BPF_OK)
 			continue;
 
 		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
-		ASSERT_OK(err, "bpf_map_lookup_elem");
+		if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+			continue;
 
-		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+		ASSERT_MEMEQ(&flow_keys, &tests[i].keys,
+			     sizeof(struct bpf_flow_keys),
+			     "returned flow keys");
 
 		err = bpf_map_delete_elem(keys_fd, &key);
 		ASSERT_OK(err, "bpf_map_delete_elem");
 	}
 }
 
-static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd)
+void test_flow_dissector_skb_less_direct_attach(void)
 {
-	int err, prog_fd;
+	int err, prog_fd, tap_fd;
+	struct bpf_flow *skel;
+	struct netns_obj *ns;
 
-	prog_fd = bpf_program__fd(skel->progs._dissect);
-	if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+	ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true);
+	if (!ASSERT_OK_PTR(ns, "create and open netns"))
 		return;
 
+	skel = bpf_flow__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
+		goto out_clean_ns;
+
+	err = init_prog_array(skel->obj, skel->maps.jmp_table);
+	if (!ASSERT_OK(err, "init_prog_array"))
+		goto out_destroy_skel;
+
+	prog_fd = bpf_program__fd(skel->progs._dissect);
+	if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd"))
+		goto out_destroy_skel;
+
 	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
-	if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno))
-		return;
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out_destroy_skel;
+
+	tap_fd = create_tap("tap0");
+	if (!ASSERT_OK_FD(tap_fd, "create_tap"))
+		goto out_destroy_skel;
+	err = ifup("tap0");
+	if (!ASSERT_OK(err, "ifup"))
+		goto out_close_tap;
 
-	run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+	run_tests_skb_less(tap_fd, skel->maps.last_dissection,
+			   "non-skb-direct-attach");
 
 	err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
-	CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno);
+	ASSERT_OK(err, "bpf_prog_detach2");
+
+out_close_tap:
+	close(tap_fd);
+out_destroy_skel:
+	bpf_flow__destroy(skel);
+out_clean_ns:
+	netns_free(ns);
 }
 
-static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+void test_flow_dissector_skb_less_indirect_attach(void)
 {
+	int err, net_fd, tap_fd;
+	struct bpf_flow *skel;
 	struct bpf_link *link;
-	int err, net_fd;
+	struct netns_obj *ns;
 
-	net_fd = open("/proc/self/ns/net", O_RDONLY);
-	if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno))
+	ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true);
+	if (!ASSERT_OK_PTR(ns, "create and open netns"))
 		return;
 
+	skel = bpf_flow__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
+		goto out_clean_ns;
+
+	net_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (!ASSERT_OK_FD(net_fd, "open(/proc/self/ns/net"))
+		goto out_destroy_skel;
+
+	err = init_prog_array(skel->obj, skel->maps.jmp_table);
+	if (!ASSERT_OK(err, "init_prog_array"))
+		goto out_destroy_skel;
+
+	tap_fd = create_tap("tap0");
+	if (!ASSERT_OK_FD(tap_fd, "create_tap"))
+		goto out_close_ns;
+	err = ifup("tap0");
+	if (!ASSERT_OK(err, "ifup"))
+		goto out_close_tap;
+
 	link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
 	if (!ASSERT_OK_PTR(link, "attach_netns"))
-		goto out_close;
+		goto out_close_tap;
 
-	run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+	run_tests_skb_less(tap_fd, skel->maps.last_dissection,
+			   "non-skb-indirect-attach");
 
 	err = bpf_link__destroy(link);
-	CHECK(err, "bpf_link__destroy", "err %d\n", err);
-out_close:
+	ASSERT_OK(err, "bpf_link__destroy");
+
+out_close_tap:
+	close(tap_fd);
+out_close_ns:
 	close(net_fd);
+out_destroy_skel:
+	bpf_flow__destroy(skel);
+out_clean_ns:
+	netns_free(ns);
 }
 
-void test_flow_dissector(void)
+void test_flow_dissector_skb(void)
 {
-	int i, err, prog_fd, keys_fd = -1, tap_fd;
+	char test_name[TEST_NAME_MAX_LEN];
 	struct bpf_flow *skel;
+	int i, err, prog_fd;
 
 	skel = bpf_flow__open_and_load();
-	if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
+	if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
 		return;
 
-	prog_fd = bpf_program__fd(skel->progs._dissect);
-	if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
-		goto out_destroy_skel;
-	keys_fd = bpf_map__fd(skel->maps.last_dissection);
-	if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
-		goto out_destroy_skel;
 	err = init_prog_array(skel->obj, skel->maps.jmp_table);
-	if (CHECK(err, "init_prog_array", "err %d\n", err))
+	if (!ASSERT_OK(err, "init_prog_array"))
+		goto out_destroy_skel;
+
+	prog_fd = bpf_program__fd(skel->progs._dissect);
+	if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd"))
 		goto out_destroy_skel;
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
@@ -641,6 +824,10 @@ void test_flow_dissector(void)
 		);
 		static struct bpf_flow_keys ctx = {};
 
+		snprintf(test_name, TEST_NAME_MAX_LEN, "%s-skb", tests[i].name);
+		if (!test__start_subtest(test_name))
+			continue;
+
 		if (tests[i].flags) {
 			topts.ctx_in = &ctx;
 			topts.ctx_size_in = sizeof(ctx);
@@ -656,26 +843,12 @@ void test_flow_dissector(void)
 			continue;
 		ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
 			  "test_run data_size_out");
-		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+		ASSERT_MEMEQ(&flow_keys, &tests[i].keys,
+			     sizeof(struct bpf_flow_keys),
+			     "returned flow keys");
 	}
 
-	/* Do the same tests but for skb-less flow dissector.
-	 * We use a known path in the net/tun driver that calls
-	 * eth_get_headlen and we manually export bpf_flow_keys
-	 * via BPF map in this case.
-	 */
-
-	tap_fd = create_tap("tap0");
-	CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
-	err = ifup("tap0");
-	CHECK(err, "ifup", "err %d errno %d\n", err, errno);
-
-	/* Test direct prog attachment */
-	test_skb_less_prog_attach(skel, tap_fd);
-	/* Test indirect prog attachment via link */
-	test_skb_less_link_create(skel, tap_fd);
-
-	close(tap_fd);
 out_destroy_skel:
 	bpf_flow__destroy(skel);
 }
+
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
new file mode 100644
index 000000000000..80b153d3ddec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
@@ -0,0 +1,797 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <bpf/bpf.h>
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "bpf_util.h"
+#include "bpf_flow.skel.h"
+
+#define CFG_PORT_INNER 8000
+#define CFG_PORT_GUE 6080
+#define SUBTEST_NAME_MAX_LEN 32
+#define TEST_NAME_MAX_LEN (32 + SUBTEST_NAME_MAX_LEN)
+#define MAX_SOURCE_PORTS 3
+#define TEST_PACKETS_COUNT 10
+#define TEST_PACKET_LEN 100
+#define TEST_PACKET_PATTERN 'a'
+#define TEST_IPV4 "192.168.0.1/32"
+#define TEST_IPV6 "100::a/128"
+#define TEST_TUNNEL_REMOTE "127.0.0.2"
+#define TEST_TUNNEL_LOCAL "127.0.0.1"
+
+#define INIT_ADDR4(addr4, port)					\
+	{							\
+		.sin_family = AF_INET,				\
+		.sin_port = __constant_htons(port),		\
+		.sin_addr.s_addr = __constant_htonl(addr4),	\
+	}
+
+#define INIT_ADDR6(addr6, port)				\
+	{						\
+		.sin6_family = AF_INET6,		\
+		.sin6_port = __constant_htons(port),	\
+		.sin6_addr = addr6,			\
+	}
+#define TEST_IN4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 2, 0)
+#define TEST_IN4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, CFG_PORT_INNER)
+#define TEST_OUT4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 1, 0)
+#define TEST_OUT4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, 0)
+
+#define TEST_IN6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
+#define TEST_IN6_DST_ADDR_DEFAULT \
+	INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
+#define TEST_OUT6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
+#define TEST_OUT6_DST_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
+
+#define TEST_IN4_SRC_ADDR_DISSECT_CONTINUE INIT_ADDR4(INADDR_LOOPBACK + 126, 0)
+#define TEST_IN4_SRC_ADDR_IPIP INIT_ADDR4((in_addr_t)0x01010101, 0)
+#define TEST_IN4_DST_ADDR_IPIP INIT_ADDR4((in_addr_t)0xC0A80001, CFG_PORT_INNER)
+
+struct grehdr {
+	uint16_t unused;
+	uint16_t protocol;
+} __packed;
+
+struct guehdr {
+	union {
+		struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+			__u8 hlen : 5, control : 1, version : 2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+			__u8 version : 2, control : 1, hlen : 5;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+			__u8 proto_ctype;
+			__be16 flags;
+		};
+		__be32 word;
+	};
+};
+
+static char buf[ETH_DATA_LEN];
+
+struct test_configuration {
+	char name[SUBTEST_NAME_MAX_LEN];
+	int (*test_setup)(void);
+	void (*test_teardown)(void);
+	int source_ports[MAX_SOURCE_PORTS];
+	int cfg_l3_inner;
+	struct sockaddr_in in_saddr4;
+	struct sockaddr_in in_daddr4;
+	struct sockaddr_in6 in_saddr6;
+	struct sockaddr_in6 in_daddr6;
+	int cfg_l3_outer;
+	struct sockaddr_in out_saddr4;
+	struct sockaddr_in out_daddr4;
+	struct sockaddr_in6 out_saddr6;
+	struct sockaddr_in6 out_daddr6;
+	int cfg_encap_proto;
+	uint8_t cfg_dsfield_inner;
+	uint8_t cfg_dsfield_outer;
+	int cfg_l3_extra;
+	struct sockaddr_in extra_saddr4;
+	struct sockaddr_in extra_daddr4;
+	struct sockaddr_in6 extra_saddr6;
+	struct sockaddr_in6 extra_daddr6;
+};
+
+static unsigned long util_gettime(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void build_ipv4_header(void *header, uint8_t proto, uint32_t src,
+			      uint32_t dst, int payload_len, uint8_t tos)
+{
+	struct iphdr *iph = header;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->tos = tos;
+	iph->ttl = 8;
+	iph->tot_len = htons(sizeof(*iph) + payload_len);
+	iph->id = htons(1337);
+	iph->protocol = proto;
+	iph->saddr = src;
+	iph->daddr = dst;
+	iph->check = build_ip_csum((void *)iph);
+}
+
+static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+	uint16_t val, *ptr = (uint16_t *)ip6h;
+
+	val = ntohs(*ptr);
+	val &= 0xF00F;
+	val |= ((uint16_t)dsfield) << 4;
+	*ptr = htons(val);
+}
+
+static void build_ipv6_header(void *header, uint8_t proto,
+			      const struct sockaddr_in6 *src,
+			      const struct sockaddr_in6 *dst, int payload_len,
+			      uint8_t dsfield)
+{
+	struct ipv6hdr *ip6h = header;
+
+	ip6h->version = 6;
+	ip6h->payload_len = htons(payload_len);
+	ip6h->nexthdr = proto;
+	ip6h->hop_limit = 8;
+	ipv6_set_dsfield(ip6h, dsfield);
+
+	memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
+	memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
+}
+
+static void build_udp_header(void *header, int payload_len, uint16_t sport,
+			     uint16_t dport, int family)
+{
+	struct udphdr *udph = header;
+	int len = sizeof(*udph) + payload_len;
+
+	udph->source = htons(sport);
+	udph->dest = htons(dport);
+	udph->len = htons(len);
+	udph->check = 0;
+	if (family == AF_INET)
+		udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
+						udph);
+	else
+		udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
+						udph);
+}
+
+static void build_gue_header(void *header, uint8_t proto)
+{
+	struct guehdr *gueh = header;
+
+	gueh->proto_ctype = proto;
+}
+
+static void build_gre_header(void *header, uint16_t proto)
+{
+	struct grehdr *greh = header;
+
+	greh->protocol = htons(proto);
+}
+
+static int l3_length(int family)
+{
+	if (family == AF_INET)
+		return sizeof(struct iphdr);
+	else
+		return sizeof(struct ipv6hdr);
+}
+
+static int build_packet(const struct test_configuration *test, uint16_t sport)
+{
+	int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
+	int el3_len = 0, packet_len;
+
+	memset(buf, 0, ETH_DATA_LEN);
+
+	if (test->cfg_l3_extra)
+		el3_len = l3_length(test->cfg_l3_extra);
+
+	/* calculate header offsets */
+	if (test->cfg_encap_proto) {
+		ol3_len = l3_length(test->cfg_l3_outer);
+
+		if (test->cfg_encap_proto == IPPROTO_GRE)
+			ol4_len = sizeof(struct grehdr);
+		else if (test->cfg_encap_proto == IPPROTO_UDP)
+			ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
+	}
+
+	il3_len = l3_length(test->cfg_l3_inner);
+	il4_len = sizeof(struct udphdr);
+
+	packet_len = el3_len + ol3_len + ol4_len + il3_len + il4_len +
+		     TEST_PACKET_LEN;
+	if (!ASSERT_LE(packet_len, sizeof(buf), "check packet size"))
+		return -1;
+
+	/*
+	 * Fill packet from inside out, to calculate correct checksums.
+	 * But create ip before udp headers, as udp uses ip for pseudo-sum.
+	 */
+	memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
+	       TEST_PACKET_PATTERN, TEST_PACKET_LEN);
+
+	/* add zero byte for udp csum padding */
+	buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + TEST_PACKET_LEN] =
+		0;
+
+	switch (test->cfg_l3_inner) {
+	case PF_INET:
+		build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
+				  IPPROTO_UDP, test->in_saddr4.sin_addr.s_addr,
+				  test->in_daddr4.sin_addr.s_addr,
+				  il4_len + TEST_PACKET_LEN,
+				  test->cfg_dsfield_inner);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
+				  IPPROTO_UDP, &test->in_saddr6,
+				  &test->in_daddr6, il4_len + TEST_PACKET_LEN,
+				  test->cfg_dsfield_inner);
+		break;
+	}
+
+	build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
+			 TEST_PACKET_LEN, sport, CFG_PORT_INNER,
+			 test->cfg_l3_inner);
+
+	if (!test->cfg_encap_proto)
+		return il3_len + il4_len + TEST_PACKET_LEN;
+
+	switch (test->cfg_l3_outer) {
+	case PF_INET:
+		build_ipv4_header(buf + el3_len, test->cfg_encap_proto,
+				  test->out_saddr4.sin_addr.s_addr,
+				  test->out_daddr4.sin_addr.s_addr,
+				  ol4_len + il3_len + il4_len + TEST_PACKET_LEN,
+				  test->cfg_dsfield_outer);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf + el3_len, test->cfg_encap_proto,
+				  &test->out_saddr6, &test->out_daddr6,
+				  ol4_len + il3_len + il4_len + TEST_PACKET_LEN,
+				  test->cfg_dsfield_outer);
+		break;
+	}
+
+	switch (test->cfg_encap_proto) {
+	case IPPROTO_UDP:
+		build_gue_header(buf + el3_len + ol3_len + ol4_len -
+					 sizeof(struct guehdr),
+				 test->cfg_l3_inner == PF_INET ? IPPROTO_IPIP :
+								 IPPROTO_IPV6);
+		build_udp_header(buf + el3_len + ol3_len,
+				 sizeof(struct guehdr) + il3_len + il4_len +
+					 TEST_PACKET_LEN,
+				 sport, CFG_PORT_GUE, test->cfg_l3_outer);
+		break;
+	case IPPROTO_GRE:
+		build_gre_header(buf + el3_len + ol3_len,
+				 test->cfg_l3_inner == PF_INET ? ETH_P_IP :
+								 ETH_P_IPV6);
+		break;
+	}
+
+	switch (test->cfg_l3_extra) {
+	case PF_INET:
+		build_ipv4_header(buf,
+				  test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP :
+								  IPPROTO_IPV6,
+				  test->extra_saddr4.sin_addr.s_addr,
+				  test->extra_daddr4.sin_addr.s_addr,
+				  ol3_len + ol4_len + il3_len + il4_len +
+					  TEST_PACKET_LEN,
+				  0);
+		break;
+	case PF_INET6:
+		build_ipv6_header(buf,
+				  test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP :
+								  IPPROTO_IPV6,
+				  &test->extra_saddr6, &test->extra_daddr6,
+				  ol3_len + ol4_len + il3_len + il4_len +
+					  TEST_PACKET_LEN,
+				  0);
+		break;
+	}
+
+	return el3_len + ol3_len + ol4_len + il3_len + il4_len +
+	       TEST_PACKET_LEN;
+}
+
+/* sender transmits encapsulated over RAW or unencap'd over UDP */
+static int setup_tx(const struct test_configuration *test)
+{
+	int family, fd, ret;
+
+	if (test->cfg_l3_extra)
+		family = test->cfg_l3_extra;
+	else if (test->cfg_l3_outer)
+		family = test->cfg_l3_outer;
+	else
+		family = test->cfg_l3_inner;
+
+	fd = socket(family, SOCK_RAW, IPPROTO_RAW);
+	if (!ASSERT_OK_FD(fd, "setup tx socket"))
+		return fd;
+
+	if (test->cfg_l3_extra) {
+		if (test->cfg_l3_extra == PF_INET)
+			ret = connect(fd, (void *)&test->extra_daddr4,
+				      sizeof(test->extra_daddr4));
+		else
+			ret = connect(fd, (void *)&test->extra_daddr6,
+				      sizeof(test->extra_daddr6));
+		if (!ASSERT_OK(ret, "connect")) {
+			close(fd);
+			return ret;
+		}
+	} else if (test->cfg_l3_outer) {
+		/* connect to destination if not encapsulated */
+		if (test->cfg_l3_outer == PF_INET)
+			ret = connect(fd, (void *)&test->out_daddr4,
+				      sizeof(test->out_daddr4));
+		else
+			ret = connect(fd, (void *)&test->out_daddr6,
+				      sizeof(test->out_daddr6));
+		if (!ASSERT_OK(ret, "connect")) {
+			close(fd);
+			return ret;
+		}
+	} else {
+		/* otherwise using loopback */
+		if (test->cfg_l3_inner == PF_INET)
+			ret = connect(fd, (void *)&test->in_daddr4,
+				      sizeof(test->in_daddr4));
+		else
+			ret = connect(fd, (void *)&test->in_daddr6,
+				      sizeof(test->in_daddr6));
+		if (!ASSERT_OK(ret, "connect")) {
+			close(fd);
+			return ret;
+		}
+	}
+
+	return fd;
+}
+
+/* receiver reads unencapsulated UDP */
+static int setup_rx(const struct test_configuration *test)
+{
+	int fd, ret;
+
+	fd = socket(test->cfg_l3_inner, SOCK_DGRAM, 0);
+	if (!ASSERT_OK_FD(fd, "socket rx"))
+		return fd;
+
+	if (test->cfg_l3_inner == PF_INET)
+		ret = bind(fd, (void *)&test->in_daddr4,
+			   sizeof(test->in_daddr4));
+	else
+		ret = bind(fd, (void *)&test->in_daddr6,
+			   sizeof(test->in_daddr6));
+	if (!ASSERT_OK(ret, "bind rx")) {
+		close(fd);
+		return ret;
+	}
+
+	return fd;
+}
+
+static int do_tx(int fd, const char *pkt, int len)
+{
+	int ret;
+
+	ret = write(fd, pkt, len);
+	return ret != len;
+}
+
+static int do_poll(int fd, short events, int timeout)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.fd = fd;
+	pfd.events = events;
+
+	ret = poll(&pfd, 1, timeout);
+	return ret;
+}
+
+static int do_rx(int fd)
+{
+	char rbuf;
+	int ret, num = 0;
+
+	while (1) {
+		ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			break;
+		if (ret < 0)
+			return -1;
+		if (!ASSERT_EQ(rbuf, TEST_PACKET_PATTERN, "check pkt pattern"))
+			return -1;
+		num++;
+	}
+
+	return num;
+}
+
+static int run_test(const struct test_configuration *test,
+		    int source_port_index)
+{
+	int fdt = -1, fdr = -1, len, tx = 0, rx = 0, err;
+	unsigned long tstop, tcur;
+
+	fdr = setup_rx(test);
+	fdt = setup_tx(test);
+	if (!ASSERT_OK_FD(fdr, "setup rx") || !ASSERT_OK_FD(fdt, "setup tx")) {
+		err = -1;
+		goto out_close_sockets;
+	}
+
+	len = build_packet(test,
+			   (uint16_t)test->source_ports[source_port_index]);
+	if (!ASSERT_GT(len, 0, "build test packet"))
+		return -1;
+
+	tcur = util_gettime();
+	tstop = tcur;
+
+	while (tx < TEST_PACKETS_COUNT) {
+		if (!ASSERT_OK(do_tx(fdt, buf, len), "do_tx"))
+			break;
+		tx++;
+		err = do_rx(fdr);
+		if (!ASSERT_GE(err, 0, "do_rx"))
+			break;
+		rx += err;
+	}
+
+	/* read straggler packets, if any */
+	if (rx < tx) {
+		tstop = util_gettime() + 100;
+		while (rx < tx) {
+			tcur = util_gettime();
+			if (tcur >= tstop)
+				break;
+
+			err = do_poll(fdr, POLLIN, tstop - tcur);
+			if (err < 0)
+				break;
+			err = do_rx(fdr);
+			if (err >= 0)
+				rx += err;
+		}
+	}
+
+out_close_sockets:
+	close(fdt);
+	close(fdr);
+	return rx;
+}
+
+static int attach_and_configure_program(struct bpf_flow *skel)
+{
+	struct bpf_map *prog_array = skel->maps.jmp_table;
+	int main_prog_fd, sub_prog_fd, map_fd, i, err;
+	struct bpf_program *prog;
+	char prog_name[32];
+
+	main_prog_fd = bpf_program__fd(skel->progs._dissect);
+	if (main_prog_fd < 0)
+		return main_prog_fd;
+
+	err = bpf_prog_attach(main_prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+	if (err)
+		return err;
+
+	map_fd = bpf_map__fd(prog_array);
+	if (map_fd < 0)
+		return map_fd;
+
+	for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+		snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
+
+		prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+		if (!prog)
+			return -1;
+
+		sub_prog_fd = bpf_program__fd(prog);
+		if (sub_prog_fd < 0)
+			return -1;
+
+		err = bpf_map_update_elem(map_fd, &i, &sub_prog_fd, BPF_ANY);
+		if (err)
+			return -1;
+	}
+
+	return main_prog_fd;
+}
+
+static void detach_program(struct bpf_flow *skel, int prog_fd)
+{
+	bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+}
+
+static int set_port_drop(int pf, bool multi_port)
+{
+	char dst_port[16];
+
+	snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER);
+
+	SYS(fail, "tc qdisc add dev lo ingress");
+	SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s",
+	    "dev lo",
+	    "parent FFFF:",
+	    "protocol", pf == PF_INET6 ? "ipv6" : "ip",
+	    "pref 1337",
+	    "flower",
+	    "ip_proto udp",
+	    "src_port", multi_port ? "8-10" : "9",
+	    "dst_port", dst_port,
+	    "action drop");
+	return 0;
+
+fail_delete_qdisc:
+	SYS_NOFAIL("tc qdisc del dev lo ingress");
+fail:
+	return 1;
+}
+
+static void remove_filter(void)
+{
+	SYS_NOFAIL("tc filter del dev lo ingress");
+	SYS_NOFAIL("tc qdisc del dev lo ingress");
+}
+
+static int ipv4_setup(void)
+{
+	return set_port_drop(PF_INET, false);
+}
+
+static int ipv6_setup(void)
+{
+	return set_port_drop(PF_INET6, false);
+}
+
+static int port_range_setup(void)
+{
+	return set_port_drop(PF_INET, true);
+}
+
+static int set_addresses(void)
+{
+	SYS(out, "ip -4 addr add  %s dev lo", TEST_IPV4);
+	SYS(out_remove_ipv4, "ip -6 addr add %s dev lo", TEST_IPV6);
+	return 0;
+out_remove_ipv4:
+	SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4);
+out:
+	return -1;
+}
+
+static void unset_addresses(void)
+{
+	SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4);
+	SYS_NOFAIL("ip -6 addr del %s dev lo", TEST_IPV6);
+}
+
+static int ipip_setup(void)
+{
+	if (!ASSERT_OK(set_addresses(), "configure addresses"))
+		return -1;
+	if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter"))
+		goto out_unset_addresses;
+	SYS(out_remove_filter,
+	    "ip link add ipip_test type ipip remote %s local %s dev lo",
+	    TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL);
+	SYS(out_clean_netif, "ip link set ipip_test up");
+	return 0;
+
+out_clean_netif:
+	SYS_NOFAIL("ip link del ipip_test");
+out_remove_filter:
+	remove_filter();
+out_unset_addresses:
+	unset_addresses();
+	return -1;
+}
+
+static void ipip_shutdown(void)
+{
+	SYS_NOFAIL("ip link del ipip_test");
+	remove_filter();
+	unset_addresses();
+}
+
+static int gre_setup(void)
+{
+	if (!ASSERT_OK(set_addresses(), "configure addresses"))
+		return -1;
+	if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter"))
+		goto out_unset_addresses;
+	SYS(out_remove_filter,
+	    "ip link add gre_test type gre remote %s local %s dev lo",
+	    TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL);
+	SYS(out_clean_netif, "ip link set gre_test up");
+	return 0;
+
+out_clean_netif:
+	SYS_NOFAIL("ip link del ipip_test");
+out_remove_filter:
+	remove_filter();
+out_unset_addresses:
+	unset_addresses();
+	return -1;
+}
+
+static void gre_shutdown(void)
+{
+	SYS_NOFAIL("ip link del gre_test");
+	remove_filter();
+	unset_addresses();
+}
+
+static const struct test_configuration tests_input[] = {
+	{
+		.name = "ipv4",
+		.test_setup = ipv4_setup,
+		.test_teardown = remove_filter,
+		.source_ports = { 8, 9, 10 },
+		.cfg_l3_inner = PF_INET,
+		.in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT,
+		.in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT
+	},
+	{
+		.name = "ipv4_continue_dissect",
+		.test_setup = ipv4_setup,
+		.test_teardown = remove_filter,
+		.source_ports = { 8, 9, 10 },
+		.cfg_l3_inner = PF_INET,
+		.in_saddr4 = TEST_IN4_SRC_ADDR_DISSECT_CONTINUE,
+		.in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT },
+	{
+		.name = "ipip",
+		.test_setup = ipip_setup,
+		.test_teardown = ipip_shutdown,
+		.source_ports = { 8, 9, 10 },
+		.cfg_l3_inner = PF_INET,
+		.in_saddr4 = TEST_IN4_SRC_ADDR_IPIP,
+		.in_daddr4 = TEST_IN4_DST_ADDR_IPIP,
+		.out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT,
+		.out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT,
+		.cfg_l3_outer = PF_INET,
+		.cfg_encap_proto = IPPROTO_IPIP,
+
+	},
+	{
+		.name = "gre",
+		.test_setup = gre_setup,
+		.test_teardown = gre_shutdown,
+		.source_ports = { 8, 9, 10 },
+		.cfg_l3_inner = PF_INET,
+		.in_saddr4 = TEST_IN4_SRC_ADDR_IPIP,
+		.in_daddr4 = TEST_IN4_DST_ADDR_IPIP,
+		.out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT,
+		.out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT,
+		.cfg_l3_outer = PF_INET,
+		.cfg_encap_proto = IPPROTO_GRE,
+	},
+	{
+		.name = "port_range",
+		.test_setup = port_range_setup,
+		.test_teardown = remove_filter,
+		.source_ports = { 7, 9, 11 },
+		.cfg_l3_inner = PF_INET,
+		.in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT,
+		.in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT },
+	{
+		.name = "ipv6",
+		.test_setup = ipv6_setup,
+		.test_teardown = remove_filter,
+		.source_ports = { 8, 9, 10 },
+		.cfg_l3_inner = PF_INET6,
+		.in_saddr6 = TEST_IN6_SRC_ADDR_DEFAULT,
+		.in_daddr6 = TEST_IN6_DST_ADDR_DEFAULT
+	},
+};
+
+struct test_ctx {
+	struct bpf_flow *skel;
+	struct netns_obj *ns;
+	int prog_fd;
+};
+
+static int test_global_init(struct test_ctx *ctx)
+{
+	int err;
+
+	ctx->skel = bpf_flow__open_and_load();
+	if (!ASSERT_OK_PTR(ctx->skel, "open and load flow_dissector"))
+		return -1;
+
+	ctx->ns = netns_new("flow_dissector_classification", true);
+	if (!ASSERT_OK_PTR(ctx->ns, "switch ns"))
+		goto out_destroy_skel;
+
+	err = write_sysctl("/proc/sys/net/ipv4/conf/default/rp_filter", "0");
+	err |= write_sysctl("/proc/sys/net/ipv4/conf/all/rp_filter", "0");
+	err |= write_sysctl("/proc/sys/net/ipv4/conf/lo/rp_filter", "0");
+	if (!ASSERT_OK(err, "configure net tunables"))
+		goto out_clean_ns;
+
+	ctx->prog_fd = attach_and_configure_program(ctx->skel);
+	if (!ASSERT_OK_FD(ctx->prog_fd, "attach and configure program"))
+		goto out_clean_ns;
+	return 0;
+out_clean_ns:
+	netns_free(ctx->ns);
+out_destroy_skel:
+	bpf_flow__destroy(ctx->skel);
+	return -1;
+}
+
+static void test_global_shutdown(struct test_ctx *ctx)
+{
+	detach_program(ctx->skel, ctx->prog_fd);
+	netns_free(ctx->ns);
+	bpf_flow__destroy(ctx->skel);
+}
+
+void test_flow_dissector_classification(void)
+{
+	struct test_ctx ctx;
+	const struct test_configuration *test;
+	int i;
+
+	if (test_global_init(&ctx))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(tests_input); i++) {
+		if (!test__start_subtest(tests_input[i].name))
+			continue;
+		test = &tests_input[i];
+		/* All tests are expected to have one rx-ok port first,
+		 * then a non-working rx port, and finally a rx-ok port
+		 */
+		if (test->test_setup &&
+		    !ASSERT_OK(test->test_setup(), "init filter"))
+			continue;
+
+		ASSERT_EQ(run_test(test, 0), TEST_PACKETS_COUNT,
+			  "test first port");
+		ASSERT_EQ(run_test(test, 1), 0, "test second port");
+		ASSERT_EQ(run_test(test, 2), TEST_PACKETS_COUNT,
+			  "test third port");
+		if (test->test_teardown)
+			test->test_teardown();
+	}
+	test_global_shutdown(&ctx);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c
new file mode 100644
index 000000000000..b7b77a6b2979
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+
+#include "free_timer.skel.h"
+
+struct run_ctx {
+	struct bpf_program *start_prog;
+	struct bpf_program *overwrite_prog;
+	pthread_barrier_t notify;
+	int loop;
+	bool start;
+	bool stop;
+};
+
+static void start_threads(struct run_ctx *ctx)
+{
+	ctx->start = true;
+}
+
+static void stop_threads(struct run_ctx *ctx)
+{
+	ctx->stop = true;
+	/* Guarantee the order between ->stop and ->start */
+	__atomic_store_n(&ctx->start, true, __ATOMIC_RELEASE);
+}
+
+static int wait_for_start(struct run_ctx *ctx)
+{
+	while (!__atomic_load_n(&ctx->start, __ATOMIC_ACQUIRE))
+		usleep(10);
+
+	return ctx->stop;
+}
+
+static void *overwrite_timer_fn(void *arg)
+{
+	struct run_ctx *ctx = arg;
+	int loop, fd, err;
+	cpu_set_t cpuset;
+	long ret = 0;
+
+	/* Pin on CPU 0 */
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+	/* Is the thread being stopped ? */
+	err = wait_for_start(ctx);
+	if (err)
+		return NULL;
+
+	fd = bpf_program__fd(ctx->overwrite_prog);
+	loop = ctx->loop;
+	while (loop-- > 0) {
+		LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+		/* Wait for start thread to complete */
+		pthread_barrier_wait(&ctx->notify);
+
+		/* Overwrite timers */
+		err = bpf_prog_test_run_opts(fd, &opts);
+		if (err)
+			ret |= 1;
+		else if (opts.retval)
+			ret |= 2;
+
+		/* Notify start thread to start timers */
+		pthread_barrier_wait(&ctx->notify);
+	}
+
+	return (void *)ret;
+}
+
+static void *start_timer_fn(void *arg)
+{
+	struct run_ctx *ctx = arg;
+	int loop, fd, err;
+	cpu_set_t cpuset;
+	long ret = 0;
+
+	/* Pin on CPU 1 */
+	CPU_ZERO(&cpuset);
+	CPU_SET(1, &cpuset);
+	pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+	/* Is the thread being stopped ? */
+	err = wait_for_start(ctx);
+	if (err)
+		return NULL;
+
+	fd = bpf_program__fd(ctx->start_prog);
+	loop = ctx->loop;
+	while (loop-- > 0) {
+		LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+		/* Run the prog to start timer */
+		err = bpf_prog_test_run_opts(fd, &opts);
+		if (err)
+			ret |= 4;
+		else if (opts.retval)
+			ret |= 8;
+
+		/* Notify overwrite thread to do overwrite */
+		pthread_barrier_wait(&ctx->notify);
+
+		/* Wait for overwrite thread to complete */
+		pthread_barrier_wait(&ctx->notify);
+	}
+
+	return (void *)ret;
+}
+
+void test_free_timer(void)
+{
+	struct free_timer *skel;
+	struct bpf_program *prog;
+	struct run_ctx ctx;
+	pthread_t tid[2];
+	void *ret;
+	int err;
+
+	skel = free_timer__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_load"))
+		return;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	prog = bpf_object__find_program_by_name(skel->obj, "start_timer");
+	if (!ASSERT_OK_PTR(prog, "find start prog"))
+		goto out;
+	ctx.start_prog = prog;
+
+	prog = bpf_object__find_program_by_name(skel->obj, "overwrite_timer");
+	if (!ASSERT_OK_PTR(prog, "find overwrite prog"))
+		goto out;
+	ctx.overwrite_prog = prog;
+
+	pthread_barrier_init(&ctx.notify, NULL, 2);
+	ctx.loop = 10;
+
+	err = pthread_create(&tid[0], NULL, start_timer_fn, &ctx);
+	if (!ASSERT_OK(err, "create start_timer"))
+		goto out;
+
+	err = pthread_create(&tid[1], NULL, overwrite_timer_fn, &ctx);
+	if (!ASSERT_OK(err, "create overwrite_timer")) {
+		stop_threads(&ctx);
+		goto out;
+	}
+
+	start_threads(&ctx);
+
+	ret = NULL;
+	err = pthread_join(tid[0], &ret);
+	ASSERT_EQ(err | (long)ret, 0, "start_timer");
+	ret = NULL;
+	err = pthread_join(tid[1], &ret);
+	ASSERT_EQ(err | (long)ret, 0, "overwrite_timer");
+out:
+	free_timer__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 66ab1cae923e..e19ef509ebf8 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -397,6 +397,31 @@ cleanup:
 	kprobe_multi_session_cookie__destroy(skel);
 }
 
+static void test_unique_match(void)
+{
+	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+	struct kprobe_multi *skel = NULL;
+	struct bpf_link *link = NULL;
+
+	skel = kprobe_multi__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "kprobe_multi__open_and_load"))
+		return;
+
+	opts.unique_match = true;
+	skel->bss->pid = getpid();
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+						     "bpf_fentry_test*", &opts);
+	if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+		bpf_link__destroy(link);
+
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+						     "bpf_fentry_test8*", &opts);
+	if (ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+		bpf_link__destroy(link);
+
+	kprobe_multi__destroy(skel);
+}
+
 static size_t symbol_hash(long key, void *ctx __maybe_unused)
 {
 	return str_hash((const char *) key);
@@ -765,5 +790,7 @@ void test_kprobe_multi_test(void)
 		test_session_skel_api();
 	if (test__start_subtest("session_cookie"))
 		test_session_cookie_skel_api();
+	if (test__start_subtest("unique_match"))
+		test_unique_match();
 	RUN_TESTS(kprobe_multi_verifier);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c
index 70d90c43537c..ed8857ae914a 100644
--- a/tools/testing/selftests/bpf/prog_tests/missed.c
+++ b/tools/testing/selftests/bpf/prog_tests/missed.c
@@ -85,6 +85,7 @@ static void test_missed_kprobe_recursion(void)
 	ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
 	ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
 	ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses");
+	ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test6)), 1, "test6_recursion_misses");
 
 cleanup:
 	missed_kprobe_recursion__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 884ad87783d5..1e3e4392dcca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -111,31 +111,35 @@ out:
 
 static void test_sockmap_vsock_delete_on_close(void)
 {
-	int err, c, p, map;
-	const int zero = 0;
-
-	err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
-	if (!ASSERT_OK(err, "create_pair(AF_VSOCK)"))
-		return;
+	int map, c, p, err, zero = 0;
 
 	map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
 			     sizeof(int), 1, NULL);
-	if (!ASSERT_GE(map, 0, "bpf_map_create")) {
-		close(c);
-		goto out;
-	}
+	if (!ASSERT_OK_FD(map, "bpf_map_create"))
+		return;
 
-	err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
-	close(c);
-	if (!ASSERT_OK(err, "bpf_map_update"))
-		goto out;
+	err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+	if (!ASSERT_OK(err, "create_pair"))
+		goto close_map;
+
+	if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST))
+		goto close_socks;
+
+	xclose(c);
+	xclose(p);
+
+	err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+	if (!ASSERT_OK(err, "create_pair"))
+		goto close_map;
 
-	err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+	err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
 	ASSERT_OK(err, "after close(), bpf_map_update");
 
-out:
-	close(p);
-	close(map);
+close_socks:
+	xclose(c);
+	xclose(p);
+close_map:
+	xclose(map);
 }
 
 static void test_skmsg_helpers(enum bpf_map_type map_type)
@@ -522,8 +526,8 @@ static void test_sockmap_skb_verdict_shutdown(void)
 	if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
 		goto out_close;
 
-	n = recv(c1, &b, 1, SOCK_NONBLOCK);
-	ASSERT_EQ(n, 0, "recv_timeout(fin)");
+	n = recv(c1, &b, 1, MSG_DONTWAIT);
+	ASSERT_EQ(n, 0, "recv(fin)");
 out_close:
 	close(c1);
 	close(p1);
@@ -531,57 +535,6 @@ out:
 	test_sockmap_pass_prog__destroy(skel);
 }
 
-static void test_sockmap_stream_pass(void)
-{
-	int zero = 0, sent, recvd;
-	int verdict, parser;
-	int err, map;
-	int c = -1, p = -1;
-	struct test_sockmap_pass_prog *pass = NULL;
-	char snd[256] = "0123456789";
-	char rcv[256] = "0";
-
-	pass = test_sockmap_pass_prog__open_and_load();
-	verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
-	parser = bpf_program__fd(pass->progs.prog_skb_parser);
-	map = bpf_map__fd(pass->maps.sock_map_rx);
-
-	err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
-	if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
-		goto out;
-
-	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
-	if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
-		goto out;
-
-	err = create_pair(AF_INET, SOCK_STREAM, &c, &p);
-	if (err)
-		goto out;
-
-	/* sk_data_ready of 'p' will be replaced by strparser handler */
-	err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
-	if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
-		goto out_close;
-
-	/*
-	 * as 'prog_skb_parser' return the original skb len and
-	 * 'prog_skb_verdict' return SK_PASS, the kernel will just
-	 * pass it through to original socket 'p'
-	 */
-	sent = xsend(c, snd, sizeof(snd), 0);
-	ASSERT_EQ(sent, sizeof(snd), "xsend(c)");
-
-	recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK,
-			     IO_TIMEOUT_SEC);
-	ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)");
-
-out_close:
-	close(c);
-	close(p);
-
-out:
-	test_sockmap_pass_prog__destroy(pass);
-}
 
 static void test_sockmap_skb_verdict_fionread(bool pass_prog)
 {
@@ -628,7 +581,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
 	ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
 	/* On DROP test there will be no data to read */
 	if (pass_prog) {
-		recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+		recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC);
 		ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
 	}
 
@@ -1061,6 +1014,34 @@ destroy:
 	test_sockmap_pass_prog__destroy(skel);
 }
 
+static void test_sockmap_vsock_unconnected(void)
+{
+	struct sockaddr_storage addr;
+	int map, s, zero = 0;
+	socklen_t alen;
+
+	map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+			     sizeof(int), 1, NULL);
+	if (!ASSERT_OK_FD(map, "bpf_map_create"))
+		return;
+
+	s = xsocket(AF_VSOCK, SOCK_STREAM, 0);
+	if (s < 0)
+		goto close_map;
+
+	/* Fail connect(), but trigger transport assignment. */
+	init_addr_loopback(AF_VSOCK, &addr, &alen);
+	if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect"))
+		goto close_sock;
+
+	ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update");
+
+close_sock:
+	xclose(s);
+close_map:
+	xclose(map);
+}
+
 void test_sockmap_basic(void)
 {
 	if (test__start_subtest("sockmap create_update_free"))
@@ -1101,8 +1082,6 @@ void test_sockmap_basic(void)
 		test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
 	if (test__start_subtest("sockmap skb_verdict shutdown"))
 		test_sockmap_skb_verdict_shutdown();
-	if (test__start_subtest("sockmap stream parser and verdict pass"))
-		test_sockmap_stream_pass();
 	if (test__start_subtest("sockmap skb_verdict fionread"))
 		test_sockmap_skb_verdict_fionread(true);
 	if (test__start_subtest("sockmap skb_verdict fionread on drop"))
@@ -1127,4 +1106,6 @@ void test_sockmap_basic(void)
 		test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
 	if (test__start_subtest("sockmap skb_verdict vsock poll"))
 		test_sockmap_skb_verdict_vsock_poll();
+	if (test__start_subtest("sockmap vsock unconnected"))
+		test_sockmap_vsock_unconnected();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
new file mode 100644
index 000000000000..621b3b71888e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <netinet/tcp.h>
+#include <test_progs.h>
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_strp.skel.h"
+
+#define STRP_PKT_HEAD_LEN 4
+#define STRP_PKT_BODY_LEN 6
+#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN)
+
+static const char packet[STRP_PKT_FULL_LEN] = "head+body\0";
+static const int test_packet_num = 100;
+
+/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready
+ * with sk held if an skb exists in sk_receive_queue. Then for the
+ * data_ready implementation of strparser, it will delay the read
+ * operation if sk is held and EAGAIN is returned.
+ */
+static int sockmap_strp_consume_pre_data(int p)
+{
+	int recvd;
+	bool retried = false;
+	char rcv[10];
+
+retry:
+	errno = 0;
+	recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1);
+	if (recvd < 0 && errno == EAGAIN && retried == false) {
+		/* On the first call, EAGAIN will certainly be returned.
+		 * A 1-second wait is enough for the workqueue to finish.
+		 */
+		sleep(1);
+		retried = true;
+		goto retry;
+	}
+
+	if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") ||
+	    !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+				"data mismatch"))
+		return -1;
+	return 0;
+}
+
+static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass,
+						   bool need_parser)
+{
+	struct test_sockmap_strp *strp = NULL;
+	int verdict, parser;
+	int err;
+
+	strp = test_sockmap_strp__open_and_load();
+	*out_map = bpf_map__fd(strp->maps.sock_map);
+
+	if (need_parser)
+		parser = bpf_program__fd(strp->progs.prog_skb_parser_partial);
+	else
+		parser = bpf_program__fd(strp->progs.prog_skb_parser);
+
+	if (pass)
+		verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass);
+	else
+		verdict = bpf_program__fd(strp->progs.prog_skb_verdict);
+
+	err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
+		goto err;
+
+	err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+		goto err;
+
+	return strp;
+err:
+	test_sockmap_strp__destroy(strp);
+	return NULL;
+}
+
+/* Dispatch packets to different socket by packet size:
+ *
+ *                      ------  ------
+ *                     | pkt4 || pkt1 |... > remote socket
+ *  ------ ------     / ------  ------
+ * | pkt8 | pkt7 |...
+ *  ------ ------     \ ------  ------
+ *                     | pkt3 || pkt2 |... > local socket
+ *                      ------  ------
+ */
+static void test_sockmap_strp_dispatch_pkt(int family, int sotype)
+{
+	int i, j, zero = 0, one = 1, recvd;
+	int err, map;
+	int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+	struct test_sockmap_strp *strp = NULL;
+	int test_cnt = 6;
+	char rcv[10];
+	struct {
+		char	data[7];
+		int	data_len;
+		int	send_cnt;
+		int	*receiver;
+	} send_dir[2] = {
+		/* data expected to deliver to local */
+		{"llllll", 6, 0, &p0},
+		/* data expected to deliver to remote */
+		{"rrrrr",  5, 0, &c1}
+	};
+
+	strp = sockmap_strp_init(&map, false, false);
+	if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+		return;
+
+	err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+	if (!ASSERT_OK(err, "create_socket_pairs()"))
+		goto out;
+
+	err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+		goto out_close;
+
+	err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+		goto out_close;
+
+	err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+	if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)"))
+		goto out_close;
+
+	/* deliver data with data size greater than 5 to local */
+	strp->data->verdict_max_size = 5;
+
+	for (i = 0; i < test_cnt; i++) {
+		int d = i % 2;
+
+		xsend(c0, send_dir[d].data, send_dir[d].data_len, 0);
+		send_dir[d].send_cnt++;
+	}
+
+	for (i = 0; i < 2; i++) {
+		for (j = 0; j < send_dir[i].send_cnt; j++) {
+			int expected = send_dir[i].data_len;
+
+			recvd = recv_timeout(*send_dir[i].receiver, rcv,
+					     expected, MSG_DONTWAIT,
+					     IO_TIMEOUT_SEC);
+			if (!ASSERT_EQ(recvd, expected, "recv_timeout()"))
+				goto out_close;
+			if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd),
+				       "data mismatch"))
+				goto out_close;
+		}
+	}
+out_close:
+	close(c0);
+	close(c1);
+	close(p0);
+	close(p1);
+out:
+	test_sockmap_strp__destroy(strp);
+}
+
+/* We have multiple packets in one skb
+ * ------------ ------------ ------------
+ * |  packet1  |   packet2  |  ...
+ * ------------ ------------ ------------
+ */
+static void test_sockmap_strp_multiple_pkt(int family, int sotype)
+{
+	int i, zero = 0;
+	int sent, recvd, total;
+	int err, map;
+	int c = -1, p = -1;
+	struct test_sockmap_strp *strp = NULL;
+	char *snd = NULL, *rcv = NULL;
+
+	strp = sockmap_strp_init(&map, true, true);
+	if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+		return;
+
+	err = create_pair(family, sotype, &c, &p);
+	if (err)
+		goto out;
+
+	err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+		goto out_close;
+
+	/* construct multiple packets in one buffer */
+	total = test_packet_num * STRP_PKT_FULL_LEN;
+	snd = malloc(total);
+	rcv = malloc(total + 1);
+	if (!ASSERT_TRUE(snd, "malloc(snd)") ||
+	    !ASSERT_TRUE(rcv, "malloc(rcv)"))
+		goto out_close;
+
+	for (i = 0; i < test_packet_num; i++) {
+		memcpy(snd + i * STRP_PKT_FULL_LEN,
+		       packet, STRP_PKT_FULL_LEN);
+	}
+
+	sent = xsend(c, snd, total, 0);
+	if (!ASSERT_EQ(sent, total, "xsend(c)"))
+		goto out_close;
+
+	/* try to recv one more byte to avoid truncation check */
+	recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC);
+	if (!ASSERT_EQ(recvd, total, "recv(rcv)"))
+		goto out_close;
+
+	/* we sent TCP segment with multiple encapsulation
+	 * then check whether packets are handled correctly
+	 */
+	if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch"))
+		goto out_close;
+
+out_close:
+	close(c);
+	close(p);
+	if (snd)
+		free(snd);
+	if (rcv)
+		free(rcv);
+out:
+	test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with partial read */
+static void test_sockmap_strp_partial_read(int family, int sotype)
+{
+	int zero = 0, recvd, off;
+	int err, map;
+	int c = -1, p = -1;
+	struct test_sockmap_strp *strp = NULL;
+	char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+	strp = sockmap_strp_init(&map, true, true);
+	if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+		return;
+
+	err = create_pair(family, sotype, &c, &p);
+	if (err)
+		goto out;
+
+	/* sk_data_ready of 'p' will be replaced by strparser handler */
+	err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+		goto out_close;
+
+	/* 1.1 send partial head, 1 byte header left */
+	off = STRP_PKT_HEAD_LEN - 1;
+	xsend(c, packet, off, 0);
+	recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+	if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data"))
+		goto out_close;
+
+	/* 1.2 send remaining head and body */
+	xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+	recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+	if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+		goto out_close;
+
+	/* 2.1 send partial head, 1 byte header left */
+	off = STRP_PKT_HEAD_LEN - 1;
+	xsend(c, packet, off, 0);
+
+	/* 2.2 send remaining head and partial body, 1 byte body left */
+	xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0);
+	off = STRP_PKT_FULL_LEN - 1;
+	recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+	if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data"))
+		goto out_close;
+
+	/* 2.3 send remaining body */
+	xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+	recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+	if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+		goto out_close;
+
+out_close:
+	close(c);
+	close(p);
+
+out:
+	test_sockmap_strp__destroy(strp);
+}
+
+/* Test simple socket read/write with strparser + FIONREAD */
+static void test_sockmap_strp_pass(int family, int sotype, bool fionread)
+{
+	int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail;
+	int err, map;
+	int c = -1, p = -1;
+	int test_cnt = 10, i;
+	struct test_sockmap_strp *strp = NULL;
+	char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+	strp = sockmap_strp_init(&map, true, true);
+	if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+		return;
+
+	err = create_pair(family, sotype, &c, &p);
+	if (err)
+		goto out;
+
+	/* inject some data before bpf process, it should be read
+	 * correctly because we check sk_receive_queue in
+	 * tcp_bpf_recvmsg_parser().
+	 */
+	sent = xsend(c, packet, pkt_size, 0);
+	if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)"))
+		goto out_close;
+
+	/* sk_data_ready of 'p' will be replaced by strparser handler */
+	err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+		goto out_close;
+
+	/* consume previous data we injected */
+	if (sockmap_strp_consume_pre_data(p))
+		goto out_close;
+
+	/* Previously, we encountered issues such as deadlocks and
+	 * sequence errors that resulted in the inability to read
+	 * continuously. Therefore, we perform multiple iterations
+	 * of testing here.
+	 */
+	for (i = 0; i < test_cnt; i++) {
+		sent = xsend(c, packet, pkt_size, 0);
+		if (!ASSERT_EQ(sent, pkt_size, "xsend(c)"))
+			goto out_close;
+
+		recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+				     IO_TIMEOUT_SEC);
+		if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") ||
+		    !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+				  "memcmp, data mismatch"))
+			goto out_close;
+	}
+
+	if (fionread) {
+		sent = xsend(c, packet, pkt_size, 0);
+		if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)"))
+			goto out_close;
+
+		err = ioctl(p, FIONREAD, &avail);
+		if (!ASSERT_OK(err, "ioctl(FIONREAD) error") ||
+		    !ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)"))
+			goto out_close;
+
+		recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+				     IO_TIMEOUT_SEC);
+		if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") ||
+		    !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+			      "second memcmp, data mismatch"))
+			goto out_close;
+	}
+
+out_close:
+	close(c);
+	close(p);
+
+out:
+	test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with verdict mode */
+static void test_sockmap_strp_verdict(int family, int sotype)
+{
+	int zero = 0, one = 1, sent, recvd, off;
+	int err, map;
+	int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+	struct test_sockmap_strp *strp = NULL;
+	char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+	strp = sockmap_strp_init(&map, false, true);
+	if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+		return;
+
+	/* We simulate a reverse proxy server.
+	 * When p0 receives data from c0, we forward it to c1.
+	 * From c1's perspective, it will consider this data
+	 * as being sent by p1.
+	 */
+	err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+	if (!ASSERT_OK(err, "create_socket_pairs()"))
+		goto out;
+
+	err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+		goto out_close;
+
+	err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+		goto out_close;
+
+	sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+	if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)"))
+		goto out_close;
+
+	recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT,
+			     IO_TIMEOUT_SEC);
+	if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") ||
+	    !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+			  "received data does not match the sent data"))
+		goto out_close;
+
+	/* send again to ensure the stream is functioning correctly. */
+	sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+	if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)"))
+		goto out_close;
+
+	/* partial read */
+	off = STRP_PKT_FULL_LEN / 2;
+	recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT,
+			     IO_TIMEOUT_SEC);
+	recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT,
+			      IO_TIMEOUT_SEC);
+
+	if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") ||
+	    !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+			  "partial received data does not match the sent data"))
+		goto out_close;
+
+out_close:
+	close(c0);
+	close(c1);
+	close(p0);
+	close(p1);
+out:
+	test_sockmap_strp__destroy(strp);
+}
+
+void test_sockmap_strp(void)
+{
+	if (test__start_subtest("sockmap strp tcp pass"))
+		test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false);
+	if (test__start_subtest("sockmap strp tcp v6 pass"))
+		test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false);
+	if (test__start_subtest("sockmap strp tcp pass fionread"))
+		test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true);
+	if (test__start_subtest("sockmap strp tcp v6 pass fionread"))
+		test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true);
+	if (test__start_subtest("sockmap strp tcp verdict"))
+		test_sockmap_strp_verdict(AF_INET, SOCK_STREAM);
+	if (test__start_subtest("sockmap strp tcp v6 verdict"))
+		test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM);
+	if (test__start_subtest("sockmap strp tcp partial read"))
+		test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM);
+	if (test__start_subtest("sockmap strp tcp multiple packets"))
+		test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM);
+	if (test__start_subtest("sockmap strp tcp dispatch"))
+		test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 05d0e07da394..ba6b3ec1156a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -2,7 +2,7 @@
 #include <test_progs.h>
 #include "cgroup_helpers.h"
 
-#include <linux/tcp.h>
+#include <netinet/tcp.h>
 #include <linux/netlink.h>
 #include "sockopt_sk.skel.h"
 
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
index 151a4210028f..2461d183dee5 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -14,10 +14,16 @@
 #include "netlink_helpers.h"
 #include "tc_helpers.h"
 
+#define NETKIT_HEADROOM	32
+#define NETKIT_TAILROOM	8
+
 #define MARK		42
 #define PRIO		0xeb9f
 #define ICMP_ECHO	8
 
+#define FLAG_ADJUST_ROOM (1 << 0)
+#define FLAG_SAME_NETNS  (1 << 1)
+
 struct icmphdr {
 	__u8		type;
 	__u8		code;
@@ -35,7 +41,7 @@ struct iplink_req {
 };
 
 static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
-			 bool same_netns, int scrub, int peer_scrub)
+			 int scrub, int peer_scrub, __u32 flags)
 {
 	struct rtnl_handle rth = { .fd = -1 };
 	struct iplink_req req = {};
@@ -63,6 +69,10 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
 	addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub);
 	addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub);
 	addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+	if (flags & FLAG_ADJUST_ROOM) {
+		addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, NETKIT_HEADROOM);
+		addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, NETKIT_TAILROOM);
+	}
 	addattr_nest_end(&req.n, data);
 	addattr_nest_end(&req.n, linkinfo);
 
@@ -87,7 +97,7 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
 				 " addr ee:ff:bb:cc:aa:dd"),
 				 "set hwaddress");
 	}
-	if (same_netns) {
+	if (flags & FLAG_SAME_NETNS) {
 		ASSERT_OK(system("ip link set dev " netkit_peer " up"),
 				 "up peer");
 		ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"),
@@ -184,8 +194,8 @@ void serial_test_tc_netkit_basic(void)
 	int err, ifindex;
 
 	err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
-			    &ifindex, false, NETKIT_SCRUB_DEFAULT,
-			    NETKIT_SCRUB_DEFAULT);
+			    &ifindex, NETKIT_SCRUB_DEFAULT,
+			    NETKIT_SCRUB_DEFAULT, 0);
 	if (err)
 		return;
 
@@ -299,8 +309,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target)
 	int err, ifindex;
 
 	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
-			    &ifindex, false, NETKIT_SCRUB_DEFAULT,
-			    NETKIT_SCRUB_DEFAULT);
+			    &ifindex, NETKIT_SCRUB_DEFAULT,
+			    NETKIT_SCRUB_DEFAULT, 0);
 	if (err)
 		return;
 
@@ -428,8 +438,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
 	int err, ifindex;
 
 	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
-			    &ifindex, false, NETKIT_SCRUB_DEFAULT,
-			    NETKIT_SCRUB_DEFAULT);
+			    &ifindex, NETKIT_SCRUB_DEFAULT,
+			    NETKIT_SCRUB_DEFAULT, 0);
 	if (err)
 		return;
 
@@ -543,8 +553,8 @@ void serial_test_tc_netkit_device(void)
 	int err, ifindex, ifindex2;
 
 	err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
-			    &ifindex, true, NETKIT_SCRUB_DEFAULT,
-			    NETKIT_SCRUB_DEFAULT);
+			    &ifindex, NETKIT_SCRUB_DEFAULT,
+			    NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
 	if (err)
 		return;
 
@@ -655,8 +665,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
 	int err, ifindex;
 
 	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
-			    &ifindex, false, NETKIT_SCRUB_DEFAULT,
-			    NETKIT_SCRUB_DEFAULT);
+			    &ifindex, NETKIT_SCRUB_DEFAULT,
+			    NETKIT_SCRUB_DEFAULT, 0);
 	if (err)
 		return;
 
@@ -733,8 +743,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode)
 	struct bpf_link *link;
 
 	err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
-			    &ifindex, true, NETKIT_SCRUB_DEFAULT,
-			    NETKIT_SCRUB_DEFAULT);
+			    &ifindex, NETKIT_SCRUB_DEFAULT,
+			    NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
 	if (err)
 		return;
 
@@ -799,7 +809,7 @@ void serial_test_tc_netkit_pkt_type(void)
 	serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
 }
 
-static void serial_test_tc_netkit_scrub_type(int scrub)
+static void serial_test_tc_netkit_scrub_type(int scrub, bool room)
 {
 	LIBBPF_OPTS(bpf_netkit_opts, optl);
 	struct test_tc_link *skel;
@@ -807,7 +817,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub)
 	int err, ifindex;
 
 	err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
-			    &ifindex, false, scrub, scrub);
+			    &ifindex, scrub, scrub,
+			    room ? FLAG_ADJUST_ROOM : 0);
 	if (err)
 		return;
 
@@ -842,6 +853,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub)
 	ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8");
 	ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark");
 	ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio");
+	ASSERT_EQ(skel->bss->headroom, room ? NETKIT_HEADROOM : 0, "headroom");
+	ASSERT_EQ(skel->bss->tailroom, room ? NETKIT_TAILROOM : 0, "tailroom");
 cleanup:
 	test_tc_link__destroy(skel);
 
@@ -852,6 +865,6 @@ cleanup:
 
 void serial_test_tc_netkit_scrub(void)
 {
-	serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT);
-	serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE);
+	serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT, false);
+	serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE, true);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 3ee40ee9413a..8a0e1ff8a2dc 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -52,6 +52,8 @@
 #include "verifier_map_ptr_mixing.skel.h"
 #include "verifier_map_ret_val.skel.h"
 #include "verifier_masking.skel.h"
+#include "verifier_may_goto_1.skel.h"
+#include "verifier_may_goto_2.skel.h"
 #include "verifier_meta_access.skel.h"
 #include "verifier_movsx.skel.h"
 #include "verifier_mtu.skel.h"
@@ -98,6 +100,7 @@
 #include "verifier_xdp_direct_packet_access.skel.h"
 #include "verifier_bits_iter.skel.h"
 #include "verifier_lsm.skel.h"
+#include "irq.skel.h"
 
 #define MAX_ENTRIES 11
 
@@ -181,6 +184,8 @@ void test_verifier_map_ptr(void)              { RUN(verifier_map_ptr); }
 void test_verifier_map_ptr_mixing(void)       { RUN(verifier_map_ptr_mixing); }
 void test_verifier_map_ret_val(void)          { RUN(verifier_map_ret_val); }
 void test_verifier_masking(void)              { RUN(verifier_masking); }
+void test_verifier_may_goto_1(void)           { RUN(verifier_may_goto_1); }
+void test_verifier_may_goto_2(void)           { RUN(verifier_may_goto_2); }
 void test_verifier_meta_access(void)          { RUN(verifier_meta_access); }
 void test_verifier_movsx(void)                 { RUN(verifier_movsx); }
 void test_verifier_netfilter_ctx(void)        { RUN(verifier_netfilter_ctx); }
@@ -225,6 +230,7 @@ void test_verifier_xdp(void)                  { RUN(verifier_xdp); }
 void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
 void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
 void test_verifier_lsm(void)                  { RUN(verifier_lsm); }
+void test_irq(void)			      { RUN(irq); }
 void test_verifier_mtu(void)		      { RUN(verifier_mtu); }
 
 static int init_test_val_map(struct bpf_object *obj, char *map_name)
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 53d6ad8c2257..b2b2d85dbb1b 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -82,6 +82,8 @@ static void test_xdp_adjust_tail_grow2(void)
 	/* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
 #if defined(__s390x__)
 	int tailroom = 512;
+#elif defined(__powerpc__)
+	int tailroom = 384;
 #else
 	int tailroom = 320;
 #endif
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index 6d8b54124cb3..fb952703653e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -17,7 +17,7 @@
 #include "network_helpers.h"
 #include <linux/if_bonding.h>
 #include <linux/limits.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
 #include <uapi/linux/netdev.h>
 
 #include "xdp_dummy.skel.h"
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index e6a783c7f5db..937da9b7532a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -2,6 +2,14 @@
 #include <test_progs.h>
 #include <network_helpers.h>
 #include "test_xdp_context_test_run.skel.h"
+#include "test_xdp_meta.skel.h"
+
+#define TX_ADDR "10.0.0.1"
+#define RX_ADDR "10.0.0.2"
+#define RX_NAME "veth0"
+#define TX_NAME "veth1"
+#define TX_NETNS "xdp_context_tx"
+#define RX_NETNS "xdp_context_rx"
 
 void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
 			    __u32 data_meta, __u32 data, __u32 data_end,
@@ -103,3 +111,82 @@ void test_xdp_context_test_run(void)
 
 	test_xdp_context_test_run__destroy(skel);
 }
+
+void test_xdp_context_functional(void)
+{
+	LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+	LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+	struct netns_obj *rx_ns = NULL, *tx_ns = NULL;
+	struct bpf_program *tc_prog, *xdp_prog;
+	struct test_xdp_meta *skel = NULL;
+	struct nstoken *nstoken = NULL;
+	int rx_ifindex;
+	int ret;
+
+	tx_ns = netns_new(TX_NETNS, false);
+	if (!ASSERT_OK_PTR(tx_ns, "create tx_ns"))
+		return;
+
+	rx_ns = netns_new(RX_NETNS, false);
+	if (!ASSERT_OK_PTR(rx_ns, "create rx_ns"))
+		goto close;
+
+	SYS(close, "ip link add " RX_NAME " netns " RX_NETNS
+	    " type veth peer name " TX_NAME " netns " TX_NETNS);
+
+	nstoken = open_netns(RX_NETNS);
+	if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
+		goto close;
+
+	SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME);
+	SYS(close, "ip link set dev " RX_NAME " up");
+
+	skel = test_xdp_meta__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+		goto close;
+
+	rx_ifindex = if_nametoindex(RX_NAME);
+	if (!ASSERT_GE(rx_ifindex, 0, "if_nametoindex rx"))
+		goto close;
+
+	tc_hook.ifindex = rx_ifindex;
+	ret = bpf_tc_hook_create(&tc_hook);
+	if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+		goto close;
+
+	tc_prog = bpf_object__find_program_by_name(skel->obj, "ing_cls");
+	if (!ASSERT_OK_PTR(tc_prog, "open ing_cls prog"))
+		goto close;
+
+	tc_opts.prog_fd = bpf_program__fd(tc_prog);
+	ret = bpf_tc_attach(&tc_hook, &tc_opts);
+	if (!ASSERT_OK(ret, "bpf_tc_attach"))
+		goto close;
+
+	xdp_prog = bpf_object__find_program_by_name(skel->obj, "ing_xdp");
+	if (!ASSERT_OK_PTR(xdp_prog, "open ing_xdp prog"))
+		goto close;
+
+	ret = bpf_xdp_attach(rx_ifindex,
+			     bpf_program__fd(xdp_prog),
+			     0, NULL);
+	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+		goto close;
+
+	close_netns(nstoken);
+
+	nstoken = open_netns(TX_NETNS);
+	if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
+		goto close;
+
+	SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME);
+	SYS(close, "ip link set dev " TX_NAME " up");
+	ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping");
+
+close:
+	close_netns(nstoken);
+	test_xdp_meta__destroy(skel);
+	netns_free(rx_ns);
+	netns_free(tx_ns);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index c7f74f068e78..df27535995af 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -52,10 +52,10 @@ static void test_xdp_with_cpumap_helpers(void)
 	ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
 
 	/* send a packet to trigger any potential bugs in there */
-	char data[10] = {};
+	char data[ETH_HLEN] = {};
 	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
 			    .data_in = &data,
-			    .data_size_in = 10,
+			    .data_size_in = sizeof(data),
 			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
 			    .repeat = 1,
 		);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 27ffed17d4be..461ab18705d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -23,7 +23,7 @@ static void test_xdp_with_devmap_helpers(void)
 	__u32 len = sizeof(info);
 	int err, dm_fd, dm_fd_redir, map_fd;
 	struct nstoken *nstoken = NULL;
-	char data[10] = {};
+	char data[ETH_HLEN] = {};
 	__u32 idx = 0;
 
 	SYS(out_close, "ip netns add %s", TEST_NS);
@@ -58,7 +58,7 @@ static void test_xdp_with_devmap_helpers(void)
 	/* send a packet to trigger any potential bugs in there */
 	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
 			    .data_in = &data,
-			    .data_size_in = 10,
+			    .data_size_in = sizeof(data),
 			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
 			    .repeat = 1,
 		);
@@ -158,7 +158,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
 	struct nstoken *nstoken = NULL;
 	__u32 len = sizeof(info);
 	int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst;
-	char data[10] = {};
+	char data[ETH_HLEN] = {};
 	__u32 idx = 0;
 
 	SYS(out_close, "ip netns add %s", TEST_NS);
@@ -208,7 +208,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
 	/* send a packet to trigger any potential bugs in there */
 	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
 			    .data_in = &data,
-			    .data_size_in = 10,
+			    .data_size_in = sizeof(data),
 			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
 			    .repeat = 1,
 		);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index bad0ea167be7..7dac044664ac 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -7,10 +7,11 @@
 #include <linux/if_link.h>
 #include <linux/ipv6.h>
 #include <linux/in6.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
 #include <bpf/bpf_endian.h>
 #include <uapi/linux/netdev.h>
 #include "test_xdp_do_redirect.skel.h"
+#include "xdp_dummy.skel.h"
 
 struct udp_packet {
 	struct ethhdr eth;
@@ -246,3 +247,166 @@ out:
 	SYS_NOFAIL("ip netns del testns");
 	test_xdp_do_redirect__destroy(skel);
 }
+
+#define NS_NB		3
+#define NS0		"NS0"
+#define NS1		"NS1"
+#define NS2		"NS2"
+#define IPV4_NETWORK	"10.1.1"
+#define VETH1_INDEX	111
+#define VETH2_INDEX	222
+
+struct test_data {
+	struct netns_obj *ns[NS_NB];
+	u32 xdp_flags;
+};
+
+static void cleanup(struct test_data *data)
+{
+	int i;
+
+	for (i = 0; i < NS_NB; i++)
+		netns_free(data->ns[i]);
+}
+
+/**
+ * ping_setup -
+ * Create two veth peers and forward packets in-between using XDP
+ *
+ *    ------------           ------------
+ *    |    NS1   |           |    NS2   |
+ *    |   veth0  |           |   veth0  |
+ *    | 10.1.1.1 |           | 10.1.1.2 |
+ *    -----|------           ------|-----
+ *         |                       |
+ *         |                       |
+ *    -----|-----------------------|-------
+ *    |  veth1                   veth2    |
+ *    | (id:111)                (id:222)  |
+ *    |    |                        |     |
+ *    |    ----- xdp forwarding -----     |
+ *    |                                   |
+ *    |               NS0                 |
+ *    -------------------------------------
+ */
+static int ping_setup(struct test_data *data)
+{
+	int i;
+
+	data->ns[0] = netns_new(NS0, false);
+	if (!ASSERT_OK_PTR(data->ns[0], "create ns"))
+		return -1;
+
+	for (i = 1; i < NS_NB; i++) {
+		char ns_name[4] = {};
+
+		snprintf(ns_name, 4, "NS%d", i);
+		data->ns[i] = netns_new(ns_name, false);
+		if (!ASSERT_OK_PTR(data->ns[i], "create ns"))
+			goto fail;
+
+		SYS(fail,
+		    "ip -n %s link add veth%d index %d%d%d type veth peer name veth0 netns %s",
+		    NS0, i, i, i, i, ns_name);
+		SYS(fail, "ip -n %s link set veth%d up", NS0, i);
+
+		SYS(fail, "ip -n %s addr add %s.%d/24 dev veth0", ns_name, IPV4_NETWORK, i);
+		SYS(fail, "ip -n %s link set veth0 up", ns_name);
+	}
+
+	return 0;
+
+fail:
+	cleanup(data);
+	return -1;
+}
+
+static void ping_test(struct test_data *data)
+{
+	struct test_xdp_do_redirect *skel = NULL;
+	struct xdp_dummy *skel_dummy = NULL;
+	struct nstoken *nstoken = NULL;
+	int i, ret;
+
+	skel_dummy = xdp_dummy__open_and_load();
+	if (!ASSERT_OK_PTR(skel_dummy, "open and load xdp_dummy skeleton"))
+		goto close;
+
+	for (i = 1; i < NS_NB; i++) {
+		char ns_name[4] = {};
+
+		snprintf(ns_name, 4, "NS%d", i);
+		nstoken = open_netns(ns_name);
+		if (!ASSERT_OK_PTR(nstoken, "open ns"))
+			goto close;
+
+		ret = bpf_xdp_attach(if_nametoindex("veth0"),
+				     bpf_program__fd(skel_dummy->progs.xdp_dummy_prog),
+				     data->xdp_flags, NULL);
+		if (!ASSERT_GE(ret, 0, "bpf_xdp_attach dummy_prog"))
+			goto close;
+
+		close_netns(nstoken);
+		nstoken = NULL;
+	}
+
+	skel = test_xdp_do_redirect__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+		goto close;
+
+	nstoken = open_netns(NS0);
+	if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+		goto close;
+
+	ret = bpf_xdp_attach(VETH2_INDEX,
+			     bpf_program__fd(skel->progs.xdp_redirect_to_111),
+			     data->xdp_flags, NULL);
+	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+		goto close;
+
+	ret = bpf_xdp_attach(VETH1_INDEX,
+			     bpf_program__fd(skel->progs.xdp_redirect_to_222),
+			     data->xdp_flags, NULL);
+	if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+		goto close;
+
+	close_netns(nstoken);
+	nstoken = NULL;
+
+	nstoken = open_netns(NS1);
+	if (!ASSERT_OK_PTR(nstoken, "open NS1"))
+		goto close;
+
+	SYS(close, "ping -c 1 %s.2 > /dev/null", IPV4_NETWORK);
+
+close:
+	close_netns(nstoken);
+	xdp_dummy__destroy(skel_dummy);
+	test_xdp_do_redirect__destroy(skel);
+}
+
+
+static void xdp_redirect_ping(u32 xdp_flags)
+{
+	struct test_data data = {};
+
+	if (ping_setup(&data) < 0)
+		return;
+
+	data.xdp_flags = xdp_flags;
+	ping_test(&data);
+	cleanup(&data);
+}
+
+void test_xdp_index_redirect(void)
+{
+	if (test__start_subtest("noflag"))
+		xdp_redirect_ping(0);
+
+	if (test__start_subtest("drvflag"))
+		xdp_redirect_ping(XDP_FLAGS_DRV_MODE);
+
+	if (test__start_subtest("skbflag"))
+		xdp_redirect_ping(XDP_FLAGS_SKB_MODE);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
index e1bf141d3401..3f9146d83d79 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
@@ -3,7 +3,7 @@
 #include <network_helpers.h>
 #include <bpf/btf.h>
 #include <linux/if_link.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
 #include <net/if.h>
 #include <unistd.h>
 
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index c87ee2bf558c..3d47878ef6bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -10,7 +10,7 @@
 #include <linux/errqueue.h>
 #include <linux/if_link.h>
 #include <linux/net_tstamp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
 #include <sys/mman.h>
 #include <net/if.h>
 #include <poll.h>
@@ -133,23 +133,6 @@ static void close_xsk(struct xsk *xsk)
 	munmap(xsk->umem_area, UMEM_SIZE);
 }
 
-static void ip_csum(struct iphdr *iph)
-{
-	__u32 sum = 0;
-	__u16 *p;
-	int i;
-
-	iph->check = 0;
-	p = (void *)iph;
-	for (i = 0; i < sizeof(*iph) / sizeof(*p); i++)
-		sum += p[i];
-
-	while (sum >> 16)
-		sum = (sum & 0xffff) + (sum >> 16);
-
-	iph->check = ~sum;
-}
-
 static int generate_packet(struct xsk *xsk, __u16 dst_port)
 {
 	struct xsk_tx_metadata *meta;
@@ -192,7 +175,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
 	iph->protocol = IPPROTO_UDP;
 	ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
 	ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
-	ip_csum(iph);
+	iph->check = build_ip_csum(iph);
 
 	udph->source = htons(UDP_SOURCE_PORT);
 	udph->dest = htons(dst_port);
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops.c b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
index b7e175cd0af0..b3f77b4561c8 100644
--- a/tools/testing/selftests/bpf/progs/bad_struct_ops.c
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
@@ -3,7 +3,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index 56c764df8196..5d6fc7f01ebb 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -2,7 +2,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 struct map_value {
 	struct prog_test_ref_kfunc __kptr *ptr;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c
new file mode 100644
index 000000000000..e32b07d802bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 data_end;
+
+SEC("cgroup_skb/ingress")
+int direct_packet_access(struct __sk_buff *skb)
+{
+	data_end = skb->data_end;
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index dfd817d0348c..bd8f15229f5c 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -192,7 +192,7 @@ done:
 
 /* Can't add a dynptr to a map */
 SEC("?raw_tp")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid read from stack")
 int add_dynptr_to_map1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -210,7 +210,7 @@ int add_dynptr_to_map1(void *ctx)
 
 /* Can't add a struct with an embedded dynptr to a map */
 SEC("?raw_tp")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid read from stack")
 int add_dynptr_to_map2(void *ctx)
 {
 	struct test_info x;
@@ -398,7 +398,7 @@ int data_slice_missing_null_check2(void *ctx)
  * dynptr argument
  */
 SEC("?raw_tp")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid read from stack")
 int invalid_helper1(void *ctx)
 {
 	struct bpf_dynptr ptr;
diff --git a/tools/testing/selftests/bpf/progs/epilogue_exit.c b/tools/testing/selftests/bpf/progs/epilogue_exit.c
index 33d3a57bee90..35fec7c75bef 100644
--- a/tools/testing/selftests/bpf/progs/epilogue_exit.c
+++ b/tools/testing/selftests/bpf/progs/epilogue_exit.c
@@ -4,8 +4,8 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/epilogue_tailcall.c b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c
index 7275dd594de0..153514691ba4 100644
--- a/tools/testing/selftests/bpf/progs/epilogue_tailcall.c
+++ b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c
@@ -4,8 +4,8 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
index fe0f3fa5aab6..8a0fdff89927 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_fail.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -131,7 +131,7 @@ int reject_subprog_with_lock(void *ctx)
 }
 
 SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region")
 int reject_with_rcu_read_lock(void *ctx)
 {
 	bpf_rcu_read_lock();
@@ -147,7 +147,7 @@ __noinline static int throwing_subprog(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region")
 int reject_subprog_with_rcu_read_lock(void *ctx)
 {
 	bpf_rcu_read_lock();
diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c
index 38034fb82530..02b82774469c 100644
--- a/tools/testing/selftests/bpf/progs/find_vma.c
+++ b/tools/testing/selftests/bpf/progs/find_vma.c
@@ -25,7 +25,7 @@ static long check_vma(struct task_struct *task, struct vm_area_struct *vma,
 {
 	if (vma->vm_file)
 		bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1,
-					  vma->vm_file->f_path.dentry->d_iname);
+					  vma->vm_file->f_path.dentry->d_shortname.string);
 
 	/* check for VM_EXEC */
 	if (vma->vm_flags & VM_EXEC)
diff --git a/tools/testing/selftests/bpf/progs/free_timer.c b/tools/testing/selftests/bpf/progs/free_timer.c
new file mode 100644
index 000000000000..4501ae8fc414
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/free_timer.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ENTRIES 8
+
+struct map_value {
+	struct bpf_timer timer;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, MAX_ENTRIES);
+} map SEC(".maps");
+
+static int timer_cb(void *map, void *key, struct map_value *value)
+{
+	volatile int sum = 0;
+	int i;
+
+	bpf_for(i, 0, 1024 * 1024) sum += i;
+
+	return 0;
+}
+
+static int start_cb(int key)
+{
+	struct map_value *value;
+
+	value = bpf_map_lookup_elem(&map, (void *)&key);
+	if (!value)
+		return 0;
+
+	bpf_timer_init(&value->timer, &map, CLOCK_MONOTONIC);
+	bpf_timer_set_callback(&value->timer, timer_cb);
+	/* Hope 100us will be enough to wake-up and run the overwrite thread */
+	bpf_timer_start(&value->timer, 100000, BPF_F_TIMER_CPU_PIN);
+
+	return 0;
+}
+
+static int overwrite_cb(int key)
+{
+	struct map_value zero = {};
+
+	/* Free the timer which may run on other CPU */
+	bpf_map_update_elem(&map, (void *)&key, &zero, BPF_ANY);
+
+	return 0;
+}
+
+SEC("syscall")
+int BPF_PROG(start_timer)
+{
+	bpf_loop(MAX_ENTRIES, start_cb, NULL, 0);
+	return 0;
+}
+
+SEC("syscall")
+int BPF_PROG(overwrite_timer)
+{
+	bpf_loop(MAX_ENTRIES, overwrite_cb, NULL, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c
new file mode 100644
index 000000000000..b0b53d980964
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/irq.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+unsigned long global_flags;
+
+extern void bpf_local_irq_save(unsigned long *) __weak __ksym;
+extern void bpf_local_irq_restore(unsigned long *) __weak __ksym;
+extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym;
+
+SEC("?tc")
+__failure __msg("arg#0 doesn't point to an irq flag on stack")
+int irq_save_bad_arg(struct __sk_buff *ctx)
+{
+	bpf_local_irq_save(&global_flags);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("arg#0 doesn't point to an irq flag on stack")
+int irq_restore_bad_arg(struct __sk_buff *ctx)
+{
+	bpf_local_irq_restore(&global_flags);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_2(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+
+	bpf_local_irq_save(&flags1);
+	bpf_local_irq_save(&flags2);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+
+	bpf_local_irq_save(&flags1);
+	bpf_local_irq_save(&flags2);
+	bpf_local_irq_save(&flags3);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3_minus_2(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+
+	bpf_local_irq_save(&flags1);
+	bpf_local_irq_save(&flags2);
+	bpf_local_irq_save(&flags3);
+	bpf_local_irq_restore(&flags3);
+	bpf_local_irq_restore(&flags2);
+	return 0;
+}
+
+static __noinline void local_irq_save(unsigned long *flags)
+{
+	bpf_local_irq_save(flags);
+}
+
+static __noinline void local_irq_restore(unsigned long *flags)
+{
+	bpf_local_irq_restore(flags);
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_1_subprog(struct __sk_buff *ctx)
+{
+	unsigned long flags;
+
+	local_irq_save(&flags);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_2_subprog(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+
+	local_irq_save(&flags1);
+	local_irq_save(&flags2);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3_subprog(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+
+	local_irq_save(&flags1);
+	local_irq_save(&flags2);
+	local_irq_save(&flags3);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3_minus_2_subprog(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+
+	local_irq_save(&flags1);
+	local_irq_save(&flags2);
+	local_irq_save(&flags3);
+	local_irq_restore(&flags3);
+	local_irq_restore(&flags2);
+	return 0;
+}
+
+SEC("?tc")
+__success
+int irq_balance(struct __sk_buff *ctx)
+{
+	unsigned long flags;
+
+	local_irq_save(&flags);
+	local_irq_restore(&flags);
+	return 0;
+}
+
+SEC("?tc")
+__success
+int irq_balance_n(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+
+	local_irq_save(&flags1);
+	local_irq_save(&flags2);
+	local_irq_save(&flags3);
+	local_irq_restore(&flags3);
+	local_irq_restore(&flags2);
+	local_irq_restore(&flags1);
+	return 0;
+}
+
+static __noinline void local_irq_balance(void)
+{
+	unsigned long flags;
+
+	local_irq_save(&flags);
+	local_irq_restore(&flags);
+}
+
+static __noinline void local_irq_balance_n(void)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+
+	local_irq_save(&flags1);
+	local_irq_save(&flags2);
+	local_irq_save(&flags3);
+	local_irq_restore(&flags3);
+	local_irq_restore(&flags2);
+	local_irq_restore(&flags1);
+}
+
+SEC("?tc")
+__success
+int irq_balance_subprog(struct __sk_buff *ctx)
+{
+	local_irq_balance();
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("sleepable helper bpf_copy_from_user#")
+int irq_sleepable_helper(void *ctx)
+{
+	unsigned long flags;
+	u32 data;
+
+	local_irq_save(&flags);
+	bpf_copy_from_user(&data, sizeof(data), NULL);
+	local_irq_restore(&flags);
+	return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_copy_from_user_str is sleepable within IRQ-disabled region")
+int irq_sleepable_kfunc(void *ctx)
+{
+	unsigned long flags;
+	u32 data;
+
+	local_irq_save(&flags);
+	bpf_copy_from_user_str(&data, sizeof(data), NULL, 0);
+	local_irq_restore(&flags);
+	return 0;
+}
+
+int __noinline global_local_irq_balance(void)
+{
+	local_irq_balance_n();
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("global function calls are not allowed with IRQs disabled")
+int irq_global_subprog(struct __sk_buff *ctx)
+{
+	unsigned long flags;
+
+	bpf_local_irq_save(&flags);
+	global_local_irq_balance();
+	bpf_local_irq_restore(&flags);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_ooo(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+
+	bpf_local_irq_save(&flags1);
+	bpf_local_irq_save(&flags2);
+	bpf_local_irq_restore(&flags1);
+	bpf_local_irq_restore(&flags2);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_ooo_3(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+
+	bpf_local_irq_save(&flags1);
+	bpf_local_irq_save(&flags2);
+	bpf_local_irq_restore(&flags2);
+	bpf_local_irq_save(&flags3);
+	bpf_local_irq_restore(&flags1);
+	bpf_local_irq_restore(&flags3);
+	return 0;
+}
+
+static __noinline void local_irq_save_3(unsigned long *flags1, unsigned long *flags2,
+					unsigned long *flags3)
+{
+	local_irq_save(flags1);
+	local_irq_save(flags2);
+	local_irq_save(flags3);
+}
+
+SEC("?tc")
+__success
+int irq_restore_3_subprog(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+
+	local_irq_save_3(&flags1, &flags2, &flags3);
+	bpf_local_irq_restore(&flags3);
+	bpf_local_irq_restore(&flags2);
+	bpf_local_irq_restore(&flags1);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_4_subprog(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+	unsigned long flags4;
+
+	local_irq_save_3(&flags1, &flags2, &flags3);
+	bpf_local_irq_restore(&flags3);
+	bpf_local_irq_save(&flags4);
+	bpf_local_irq_restore(&flags4);
+	bpf_local_irq_restore(&flags1);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_ooo_3_subprog(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags2;
+	unsigned long flags3;
+
+	local_irq_save_3(&flags1, &flags2, &flags3);
+	bpf_local_irq_restore(&flags3);
+	bpf_local_irq_restore(&flags2);
+	bpf_local_irq_save(&flags3);
+	bpf_local_irq_restore(&flags1);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_restore_invalid(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+	unsigned long flags = 0xfaceb00c;
+
+	bpf_local_irq_save(&flags1);
+	bpf_local_irq_restore(&flags);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected uninitialized")
+int irq_save_invalid(struct __sk_buff *ctx)
+{
+	unsigned long flags1;
+
+	bpf_local_irq_save(&flags1);
+	bpf_local_irq_save(&flags1);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_restore_iter(struct __sk_buff *ctx)
+{
+	struct bpf_iter_num it;
+
+	bpf_iter_num_new(&it, 0, 42);
+	bpf_local_irq_restore((unsigned long *)&it);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=1")
+int irq_save_iter(struct __sk_buff *ctx)
+{
+	struct bpf_iter_num it;
+
+	/* Ensure same sized slot has st->ref_obj_id set, so we reject based on
+	 * slot_type != STACK_IRQ_FLAG...
+	 */
+	_Static_assert(sizeof(it) == sizeof(unsigned long), "broken iterator size");
+
+	bpf_iter_num_new(&it, 0, 42);
+	bpf_local_irq_save((unsigned long *)&it);
+	bpf_local_irq_restore((unsigned long *)&it);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_flag_overwrite(struct __sk_buff *ctx)
+{
+	unsigned long flags;
+
+	bpf_local_irq_save(&flags);
+	flags = 0xdeadbeef;
+	bpf_local_irq_restore(&flags);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_flag_overwrite_partial(struct __sk_buff *ctx)
+{
+	unsigned long flags;
+
+	bpf_local_irq_save(&flags);
+	*(((char *)&flags) + 1) = 0xff;
+	bpf_local_irq_restore(&flags);
+	return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_ooo_refs_array(struct __sk_buff *ctx)
+{
+	unsigned long flags[4];
+	struct { int i; } *p;
+
+	/* refs=1 */
+	bpf_local_irq_save(&flags[0]);
+
+	/* refs=1,2 */
+	p = bpf_obj_new(typeof(*p));
+	if (!p) {
+		bpf_local_irq_restore(&flags[0]);
+		return 0;
+	}
+
+	/* refs=1,2,3 */
+	bpf_local_irq_save(&flags[1]);
+
+	/* refs=1,2,3,4 */
+	bpf_local_irq_save(&flags[2]);
+
+	/* Now when we remove ref=2, the verifier must not break the ordering in
+	 * the refs array between 1,3,4. With an older implementation, the
+	 * verifier would swap the last element with the removed element, but to
+	 * maintain the stack property we need to use memmove.
+	 */
+	bpf_obj_drop(p);
+
+	/* Save and restore to reset active_irq_id to 3, as the ordering is now
+	 * refs=1,4,3. When restoring the linear scan will find prev_id in order
+	 * as 3 instead of 4.
+	 */
+	bpf_local_irq_save(&flags[3]);
+	bpf_local_irq_restore(&flags[3]);
+
+	/* With the incorrect implementation, we can release flags[1], flags[2],
+	 * and flags[0], i.e. in the wrong order.
+	 */
+	bpf_local_irq_restore(&flags[1]);
+	bpf_local_irq_restore(&flags[2]);
+	bpf_local_irq_restore(&flags[0]);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 7c969c127573..190822b2f08b 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -524,11 +524,11 @@ int iter_subprog_iters(const void *ctx)
 }
 
 struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(type, BPF_MAP_TYPE_HASH);
 	__type(key, int);
 	__type(value, int);
 	__uint(max_entries, 1000);
-} arr_map SEC(".maps");
+} hash_map SEC(".maps");
 
 SEC("?raw_tp")
 __failure __msg("invalid mem access 'scalar'")
@@ -539,7 +539,7 @@ int iter_err_too_permissive1(const void *ctx)
 
 	MY_PID_GUARD();
 
-	map_val = bpf_map_lookup_elem(&arr_map, &key);
+	map_val = bpf_map_lookup_elem(&hash_map, &key);
 	if (!map_val)
 		return 0;
 
@@ -561,12 +561,12 @@ int iter_err_too_permissive2(const void *ctx)
 
 	MY_PID_GUARD();
 
-	map_val = bpf_map_lookup_elem(&arr_map, &key);
+	map_val = bpf_map_lookup_elem(&hash_map, &key);
 	if (!map_val)
 		return 0;
 
 	bpf_repeat(1000000) {
-		map_val = bpf_map_lookup_elem(&arr_map, &key);
+		map_val = bpf_map_lookup_elem(&hash_map, &key);
 	}
 
 	*map_val = 123;
@@ -585,7 +585,7 @@ int iter_err_too_permissive3(const void *ctx)
 	MY_PID_GUARD();
 
 	bpf_repeat(1000000) {
-		map_val = bpf_map_lookup_elem(&arr_map, &key);
+		map_val = bpf_map_lookup_elem(&hash_map, &key);
 		found = true;
 	}
 
@@ -606,7 +606,7 @@ int iter_tricky_but_fine(const void *ctx)
 	MY_PID_GUARD();
 
 	bpf_repeat(1000000) {
-		map_val = bpf_map_lookup_elem(&arr_map, &key);
+		map_val = bpf_map_lookup_elem(&hash_map, &key);
 		if (map_val) {
 			found = true;
 			break;
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c
index df1d3db60b1b..9e4b45201e69 100644
--- a/tools/testing/selftests/bpf/progs/iters_testmod.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod.c
@@ -4,7 +4,7 @@
 #include "bpf_experimental.h"
 #include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
index f9789e668297..82190d79de37 100644
--- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c
+++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
@@ -3,7 +3,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 static struct prog_test_ref_kfunc __kptr *v;
 long total_sum = -1;
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
index 7632d9ecb253..b9670e9a6e3d 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 SEC("tc")
 int kfunc_destructive_test(void)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
index 08fae306539c..a1963497f0bf 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2021 Facebook */
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 struct syscall_test_args {
 	__u8 data[16];
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
index d532af07decf..48f64827cd93 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_race.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 SEC("tc")
 int kfunc_call_fail(struct __sk_buff *ctx)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index f502f755f567..8b86113a0126 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2021 Facebook */
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 SEC("tc")
 int kfunc_call_test4(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
index 2380c75e74ce..8e150e85b50d 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2021 Facebook */
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 extern const int bpf_prog_active __ksym;
 int active_res = -1;
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
index b092a72b2c9d..d736506a4c80 100644
--- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -6,7 +6,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_core_read.h>
 #include "../bpf_experimental.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 struct plain_local;
 
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index ab0ce1d01a4a..edaba481db9d 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -2,7 +2,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 struct map_value {
 	struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 450bb373b179..4c0ff01f1a96 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -4,7 +4,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_core_read.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 struct map_value {
 	char buf[8];
@@ -345,7 +345,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("Unreleased reference id=5 alloc_insn=")
+__failure __msg("Unreleased reference id=4 alloc_insn=")
 int kptr_xchg_ref_state(struct __sk_buff *ctx)
 {
 	struct prog_test_ref_kfunc *p;
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c
index 7f9ef701f5de..51a4fe64c917 100644
--- a/tools/testing/selftests/bpf/progs/missed_kprobe.c
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c
@@ -2,7 +2,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
index 8ea71cbd6c45..29c18d869ec1 100644
--- a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
@@ -2,7 +2,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -46,3 +46,9 @@ int test5(struct pt_regs *ctx)
 {
 	return 0;
 }
+
+SEC("kprobe.session/bpf_kfunc_common_test")
+int test6(struct pt_regs *ctx)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_acquire.c b/tools/testing/selftests/bpf/progs/nested_acquire.c
index 8e521a21d995..49ad7b9adf56 100644
--- a/tools/testing/selftests/bpf/progs/nested_acquire.c
+++ b/tools/testing/selftests/bpf/progs/nested_acquire.c
@@ -4,7 +4,7 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c
index 885377e83607..6c5797bf0ead 100644
--- a/tools/testing/selftests/bpf/progs/preempt_lock.c
+++ b/tools/testing/selftests/bpf/progs/preempt_lock.c
@@ -5,8 +5,10 @@
 #include "bpf_misc.h"
 #include "bpf_experimental.h"
 
+extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym;
+
 SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
 int preempt_lock_missing_1(struct __sk_buff *ctx)
 {
 	bpf_preempt_disable();
@@ -14,7 +16,7 @@ int preempt_lock_missing_1(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
 int preempt_lock_missing_2(struct __sk_buff *ctx)
 {
 	bpf_preempt_disable();
@@ -23,7 +25,7 @@ int preempt_lock_missing_2(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
 int preempt_lock_missing_3(struct __sk_buff *ctx)
 {
 	bpf_preempt_disable();
@@ -33,7 +35,7 @@ int preempt_lock_missing_3(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
 int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx)
 {
 	bpf_preempt_disable();
@@ -55,7 +57,7 @@ static __noinline void preempt_enable(void)
 }
 
 SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
 int preempt_lock_missing_1_subprog(struct __sk_buff *ctx)
 {
 	preempt_disable();
@@ -63,7 +65,7 @@ int preempt_lock_missing_1_subprog(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
 int preempt_lock_missing_2_subprog(struct __sk_buff *ctx)
 {
 	preempt_disable();
@@ -72,7 +74,7 @@ int preempt_lock_missing_2_subprog(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
 int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx)
 {
 	preempt_disable();
@@ -113,6 +115,18 @@ int preempt_sleepable_helper(void *ctx)
 	return 0;
 }
 
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_copy_from_user_str is sleepable within non-preemptible region")
+int preempt_sleepable_kfunc(void *ctx)
+{
+	u32 data;
+
+	bpf_preempt_disable();
+	bpf_copy_from_user_str(&data, sizeof(data), NULL, 0);
+	bpf_preempt_enable();
+	return 0;
+}
+
 int __noinline preempt_global_subprog(void)
 {
 	preempt_balance_subprog();
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue.c b/tools/testing/selftests/bpf/progs/pro_epilogue.c
index 44bc3f06b4b6..d97d6e07ef5c 100644
--- a/tools/testing/selftests/bpf/progs/pro_epilogue.c
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue.c
@@ -4,8 +4,8 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c
index 3529e53be355..6048d79be48b 100644
--- a/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c
@@ -4,8 +4,8 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/sock_addr_kern.c b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
index 8386bb15ccdc..84ad515eafd6 100644
--- a/tools/testing/selftests/bpf/progs/sock_addr_kern.c
+++ b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2024 Google LLC */
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 SEC("syscall")
 int init_sock(struct init_sock_args *args)
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
index d7fdcabe7d90..284a5b008e0c 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_detach.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
index 3c822103bd40..d8cc99f5c2e2 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
index b450f72e744a..ccab3935aa42 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
index 6283099ec383..8b5515f4f724 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
index 4c56d4a9d9f4..71c420c3a5a6 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_module.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
@@ -3,7 +3,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
index 9efcc6e4d356..5b23ea817f1f 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
@@ -3,7 +3,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
index fa2021388485..5d0937fa07be 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
index 8ea57e5348ab..0e4d2ff63ab8 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
@@ -3,7 +3,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
index 1f55ec4cee37..58d5d8dc2235 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
@@ -3,7 +3,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
index f2f300d50988..31e58389bb8b 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
@@ -3,7 +3,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
index 0f4dfb770c32..b698cc62a371 100644
--- a/tools/testing/selftests/bpf/progs/syscall.c
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -76,9 +76,9 @@ static int btf_load(void)
 			.magic = BTF_MAGIC,
 			.version = BTF_VERSION,
 			.hdr_len = sizeof(struct btf_header),
-			.type_len = sizeof(__u32) * 8,
-			.str_off = sizeof(__u32) * 8,
-			.str_len = sizeof(__u32),
+			.type_len = sizeof(raw_btf.types),
+			.str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types),
+			.str_len = sizeof(raw_btf.str),
 		},
 		.types = {
 			/* long */
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 683c8aaa63da..f344c6835e84 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -15,7 +15,7 @@
 #include <linux/ipv6.h>
 #include <linux/pkt_cls.h>
 #include <linux/tcp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
index 233b089d1fba..eb55cb8a3dbd 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.h
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
@@ -10,7 +10,7 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
 
 /* offsetof() is used in static asserts, and the libbpf-redefined CO-RE
  * friendly version breaks compilation for older clang versions <= 15
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
index 464515b824b9..d0f7670351e5 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -15,7 +15,7 @@
 #include <linux/ipv6.h>
 #include <linux/pkt_cls.h>
 #include <linux/tcp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
index 6afa834756e9..fac33a14f200 100644
--- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c
+++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
@@ -6,13 +6,20 @@
 #include <stdbool.h>
 
 extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
+extern bool CONFIG_PPC_FTRACE_OUT_OF_LINE __kconfig __weak;
+extern bool CONFIG_KPROBES_ON_FTRACE __kconfig __weak;
+extern bool CONFIG_PPC64 __kconfig __weak;
 
-/* This function is here to have CONFIG_X86_KERNEL_IBT
- * used and added to object BTF.
+/* This function is here to have CONFIG_X86_KERNEL_IBT,
+ * CONFIG_PPC_FTRACE_OUT_OF_LINE, CONFIG_KPROBES_ON_FTRACE,
+ * CONFIG_PPC6 used and added to object BTF.
  */
 int unused(void)
 {
-	return CONFIG_X86_KERNEL_IBT ? 0 : 1;
+	return CONFIG_X86_KERNEL_IBT ||
+			CONFIG_PPC_FTRACE_OUT_OF_LINE ||
+			CONFIG_KPROBES_ON_FTRACE ||
+			CONFIG_PPC64 ? 0 : 1;
 }
 
 SEC("kprobe")
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 5da001ca57a5..09d027bd3ea8 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -26,7 +26,7 @@ __noinline int foo(const struct Big *big)
 }
 
 SEC("cgroup_skb/ingress")
-__failure __msg("invalid indirect access to stack")
+__failure __msg("invalid read from stack")
 int global_func10(struct __sk_buff *skb)
 {
 	const struct Small small = {.x = skb->len };
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
index 7ac7e1de34d8..0ad1bf1ede8d 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
@@ -4,7 +4,7 @@
 #include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
 #include "bpf_kfuncs.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 SEC("tc")
 int kfunc_dynptr_nullable_test1(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index cc1a012d038f..fb07f5773888 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -5,7 +5,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 __u32 raw_tp_read_sz = 0;
 
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
new file mode 100644
index 000000000000..dde3d5bec515
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+int verdict_max_size = 10000;
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 20);
+	__type(key, int);
+	__type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+	__u32 one = 1;
+
+	if (skb->len > verdict_max_size)
+		return SK_PASS;
+
+	return bpf_sk_redirect_map(skb, &sock_map, one, 0);
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_pass(struct __sk_buff *skb)
+{
+	return SK_PASS;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+	return skb->len;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser_partial(struct __sk_buff *skb)
+{
+	/* agreement with the test program on a 4-byte size header
+	 * and 6-byte body.
+	 */
+	if (skb->len < 4) {
+		/* need more header to determine full length */
+		return 0;
+	}
+	/* return full length decoded from header.
+	 * the return value may be larger than skb->len which
+	 * means framework must wait body coming.
+	 */
+	return 10;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 10d825928499..630f12e51b07 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -8,6 +8,7 @@
 #include <linux/if_packet.h>
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
 
 char LICENSE[] SEC("license") = "GPL";
 
@@ -27,6 +28,7 @@ bool seen_host;
 bool seen_mcast;
 
 int mark, prio;
+unsigned short headroom, tailroom;
 
 SEC("tc/ingress")
 int tc1(struct __sk_buff *skb)
@@ -104,11 +106,24 @@ out:
 	return TCX_PASS;
 }
 
+struct sk_buff {
+	struct net_device *dev;
+};
+
+struct net_device {
+	unsigned short needed_headroom;
+	unsigned short needed_tailroom;
+};
+
 SEC("tc/egress")
 int tc8(struct __sk_buff *skb)
 {
+	struct net_device *dev = BPF_CORE_READ((struct sk_buff *)skb, dev);
+
 	seen_tc8 = true;
 	mark = skb->mark;
 	prio = skb->priority;
+	headroom = BPF_CORE_READ(dev, needed_headroom);
+	tailroom = BPF_CORE_READ(dev, needed_tailroom);
 	return TCX_PASS;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
index bba3e37f749b..39ff06f2c834 100644
--- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
+++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
@@ -3,7 +3,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 #include "bpf_misc.h"
 
 SEC("tp_btf/bpf_testmod_test_nullable_bare")
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 81bb38d72ced..dc74d8cf9e3f 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -10,6 +10,8 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 	/* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
 #if defined(__TARGET_ARCH_s390)
 	int tailroom = 512;
+#elif defined(__TARGET_ARCH_powerpc)
+	int tailroom = 384;
 #else
 	int tailroom = 320;
 #endif
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
index 3abf068b8446..5928ed0911ca 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -98,6 +98,18 @@ int xdp_count_pkts(struct xdp_md *xdp)
 	return XDP_DROP;
 }
 
+SEC("xdp")
+int xdp_redirect_to_111(struct xdp_md *xdp)
+{
+	return bpf_redirect(111, 0);
+}
+
+SEC("xdp")
+int xdp_redirect_to_222(struct xdp_md *xdp)
+{
+	return bpf_redirect(222, 0);
+}
+
 SEC("tc")
 int tc_count_pkts(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index a7c4a7d49fe6..fe2d71ae0e71 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -8,7 +8,7 @@
 #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
 #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
 
-SEC("t")
+SEC("tc")
 int ing_cls(struct __sk_buff *ctx)
 {
 	__u8 *data, *data_meta, *data_end;
@@ -28,7 +28,7 @@ int ing_cls(struct __sk_buff *ctx)
 	return diff ? TC_ACT_SHOT : TC_ACT_OK;
 }
 
-SEC("x")
+SEC("xdp")
 int ing_xdp(struct xdp_md *ctx)
 {
 	__u8 *data, *data_meta, *data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
deleted file mode 100644
index b778cad45485..000000000000
--- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright (c) 2017 VMware
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-SEC("redirect_to_111")
-int xdp_redirect_to_111(struct xdp_md *xdp)
-{
-	return bpf_redirect(111, 0);
-}
-SEC("redirect_to_222")
-int xdp_redirect_to_222(struct xdp_md *xdp)
-{
-	return bpf_redirect(222, 0);
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c
index 8a403470e557..046a204c8fc6 100644
--- a/tools/testing/selftests/bpf/progs/uninit_stack.c
+++ b/tools/testing/selftests/bpf/progs/uninit_stack.c
@@ -70,7 +70,8 @@ __naked int helper_uninit_to_misc(void *ctx)
 		r1 = r10;				\
 		r1 += -128;				\
 		r2 = 32;				\
-		call %[bpf_trace_printk];		\
+		r3 = 0;					\
+		call %[bpf_probe_read_user];		\
 		/* Call to dummy() forces print_verifier_state(..., true),	\
 		 * thus showing the stack state, matched by __msg().		\
 		 */					\
@@ -79,7 +80,7 @@ __naked int helper_uninit_to_misc(void *ctx)
 		exit;					\
 "
 		      :
-		      : __imm(bpf_trace_printk),
+		      : __imm(bpf_probe_read_user),
 			__imm(dummy)
 		      : __clobber_all);
 }
diff --git a/tools/testing/selftests/bpf/progs/unsupported_ops.c b/tools/testing/selftests/bpf/progs/unsupported_ops.c
index 9180365a3568..8aa2e0dd624e 100644
--- a/tools/testing/selftests/bpf/progs/unsupported_ops.c
+++ b/tools/testing/selftests/bpf/progs/unsupported_ops.c
@@ -4,7 +4,7 @@
 #include <vmlinux.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c
index 4195aa824ba5..0a187ff725cc 100644
--- a/tools/testing/selftests/bpf/progs/verifier_array_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c
@@ -29,6 +29,20 @@ struct {
 } map_array_wo SEC(".maps");
 
 struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, struct test_val);
+} map_array_pcpu SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, struct test_val);
+} map_array SEC(".maps");
+
+struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
 	__uint(max_entries, 1);
 	__type(key, long long);
@@ -525,4 +539,193 @@ l0_%=:	exit;						\
 	: __clobber_all);
 }
 
+SEC("socket")
+__description("valid map access into an array using constant without nullness")
+__success __retval(4) __log_level(2)
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}")
+unsigned int an_array_with_a_constant_no_nullness(void)
+{
+	/* Need 8-byte alignment for spill tracking */
+	__u32 __attribute__((aligned(8))) key = 1;
+	struct test_val *val;
+
+	val = bpf_map_lookup_elem(&map_array, &key);
+	val->index = offsetof(struct test_val, foo);
+
+	return val->index;
+}
+
+SEC("socket")
+__description("valid multiple map access into an array using constant without nullness")
+__success __retval(8) __log_level(2)
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -16) = {{(0|r[0-9])}}")
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}")
+unsigned int multiple_array_with_a_constant_no_nullness(void)
+{
+	__u32 __attribute__((aligned(8))) key = 1;
+	__u32 __attribute__((aligned(8))) key2 = 0;
+	struct test_val *val, *val2;
+
+	val = bpf_map_lookup_elem(&map_array, &key);
+	val->index = offsetof(struct test_val, foo);
+
+	val2 = bpf_map_lookup_elem(&map_array, &key2);
+	val2->index = offsetof(struct test_val, foo);
+
+	return val->index + val2->index;
+}
+
+SEC("socket")
+__description("valid map access into an array using natural aligned 32-bit constant 0 without nullness")
+__success __retval(4)
+unsigned int an_array_with_a_32bit_constant_0_no_nullness(void)
+{
+	/* Unlike the above tests, 32-bit zeroing is precisely tracked even
+	 * if writes are not aligned to BPF_REG_SIZE. This tests that our
+	 * STACK_ZERO handling functions.
+	 */
+	struct test_val *val;
+	__u32 key = 0;
+
+	val = bpf_map_lookup_elem(&map_array, &key);
+	val->index = offsetof(struct test_val, foo);
+
+	return val->index;
+}
+
+SEC("socket")
+__description("valid map access into a pcpu array using constant without nullness")
+__success __retval(4) __log_level(2)
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}")
+unsigned int a_pcpu_array_with_a_constant_no_nullness(void)
+{
+	__u32 __attribute__((aligned(8))) key = 1;
+	struct test_val *val;
+
+	val = bpf_map_lookup_elem(&map_array_pcpu, &key);
+	val->index = offsetof(struct test_val, foo);
+
+	return val->index;
+}
+
+SEC("socket")
+__description("invalid map access into an array using constant without nullness")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int an_array_with_a_constant_no_nullness_out_of_bounds(void)
+{
+	/* Out of bounds */
+	__u32 __attribute__((aligned(8))) key = 3;
+	struct test_val *val;
+
+	val = bpf_map_lookup_elem(&map_array, &key);
+	val->index = offsetof(struct test_val, foo);
+
+	return val->index;
+}
+
+SEC("socket")
+__description("invalid map access into an array using constant smaller than key_size")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int an_array_with_a_constant_too_small(void)
+{
+	__u32 __attribute__((aligned(8))) key;
+	struct test_val *val;
+
+	/* Mark entire key as STACK_MISC */
+	bpf_probe_read_user(&key, sizeof(key), NULL);
+
+	/* Spilling only the bottom byte results in a tnum const of 1.
+	 * We want to check that the verifier rejects it, as the spill is < 4B.
+	 */
+	*(__u8 *)&key = 1;
+	val = bpf_map_lookup_elem(&map_array, &key);
+
+	/* Should fail, as verifier cannot prove in-bound lookup */
+	val->index = offsetof(struct test_val, foo);
+
+	return val->index;
+}
+
+SEC("socket")
+__description("invalid map access into an array using constant larger than key_size")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int an_array_with_a_constant_too_big(void)
+{
+	struct test_val *val;
+	__u64 key = 1;
+
+	/* Even if the constant value is < max_entries, if the spill size is
+	 * larger than the key size, the set bits may not be where we expect them
+	 * to be on different endian architectures.
+	 */
+	val = bpf_map_lookup_elem(&map_array, &key);
+	val->index = offsetof(struct test_val, foo);
+
+	return val->index;
+}
+
+SEC("socket")
+__description("invalid elided lookup using const and non-const key")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int mixed_const_and_non_const_key_lookup(void)
+{
+	__u32 __attribute__((aligned(8))) key;
+	struct test_val *val;
+	__u32 rand;
+
+	rand = bpf_get_prandom_u32();
+	key = rand > 42 ? 1 : rand;
+	val = bpf_map_lookup_elem(&map_array, &key);
+
+	return val->index;
+}
+
+SEC("socket")
+__failure __msg("invalid read from stack R2 off=4096 size=4")
+__naked void key_lookup_at_invalid_fp(void)
+{
+	asm volatile ("					\
+	r1 = %[map_array] ll;				\
+	r2 = r10;					\
+	r2 += 4096;					\
+	call %[bpf_map_lookup_elem];			\
+	r0 = *(u64*)(r0 + 0);				\
+	exit;						\
+"	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(map_array)
+	: __clobber_all);
+}
+
+volatile __u32 __attribute__((aligned(8))) global_key;
+
+SEC("socket")
+__description("invalid elided lookup using non-stack key")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int non_stack_key_lookup(void)
+{
+	struct test_val *val;
+
+	global_key = 1;
+	val = bpf_map_lookup_elem(&map_array, (void *)&global_key);
+	val->index = offsetof(struct test_val, foo);
+
+	return val->index;
+}
+
+SEC("socket")
+__description("doesn't reject UINT64_MAX as s64 for irrelevant maps")
+__success __retval(42)
+unsigned int doesnt_reject_irrelevant_maps(void)
+{
+	__u64 key = 0xFFFFFFFFFFFFFFFF;
+	struct test_val *val;
+
+	val = bpf_map_lookup_elem(&map_hash_48b, &key);
+	if (val)
+		return val->index;
+
+	return 42;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
index 8d77cc5323d3..fb62e09f2114 100644
--- a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
@@ -28,7 +28,7 @@ __naked void stack_out_of_bounds(void)
 SEC("socket")
 __description("uninitialized stack1")
 __success __log_level(4) __msg("stack depth 8")
-__failure_unpriv __msg_unpriv("invalid indirect read from stack")
+__failure_unpriv __msg_unpriv("invalid read from stack")
 __naked void uninitialized_stack1(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index a0bb7fb40ea5..0eb33bb801b5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -1200,4 +1200,138 @@ l0_%=:	r0 = 0;						\
 	: __clobber_all);
 }
 
+SEC("tc")
+__description("multiply mixed sign bounds. test 1")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))")
+__naked void mult_mixed0_sign(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"r6 = r0;"
+	"call %[bpf_get_prandom_u32];"
+	"r7 = r0;"
+	"r6 &= 0xf;"
+	"r6 -= 1000000000;"
+	"r7 &= 0xf;"
+	"r7 -= 2000000000;"
+	"r6 *= r7;"
+	"exit"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_skb_store_bytes)
+	: __clobber_all);
+}
+
+SEC("tc")
+__description("multiply mixed sign bounds. test 2")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=smin32=-100,smax=smax32=200)")
+__naked void mult_mixed1_sign(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"r6 = r0;"
+	"call %[bpf_get_prandom_u32];"
+	"r7 = r0;"
+	"r6 &= 0xf;"
+	"r6 -= 0xa;"
+	"r7 &= 0xf;"
+	"r7 -= 0x14;"
+	"r6 *= r7;"
+	"exit"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_skb_store_bytes)
+	: __clobber_all);
+}
+
+SEC("tc")
+__description("multiply negative bounds")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6_w=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))")
+__naked void mult_sign_bounds(void)
+{
+	asm volatile (
+	"r8 = 0x7fff;"
+	"call %[bpf_get_prandom_u32];"
+	"r6 = r0;"
+	"call %[bpf_get_prandom_u32];"
+	"r7 = r0;"
+	"r6 &= 0xa;"
+	"r6 -= r8;"
+	"r7 &= 0xf;"
+	"r7 -= r8;"
+	"r6 *= r7;"
+	"exit"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_skb_store_bytes)
+	: __clobber_all);
+}
+
+SEC("tc")
+__description("multiply bounds that don't cross signed boundary")
+__success __log_level(2)
+__msg("r8 *= r6 {{.*}}; R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8_w=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))")
+__naked void mult_no_sign_crossing(void)
+{
+	asm volatile (
+	"r6 = 0xb;"
+	"r8 = 0xb3c3f8c99262687 ll;"
+	"call %[bpf_get_prandom_u32];"
+	"r7 = r0;"
+	"r6 &= r7;"
+	"r8 *= r6;"
+	"exit"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_skb_store_bytes)
+	: __clobber_all);
+}
+
+SEC("tc")
+__description("multiplication overflow, result in unbounded reg. test 1")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6_w=scalar()")
+__naked void mult_unsign_ovf(void)
+{
+	asm volatile (
+	"r8 = 0x7ffffffffff ll;"
+	"call %[bpf_get_prandom_u32];"
+	"r6 = r0;"
+	"call %[bpf_get_prandom_u32];"
+	"r7 = r0;"
+	"r6 &= 0x7fffffff;"
+	"r7 &= r8;"
+	"r6 *= r7;"
+	"exit"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_skb_store_bytes)
+	: __clobber_all);
+}
+
+SEC("tc")
+__description("multiplication overflow, result in unbounded reg. test 2")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6_w=scalar()")
+__naked void mult_sign_ovf(void)
+{
+	asm volatile (
+	"r8 = 0x7ffffffff ll;"
+	"call %[bpf_get_prandom_u32];"
+	"r6 = r0;"
+	"call %[bpf_get_prandom_u32];"
+	"r7 = r0;"
+	"r6 &= 0xa;"
+	"r6 -= r8;"
+	"r7 &= 0x7fffffff;"
+	"r6 *= r7;"
+	"exit"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_skb_store_bytes)
+	: __clobber_all);
+}
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_const_or.c b/tools/testing/selftests/bpf/progs/verifier_const_or.c
index ba8922b2eebd..68c568c3c3a0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_const_or.c
+++ b/tools/testing/selftests/bpf/progs/verifier_const_or.c
@@ -25,7 +25,7 @@ __naked void constant_should_keep_constant_type(void)
 
 SEC("tracepoint")
 __description("constant register |= constant should not bypass stack boundary checks")
-__failure __msg("invalid indirect access to stack R1 off=-48 size=58")
+__failure __msg("invalid write to stack R1 off=-48 size=58")
 __naked void not_bypass_stack_boundary_checks_1(void)
 {
 	asm volatile ("					\
@@ -62,7 +62,7 @@ __naked void register_should_keep_constant_type(void)
 
 SEC("tracepoint")
 __description("constant register |= constant register should not bypass stack boundary checks")
-__failure __msg("invalid indirect access to stack R1 off=-48 size=58")
+__failure __msg("invalid write to stack R1 off=-48 size=58")
 __naked void not_bypass_stack_boundary_checks_2(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
index 50c6b22606f6..f2c54e4d89eb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
@@ -67,7 +67,7 @@ SEC("socket")
 __description("helper access to variable memory: stack, bitwise AND, zero included")
 /* in privileged mode reads from uninitialized stack locations are permitted */
 __success __failure_unpriv
-__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64")
+__msg_unpriv("invalid read from stack R2 off -64+0 size 64")
 __retval(0)
 __naked void stack_bitwise_and_zero_included(void)
 {
@@ -100,7 +100,7 @@ __naked void stack_bitwise_and_zero_included(void)
 
 SEC("tracepoint")
 __description("helper access to variable memory: stack, bitwise AND + JMP, wrong max")
-__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__failure __msg("invalid write to stack R1 off=-64 size=65")
 __naked void bitwise_and_jmp_wrong_max(void)
 {
 	asm volatile ("					\
@@ -187,7 +187,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("tracepoint")
 __description("helper access to variable memory: stack, JMP, bounds + offset")
-__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__failure __msg("invalid write to stack R1 off=-64 size=65")
 __naked void memory_stack_jmp_bounds_offset(void)
 {
 	asm volatile ("					\
@@ -211,7 +211,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("tracepoint")
 __description("helper access to variable memory: stack, JMP, wrong max")
-__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__failure __msg("invalid write to stack R1 off=-64 size=65")
 __naked void memory_stack_jmp_wrong_max(void)
 {
 	asm volatile ("					\
@@ -260,7 +260,7 @@ SEC("socket")
 __description("helper access to variable memory: stack, JMP, no min check")
 /* in privileged mode reads from uninitialized stack locations are permitted */
 __success __failure_unpriv
-__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64")
+__msg_unpriv("invalid read from stack R2 off -64+0 size 64")
 __retval(0)
 __naked void stack_jmp_no_min_check(void)
 {
@@ -750,7 +750,7 @@ SEC("socket")
 __description("helper access to variable memory: 8 bytes leak")
 /* in privileged mode reads from uninitialized stack locations are permitted */
 __success __failure_unpriv
-__msg_unpriv("invalid indirect read from stack R2 off -64+32 size 64")
+__msg_unpriv("invalid read from stack R2 off -64+32 size 64")
 __retval(0)
 __naked void variable_memory_8_bytes_leak(void)
 {
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
index 5f2efb895edb..59e34d558654 100644
--- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
@@ -96,7 +96,7 @@ __naked void arg_ptr_to_long_misaligned(void)
 
 SEC("cgroup/sysctl")
 __description("arg pointer to long size < sizeof(long)")
-__failure __msg("invalid indirect access to stack R4 off=-4 size=8")
+__failure __msg("invalid write to stack R4 off=-4 size=8")
 __naked void to_long_size_sizeof_long(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
index 4eaab1468eb7..7d088ba99ea5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
@@ -47,7 +47,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("xdp")
 __description("map in map state pruning")
-__success __msg("processed 26 insns")
+__success __msg("processed 15 insns")
 __log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ)
 __naked void map_in_map_state_pruning(void)
 {
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
new file mode 100644
index 000000000000..e81097c96fe2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("raw_tp")
+__description("may_goto 0")
+__arch_x86_64
+__xlated("0: r0 = 1")
+__xlated("1: exit")
+__success
+__naked void may_goto_simple(void)
+{
+	asm volatile (
+	".8byte %[may_goto];"
+	"r0 = 1;"
+	".8byte %[may_goto];"
+	"exit;"
+	:
+	: __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+	: __clobber_all);
+}
+
+SEC("raw_tp")
+__description("batch 2 of may_goto 0")
+__arch_x86_64
+__xlated("0: r0 = 1")
+__xlated("1: exit")
+__success
+__naked void may_goto_batch_0(void)
+{
+	asm volatile (
+	".8byte %[may_goto1];"
+	".8byte %[may_goto1];"
+	"r0 = 1;"
+	".8byte %[may_goto1];"
+	".8byte %[may_goto1];"
+	"exit;"
+	:
+	: __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+	: __clobber_all);
+}
+
+SEC("raw_tp")
+__description("may_goto batch with offsets 2/1/0")
+__arch_x86_64
+__xlated("0: r0 = 1")
+__xlated("1: exit")
+__success
+__naked void may_goto_batch_1(void)
+{
+	asm volatile (
+	".8byte %[may_goto1];"
+	".8byte %[may_goto2];"
+	".8byte %[may_goto3];"
+	"r0 = 1;"
+	".8byte %[may_goto1];"
+	".8byte %[may_goto2];"
+	".8byte %[may_goto3];"
+	"exit;"
+	:
+	: __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
+	  __imm_insn(may_goto2, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 1 /* offset */, 0)),
+	  __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+	: __clobber_all);
+}
+
+SEC("raw_tp")
+__description("may_goto batch with offsets 2/0")
+__arch_x86_64
+__xlated("0: *(u64 *)(r10 -8) = 8388608")
+__xlated("1: r11 = *(u64 *)(r10 -8)")
+__xlated("2: if r11 == 0x0 goto pc+3")
+__xlated("3: r11 -= 1")
+__xlated("4: *(u64 *)(r10 -8) = r11")
+__xlated("5: r0 = 1")
+__xlated("6: r0 = 2")
+__xlated("7: exit")
+__success
+__naked void may_goto_batch_2(void)
+{
+	asm volatile (
+	".8byte %[may_goto1];"
+	".8byte %[may_goto3];"
+	"r0 = 1;"
+	"r0 = 2;"
+	"exit;"
+	:
+	: __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
+	  __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+	: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c
new file mode 100644
index 000000000000..b891faf50660
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+int gvar;
+
+SEC("raw_tp")
+__description("C code with may_goto 0")
+__success
+int may_goto_c_code(void)
+{
+	int i, tmp[3];
+
+	for (i = 0; i < 3 && can_loop; i++)
+		tmp[i] = 0;
+
+	for (i = 0; i < 3 && can_loop; i++)
+		tmp[i] = gvar - i;
+
+	for (i = 0; i < 3 && can_loop; i++)
+		gvar += tmp[i];
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_mtu.c b/tools/testing/selftests/bpf/progs/verifier_mtu.c
index 4ccf1ebc42d1..256956ea1ac5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_mtu.c
+++ b/tools/testing/selftests/bpf/progs/verifier_mtu.c
@@ -8,7 +8,7 @@ SEC("tc/ingress")
 __description("uninit/mtu: write rejected")
 __success
 __caps_unpriv(CAP_BPF|CAP_NET_ADMIN)
-__failure_unpriv __msg_unpriv("invalid indirect read from stack")
+__failure_unpriv __msg_unpriv("invalid read from stack")
 int tc_uninit_mtu(struct __sk_buff *ctx)
 {
 	__u32 mtu;
diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
index 7cc83acac727..c689665e07b9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
@@ -236,7 +236,7 @@ __naked void load_bytes_spilled_regs_data(void)
 
 SEC("tc")
 __description("raw_stack: skb_load_bytes, invalid access 1")
-__failure __msg("invalid indirect access to stack R3 off=-513 size=8")
+__failure __msg("invalid write to stack R3 off=-513 size=8")
 __naked void load_bytes_invalid_access_1(void)
 {
 	asm volatile ("					\
@@ -255,7 +255,7 @@ __naked void load_bytes_invalid_access_1(void)
 
 SEC("tc")
 __description("raw_stack: skb_load_bytes, invalid access 2")
-__failure __msg("invalid indirect access to stack R3 off=-1 size=8")
+__failure __msg("invalid write to stack R3 off=-1 size=8")
 __naked void load_bytes_invalid_access_2(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
index 3f679de73229..d9d7b05cf6d2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
@@ -187,7 +187,7 @@ l0_%=:	r6 = r0;					\
 
 SEC("cgroup/skb")
 __description("spin_lock: test6 missing unlock")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_spin_lock-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_spin_lock-ed region")
 __failure_unpriv __msg_unpriv("")
 __naked void spin_lock_test6_missing_unlock(void)
 {
@@ -530,4 +530,30 @@ l1_%=:	exit;						\
 	: __clobber_all);
 }
 
+SEC("tc")
+__description("spin_lock: loop within a locked region")
+__success __failure_unpriv __msg_unpriv("")
+__retval(0)
+int bpf_loop_inside_locked_region(void)
+{
+	const int zero = 0;
+	struct val *val;
+	int i, j = 0;
+
+	val = bpf_map_lookup_elem(&map_spin_lock, &zero);
+	if (!val)
+		return -1;
+
+	bpf_spin_lock(&val->l);
+	bpf_for(i, 0, 10) {
+		j++;
+		/* Silence "unused variable" warnings. */
+		if (j == 10)
+			break;
+	}
+	bpf_spin_unlock(&val->l);
+
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index 7ea535bfbacd..a4a5e2071604 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -199,7 +199,7 @@ __naked void pass_pointer_to_helper_function(void)
 SEC("socket")
 __description("unpriv: indirectly pass pointer on stack to helper function")
 __success __failure_unpriv
-__msg_unpriv("invalid indirect read from stack R2 off -8+0 size 8")
+__msg_unpriv("invalid read from stack R2 off -8+0 size 8")
 __retval(0)
 __naked void on_stack_to_helper_function(void)
 {
diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c
index c810f4f6f479..1d36d01b746e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_var_off.c
+++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c
@@ -203,7 +203,7 @@ __naked void stack_write_clobbers_spilled_regs(void)
 
 SEC("sockops")
 __description("indirect variable-offset stack access, unbounded")
-__failure __msg("invalid unbounded variable-offset indirect access to stack R4")
+__failure __msg("invalid unbounded variable-offset write to stack R4")
 __naked void variable_offset_stack_access_unbounded(void)
 {
 	asm volatile ("					\
@@ -236,7 +236,7 @@ l0_%=:	r0 = 0;						\
 
 SEC("lwt_in")
 __description("indirect variable-offset stack access, max out of bound")
-__failure __msg("invalid variable-offset indirect access to stack R2")
+__failure __msg("invalid variable-offset read from stack R2")
 __naked void access_max_out_of_bound(void)
 {
 	asm volatile ("					\
@@ -269,7 +269,7 @@ __naked void access_max_out_of_bound(void)
  */
 SEC("socket")
 __description("indirect variable-offset stack access, zero-sized, max out of bound")
-__failure __msg("invalid variable-offset indirect access to stack R1")
+__failure __msg("invalid variable-offset write to stack R1")
 __naked void zero_sized_access_max_out_of_bound(void)
 {
 	asm volatile ("                      \
@@ -294,7 +294,7 @@ __naked void zero_sized_access_max_out_of_bound(void)
 
 SEC("lwt_in")
 __description("indirect variable-offset stack access, min out of bound")
-__failure __msg("invalid variable-offset indirect access to stack R2")
+__failure __msg("invalid variable-offset read from stack R2")
 __naked void access_min_out_of_bound(void)
 {
 	asm volatile ("					\
diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c
index f8d3ae0c29ae..2f1ba08c293e 100644
--- a/tools/testing/selftests/bpf/progs/wq.c
+++ b/tools/testing/selftests/bpf/progs/wq.c
@@ -5,7 +5,7 @@
 #include "bpf_experimental.h"
 #include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
index 25b51a72fe0f..4240211a1900 100644
--- a/tools/testing/selftests/bpf/progs/wq_failures.c
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -5,7 +5,7 @@
 #include "bpf_experimental.h"
 #include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index 0ed67b6b31dd..238121fda5b6 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -66,7 +66,7 @@ class ArrayParser(BlockParser):
 
     def __init__(self, reader, array_name):
         self.array_name = array_name
-        self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n')
+        self.start_marker = re.compile(fr'(static )?const bool {self.array_name}\[.*\] = {{\n')
         super().__init__(reader)
 
     def search_block(self):
@@ -80,7 +80,7 @@ class ArrayParser(BlockParser):
         Parse a block and return data as a dictionary. Items to extract must be
         on separate lines in the file.
         """
-        pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$')
+        pattern = re.compile(r'\[(BPF_\w*)\]\s*= (true|false),?$')
         entries = set()
         while True:
             line = self.reader.readline()
@@ -178,7 +178,7 @@ class FileExtractor(object):
         @enum_name: name of the enum to parse
         """
         start_marker = re.compile(f'enum {enum_name} {{\n')
-        pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
+        pattern = re.compile(r'^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
         end_marker = re.compile('^};')
         parser = BlockParser(self.reader)
         parser.search_block(start_marker)
@@ -226,8 +226,8 @@ class FileExtractor(object):
 
         @block_name: name of the blog to parse, 'TYPE' in the example
         """
-        start_marker = re.compile(f'\*{block_name}\* := {{')
-        pattern = re.compile('\*\*([\w/-]+)\*\*')
+        start_marker = re.compile(fr'\*{block_name}\* := {{')
+        pattern = re.compile(r'\*\*([\w/-]+)\*\*')
         end_marker = re.compile('}\n')
         return self.__get_description_list(start_marker, pattern, end_marker)
 
@@ -245,8 +245,8 @@ class FileExtractor(object):
 
         @block_name: name of the blog to parse, 'TYPE' in the example
         """
-        start_marker = re.compile(f'"\s*{block_name} := {{')
-        pattern = re.compile('([\w/]+) [|}]')
+        start_marker = re.compile(fr'"\s*{block_name} := {{')
+        pattern = re.compile(r'([\w/]+) [|}]')
         end_marker = re.compile('}')
         return self.__get_description_list(start_marker, pattern, end_marker)
 
@@ -264,8 +264,8 @@ class FileExtractor(object):
 
         @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
         """
-        start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
-        pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+        start_marker = re.compile(fr'"\s*{macro}\s*" [|}}]')
+        pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]])')
         end_marker = re.compile('}\\\\n')
         return self.__get_description_list(start_marker, pattern, end_marker)
 
@@ -283,8 +283,8 @@ class FileExtractor(object):
 
         @block_name: name of the blog to parse, 'TYPE' in the example
         """
-        start_marker = re.compile(f'local {block_name}=\'')
-        pattern = re.compile('(?:.*=\')?([\w/]+)')
+        start_marker = re.compile(fr'local {block_name}=\'')
+        pattern = re.compile(r'(?:.*=\')?([\w/]+)')
         end_marker = re.compile('\'$')
         return self.__get_description_list(start_marker, pattern, end_marker)
 
@@ -316,7 +316,7 @@ class MainHeaderFileExtractor(SourceFileExtractor):
             {'-p', '-d', '--pretty', '--debug', '--json', '-j'}
         """
         start_marker = re.compile(f'"OPTIONS :=')
-        pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])')
+        pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]"])')
         end_marker = re.compile('#define')
 
         parser = InlineListParser(self.reader)
@@ -338,8 +338,8 @@ class ManSubstitutionsExtractor(SourceFileExtractor):
 
             {'-p', '-d', '--pretty', '--debug', '--json', '-j'}
         """
-        start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {')
-        pattern = re.compile('\*\*([\w/-]+)\*\*')
+        start_marker = re.compile(r'\|COMMON_OPTIONS\| replace:: {')
+        pattern = re.compile(r'\*\*([\w/-]+)\*\*')
         end_marker = re.compile('}$')
 
         parser = InlineListParser(self.reader)
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
deleted file mode 100644
index 571cc076dd7d..000000000000
--- a/tools/testing/selftests/bpf/test_flow_dissector.c
+++ /dev/null
@@ -1,780 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Inject packets with all sorts of encapsulation into the kernel.
- *
- * IPv4/IPv6	outer layer 3
- * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
- * IPv4/IPv6    inner layer 3
- */
-
-#define _GNU_SOURCE
-
-#include <stddef.h>
-#include <arpa/inet.h>
-#include <asm/byteorder.h>
-#include <error.h>
-#include <errno.h>
-#include <linux/if_packet.h>
-#include <linux/if_ether.h>
-#include <linux/ipv6.h>
-#include <netinet/ip.h>
-#include <netinet/in.h>
-#include <netinet/udp.h>
-#include <poll.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#define CFG_PORT_INNER	8000
-
-/* Add some protocol definitions that do not exist in userspace */
-
-struct grehdr {
-	uint16_t unused;
-	uint16_t protocol;
-} __attribute__((packed));
-
-struct guehdr {
-	union {
-		struct {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-			__u8	hlen:5,
-				control:1,
-				version:2;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-			__u8	version:2,
-				control:1,
-				hlen:5;
-#else
-#error  "Please fix <asm/byteorder.h>"
-#endif
-			__u8	proto_ctype;
-			__be16	flags;
-		};
-		__be32	word;
-	};
-};
-
-static uint8_t	cfg_dsfield_inner;
-static uint8_t	cfg_dsfield_outer;
-static uint8_t	cfg_encap_proto;
-static bool	cfg_expect_failure = false;
-static int	cfg_l3_extra = AF_UNSPEC;	/* optional SIT prefix */
-static int	cfg_l3_inner = AF_UNSPEC;
-static int	cfg_l3_outer = AF_UNSPEC;
-static int	cfg_num_pkt = 10;
-static int	cfg_num_secs = 0;
-static char	cfg_payload_char = 'a';
-static int	cfg_payload_len = 100;
-static int	cfg_port_gue = 6080;
-static bool	cfg_only_rx;
-static bool	cfg_only_tx;
-static int	cfg_src_port = 9;
-
-static char	buf[ETH_DATA_LEN];
-
-#define INIT_ADDR4(name, addr4, port)				\
-	static struct sockaddr_in name = {			\
-		.sin_family = AF_INET,				\
-		.sin_port = __constant_htons(port),		\
-		.sin_addr.s_addr = __constant_htonl(addr4),	\
-	};
-
-#define INIT_ADDR6(name, addr6, port)				\
-	static struct sockaddr_in6 name = {			\
-		.sin6_family = AF_INET6,			\
-		.sin6_port = __constant_htons(port),		\
-		.sin6_addr = addr6,				\
-	};
-
-INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
-INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
-INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
-INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
-INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
-INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
-
-INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
-INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
-
-static unsigned long util_gettime(void)
-{
-	struct timeval tv;
-
-	gettimeofday(&tv, NULL);
-	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
-}
-
-static void util_printaddr(const char *msg, struct sockaddr *addr)
-{
-	unsigned long off = 0;
-	char nbuf[INET6_ADDRSTRLEN];
-
-	switch (addr->sa_family) {
-	case PF_INET:
-		off = __builtin_offsetof(struct sockaddr_in, sin_addr);
-		break;
-	case PF_INET6:
-		off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
-		break;
-	default:
-		error(1, 0, "printaddr: unsupported family %u\n",
-		      addr->sa_family);
-	}
-
-	if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
-		       sizeof(nbuf)))
-		error(1, errno, "inet_ntop");
-
-	fprintf(stderr, "%s: %s\n", msg, nbuf);
-}
-
-static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
-{
-	unsigned long sum = 0;
-	int i;
-
-	for (i = 0; i < num_u16; i++)
-		sum += start[i];
-
-	return sum;
-}
-
-static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
-			      unsigned long sum)
-{
-	sum += add_csum_hword(start, num_u16);
-
-	while (sum >> 16)
-		sum = (sum & 0xffff) + (sum >> 16);
-
-	return ~sum;
-}
-
-static void build_ipv4_header(void *header, uint8_t proto,
-			      uint32_t src, uint32_t dst,
-			      int payload_len, uint8_t tos)
-{
-	struct iphdr *iph = header;
-
-	iph->ihl = 5;
-	iph->version = 4;
-	iph->tos = tos;
-	iph->ttl = 8;
-	iph->tot_len = htons(sizeof(*iph) + payload_len);
-	iph->id = htons(1337);
-	iph->protocol = proto;
-	iph->saddr = src;
-	iph->daddr = dst;
-	iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
-}
-
-static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
-{
-	uint16_t val, *ptr = (uint16_t *)ip6h;
-
-	val = ntohs(*ptr);
-	val &= 0xF00F;
-	val |= ((uint16_t) dsfield) << 4;
-	*ptr = htons(val);
-}
-
-static void build_ipv6_header(void *header, uint8_t proto,
-			      struct sockaddr_in6 *src,
-			      struct sockaddr_in6 *dst,
-			      int payload_len, uint8_t dsfield)
-{
-	struct ipv6hdr *ip6h = header;
-
-	ip6h->version = 6;
-	ip6h->payload_len = htons(payload_len);
-	ip6h->nexthdr = proto;
-	ip6h->hop_limit = 8;
-	ipv6_set_dsfield(ip6h, dsfield);
-
-	memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
-	memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
-}
-
-static uint16_t build_udp_v4_csum(const struct iphdr *iph,
-				  const struct udphdr *udph,
-				  int num_words)
-{
-	unsigned long pseudo_sum;
-	int num_u16 = sizeof(iph->saddr);	/* halfwords: twice byte len */
-
-	pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
-	pseudo_sum += htons(IPPROTO_UDP);
-	pseudo_sum += udph->len;
-	return build_ip_csum((void *) udph, num_words, pseudo_sum);
-}
-
-static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
-				  const struct udphdr *udph,
-				  int num_words)
-{
-	unsigned long pseudo_sum;
-	int num_u16 = sizeof(ip6h->saddr);	/* halfwords: twice byte len */
-
-	pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
-	pseudo_sum += htons(ip6h->nexthdr);
-	pseudo_sum += ip6h->payload_len;
-	return build_ip_csum((void *) udph, num_words, pseudo_sum);
-}
-
-static void build_udp_header(void *header, int payload_len,
-			     uint16_t dport, int family)
-{
-	struct udphdr *udph = header;
-	int len = sizeof(*udph) + payload_len;
-
-	udph->source = htons(cfg_src_port);
-	udph->dest = htons(dport);
-	udph->len = htons(len);
-	udph->check = 0;
-	if (family == AF_INET)
-		udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
-						udph, len >> 1);
-	else
-		udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
-						udph, len >> 1);
-}
-
-static void build_gue_header(void *header, uint8_t proto)
-{
-	struct guehdr *gueh = header;
-
-	gueh->proto_ctype = proto;
-}
-
-static void build_gre_header(void *header, uint16_t proto)
-{
-	struct grehdr *greh = header;
-
-	greh->protocol = htons(proto);
-}
-
-static int l3_length(int family)
-{
-	if (family == AF_INET)
-		return sizeof(struct iphdr);
-	else
-		return sizeof(struct ipv6hdr);
-}
-
-static int build_packet(void)
-{
-	int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
-	int el3_len = 0;
-
-	if (cfg_l3_extra)
-		el3_len = l3_length(cfg_l3_extra);
-
-	/* calculate header offsets */
-	if (cfg_encap_proto) {
-		ol3_len = l3_length(cfg_l3_outer);
-
-		if (cfg_encap_proto == IPPROTO_GRE)
-			ol4_len = sizeof(struct grehdr);
-		else if (cfg_encap_proto == IPPROTO_UDP)
-			ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
-	}
-
-	il3_len = l3_length(cfg_l3_inner);
-	il4_len = sizeof(struct udphdr);
-
-	if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
-	    sizeof(buf))
-		error(1, 0, "packet too large\n");
-
-	/*
-	 * Fill packet from inside out, to calculate correct checksums.
-	 * But create ip before udp headers, as udp uses ip for pseudo-sum.
-	 */
-	memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
-	       cfg_payload_char, cfg_payload_len);
-
-	/* add zero byte for udp csum padding */
-	buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
-
-	switch (cfg_l3_inner) {
-	case PF_INET:
-		build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
-				  IPPROTO_UDP,
-				  in_saddr4.sin_addr.s_addr,
-				  in_daddr4.sin_addr.s_addr,
-				  il4_len + cfg_payload_len,
-				  cfg_dsfield_inner);
-		break;
-	case PF_INET6:
-		build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
-				  IPPROTO_UDP,
-				  &in_saddr6, &in_daddr6,
-				  il4_len + cfg_payload_len,
-				  cfg_dsfield_inner);
-		break;
-	}
-
-	build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
-			 cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
-
-	if (!cfg_encap_proto)
-		return il3_len + il4_len + cfg_payload_len;
-
-	switch (cfg_l3_outer) {
-	case PF_INET:
-		build_ipv4_header(buf + el3_len, cfg_encap_proto,
-				  out_saddr4.sin_addr.s_addr,
-				  out_daddr4.sin_addr.s_addr,
-				  ol4_len + il3_len + il4_len + cfg_payload_len,
-				  cfg_dsfield_outer);
-		break;
-	case PF_INET6:
-		build_ipv6_header(buf + el3_len, cfg_encap_proto,
-				  &out_saddr6, &out_daddr6,
-				  ol4_len + il3_len + il4_len + cfg_payload_len,
-				  cfg_dsfield_outer);
-		break;
-	}
-
-	switch (cfg_encap_proto) {
-	case IPPROTO_UDP:
-		build_gue_header(buf + el3_len + ol3_len + ol4_len -
-				 sizeof(struct guehdr),
-				 cfg_l3_inner == PF_INET ? IPPROTO_IPIP
-							 : IPPROTO_IPV6);
-		build_udp_header(buf + el3_len + ol3_len,
-				 sizeof(struct guehdr) + il3_len + il4_len +
-				 cfg_payload_len,
-				 cfg_port_gue, cfg_l3_outer);
-		break;
-	case IPPROTO_GRE:
-		build_gre_header(buf + el3_len + ol3_len,
-				 cfg_l3_inner == PF_INET ? ETH_P_IP
-							 : ETH_P_IPV6);
-		break;
-	}
-
-	switch (cfg_l3_extra) {
-	case PF_INET:
-		build_ipv4_header(buf,
-				  cfg_l3_outer == PF_INET ? IPPROTO_IPIP
-							  : IPPROTO_IPV6,
-				  extra_saddr4.sin_addr.s_addr,
-				  extra_daddr4.sin_addr.s_addr,
-				  ol3_len + ol4_len + il3_len + il4_len +
-				  cfg_payload_len, 0);
-		break;
-	case PF_INET6:
-		build_ipv6_header(buf,
-				  cfg_l3_outer == PF_INET ? IPPROTO_IPIP
-							  : IPPROTO_IPV6,
-				  &extra_saddr6, &extra_daddr6,
-				  ol3_len + ol4_len + il3_len + il4_len +
-				  cfg_payload_len, 0);
-		break;
-	}
-
-	return el3_len + ol3_len + ol4_len + il3_len + il4_len +
-	       cfg_payload_len;
-}
-
-/* sender transmits encapsulated over RAW or unencap'd over UDP */
-static int setup_tx(void)
-{
-	int family, fd, ret;
-
-	if (cfg_l3_extra)
-		family = cfg_l3_extra;
-	else if (cfg_l3_outer)
-		family = cfg_l3_outer;
-	else
-		family = cfg_l3_inner;
-
-	fd = socket(family, SOCK_RAW, IPPROTO_RAW);
-	if (fd == -1)
-		error(1, errno, "socket tx");
-
-	if (cfg_l3_extra) {
-		if (cfg_l3_extra == PF_INET)
-			ret = connect(fd, (void *) &extra_daddr4,
-				      sizeof(extra_daddr4));
-		else
-			ret = connect(fd, (void *) &extra_daddr6,
-				      sizeof(extra_daddr6));
-		if (ret)
-			error(1, errno, "connect tx");
-	} else if (cfg_l3_outer) {
-		/* connect to destination if not encapsulated */
-		if (cfg_l3_outer == PF_INET)
-			ret = connect(fd, (void *) &out_daddr4,
-				      sizeof(out_daddr4));
-		else
-			ret = connect(fd, (void *) &out_daddr6,
-				      sizeof(out_daddr6));
-		if (ret)
-			error(1, errno, "connect tx");
-	} else {
-		/* otherwise using loopback */
-		if (cfg_l3_inner == PF_INET)
-			ret = connect(fd, (void *) &in_daddr4,
-				      sizeof(in_daddr4));
-		else
-			ret = connect(fd, (void *) &in_daddr6,
-				      sizeof(in_daddr6));
-		if (ret)
-			error(1, errno, "connect tx");
-	}
-
-	return fd;
-}
-
-/* receiver reads unencapsulated UDP */
-static int setup_rx(void)
-{
-	int fd, ret;
-
-	fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
-	if (fd == -1)
-		error(1, errno, "socket rx");
-
-	if (cfg_l3_inner == PF_INET)
-		ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
-	else
-		ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
-	if (ret)
-		error(1, errno, "bind rx");
-
-	return fd;
-}
-
-static int do_tx(int fd, const char *pkt, int len)
-{
-	int ret;
-
-	ret = write(fd, pkt, len);
-	if (ret == -1)
-		error(1, errno, "send");
-	if (ret != len)
-		error(1, errno, "send: len (%d < %d)\n", ret, len);
-
-	return 1;
-}
-
-static int do_poll(int fd, short events, int timeout)
-{
-	struct pollfd pfd;
-	int ret;
-
-	pfd.fd = fd;
-	pfd.events = events;
-
-	ret = poll(&pfd, 1, timeout);
-	if (ret == -1)
-		error(1, errno, "poll");
-	if (ret && !(pfd.revents & POLLIN))
-		error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
-
-	return ret;
-}
-
-static int do_rx(int fd)
-{
-	char rbuf;
-	int ret, num = 0;
-
-	while (1) {
-		ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
-		if (ret == -1 && errno == EAGAIN)
-			break;
-		if (ret == -1)
-			error(1, errno, "recv");
-		if (rbuf != cfg_payload_char)
-			error(1, 0, "recv: payload mismatch");
-		num++;
-	}
-
-	return num;
-}
-
-static int do_main(void)
-{
-	unsigned long tstop, treport, tcur;
-	int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
-
-	if (!cfg_only_tx)
-		fdr = setup_rx();
-	if (!cfg_only_rx)
-		fdt = setup_tx();
-
-	len = build_packet();
-
-	tcur = util_gettime();
-	treport = tcur + 1000;
-	tstop = tcur + (cfg_num_secs * 1000);
-
-	while (1) {
-		if (!cfg_only_rx)
-			tx += do_tx(fdt, buf, len);
-
-		if (!cfg_only_tx)
-			rx += do_rx(fdr);
-
-		if (cfg_num_secs) {
-			tcur = util_gettime();
-			if (tcur >= tstop)
-				break;
-			if (tcur >= treport) {
-				fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
-				tx = 0;
-				rx = 0;
-				treport = tcur + 1000;
-			}
-		} else {
-			if (tx == cfg_num_pkt)
-				break;
-		}
-	}
-
-	/* read straggler packets, if any */
-	if (rx < tx) {
-		tstop = util_gettime() + 100;
-		while (rx < tx) {
-			tcur = util_gettime();
-			if (tcur >= tstop)
-				break;
-
-			do_poll(fdr, POLLIN, tstop - tcur);
-			rx += do_rx(fdr);
-		}
-	}
-
-	fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
-
-	if (fdr != -1 && close(fdr))
-		error(1, errno, "close rx");
-	if (fdt != -1 && close(fdt))
-		error(1, errno, "close tx");
-
-	/*
-	 * success (== 0) only if received all packets
-	 * unless failure is expected, in which case none must arrive.
-	 */
-	if (cfg_expect_failure)
-		return rx != 0;
-	else
-		return rx != tx;
-}
-
-
-static void __attribute__((noreturn)) usage(const char *filepath)
-{
-	fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
-			"[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
-			"[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
-			"[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
-		filepath);
-	exit(1);
-}
-
-static void parse_addr(int family, void *addr, const char *optarg)
-{
-	int ret;
-
-	ret = inet_pton(family, optarg, addr);
-	if (ret == -1)
-		error(1, errno, "inet_pton");
-	if (ret == 0)
-		error(1, 0, "inet_pton: bad string");
-}
-
-static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
-{
-	parse_addr(AF_INET, &addr->sin_addr, optarg);
-}
-
-static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
-{
-	parse_addr(AF_INET6, &addr->sin6_addr, optarg);
-}
-
-static int parse_protocol_family(const char *filepath, const char *optarg)
-{
-	if (!strcmp(optarg, "4"))
-		return PF_INET;
-	if (!strcmp(optarg, "6"))
-		return PF_INET6;
-
-	usage(filepath);
-}
-
-static void parse_opts(int argc, char **argv)
-{
-	int c;
-
-	while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
-		switch (c) {
-		case 'd':
-			if (cfg_l3_outer == AF_UNSPEC)
-				error(1, 0, "-d must be preceded by -o");
-			if (cfg_l3_outer == AF_INET)
-				parse_addr4(&out_daddr4, optarg);
-			else
-				parse_addr6(&out_daddr6, optarg);
-			break;
-		case 'D':
-			if (cfg_l3_inner == AF_UNSPEC)
-				error(1, 0, "-D must be preceded by -i");
-			if (cfg_l3_inner == AF_INET)
-				parse_addr4(&in_daddr4, optarg);
-			else
-				parse_addr6(&in_daddr6, optarg);
-			break;
-		case 'e':
-			if (!strcmp(optarg, "gre"))
-				cfg_encap_proto = IPPROTO_GRE;
-			else if (!strcmp(optarg, "gue"))
-				cfg_encap_proto = IPPROTO_UDP;
-			else if (!strcmp(optarg, "bare"))
-				cfg_encap_proto = IPPROTO_IPIP;
-			else if (!strcmp(optarg, "none"))
-				cfg_encap_proto = IPPROTO_IP;	/* == 0 */
-			else
-				usage(argv[0]);
-			break;
-		case 'f':
-			cfg_src_port = strtol(optarg, NULL, 0);
-			break;
-		case 'F':
-			cfg_expect_failure = true;
-			break;
-		case 'h':
-			usage(argv[0]);
-			break;
-		case 'i':
-			if (!strcmp(optarg, "4"))
-				cfg_l3_inner = PF_INET;
-			else if (!strcmp(optarg, "6"))
-				cfg_l3_inner = PF_INET6;
-			else
-				usage(argv[0]);
-			break;
-		case 'l':
-			cfg_payload_len = strtol(optarg, NULL, 0);
-			break;
-		case 'n':
-			cfg_num_pkt = strtol(optarg, NULL, 0);
-			break;
-		case 'o':
-			cfg_l3_outer = parse_protocol_family(argv[0], optarg);
-			break;
-		case 'O':
-			cfg_l3_extra = parse_protocol_family(argv[0], optarg);
-			break;
-		case 'R':
-			cfg_only_rx = true;
-			break;
-		case 's':
-			if (cfg_l3_outer == AF_INET)
-				parse_addr4(&out_saddr4, optarg);
-			else
-				parse_addr6(&out_saddr6, optarg);
-			break;
-		case 'S':
-			if (cfg_l3_inner == AF_INET)
-				parse_addr4(&in_saddr4, optarg);
-			else
-				parse_addr6(&in_saddr6, optarg);
-			break;
-		case 't':
-			cfg_num_secs = strtol(optarg, NULL, 0);
-			break;
-		case 'T':
-			cfg_only_tx = true;
-			break;
-		case 'x':
-			cfg_dsfield_outer = strtol(optarg, NULL, 0);
-			break;
-		case 'X':
-			cfg_dsfield_inner = strtol(optarg, NULL, 0);
-			break;
-		}
-	}
-
-	if (cfg_only_rx && cfg_only_tx)
-		error(1, 0, "options: cannot combine rx-only and tx-only");
-
-	if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
-		error(1, 0, "options: must specify outer with encap");
-	else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
-		error(1, 0, "options: cannot combine no-encap and outer");
-	else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
-		error(1, 0, "options: cannot combine no-encap and extra");
-
-	if (cfg_l3_inner == AF_UNSPEC)
-		cfg_l3_inner = AF_INET6;
-	if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
-		cfg_encap_proto = IPPROTO_IPV6;
-
-	/* RFC 6040 4.2:
-	 *   on decap, if outer encountered congestion (CE == 0x3),
-	 *   but inner cannot encode ECN (NoECT == 0x0), then drop packet.
-	 */
-	if (((cfg_dsfield_outer & 0x3) == 0x3) &&
-	    ((cfg_dsfield_inner & 0x3) == 0x0))
-		cfg_expect_failure = true;
-}
-
-static void print_opts(void)
-{
-	if (cfg_l3_inner == PF_INET6) {
-		util_printaddr("inner.dest6", (void *) &in_daddr6);
-		util_printaddr("inner.source6", (void *) &in_saddr6);
-	} else {
-		util_printaddr("inner.dest4", (void *) &in_daddr4);
-		util_printaddr("inner.source4", (void *) &in_saddr4);
-	}
-
-	if (!cfg_l3_outer)
-		return;
-
-	fprintf(stderr, "encap proto:   %u\n", cfg_encap_proto);
-
-	if (cfg_l3_outer == PF_INET6) {
-		util_printaddr("outer.dest6", (void *) &out_daddr6);
-		util_printaddr("outer.source6", (void *) &out_saddr6);
-	} else {
-		util_printaddr("outer.dest4", (void *) &out_daddr4);
-		util_printaddr("outer.source4", (void *) &out_saddr4);
-	}
-
-	if (!cfg_l3_extra)
-		return;
-
-	if (cfg_l3_outer == PF_INET6) {
-		util_printaddr("extra.dest6", (void *) &extra_daddr6);
-		util_printaddr("extra.source6", (void *) &extra_saddr6);
-	} else {
-		util_printaddr("extra.dest4", (void *) &extra_daddr4);
-		util_printaddr("extra.source4", (void *) &extra_saddr4);
-	}
-
-}
-
-int main(int argc, char **argv)
-{
-	parse_opts(argc, argv);
-	print_opts();
-	return do_main();
-}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
deleted file mode 100755
index 4b298863797a..000000000000
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Load BPF flow dissector and verify it correctly dissects traffic
-
-BPF_FILE="bpf_flow.bpf.o"
-export TESTNAME=test_flow_dissector
-unmount=0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-msg="skip all tests:"
-if [ $UID != 0 ]; then
-	echo $msg please run this as root >&2
-	exit $ksft_skip
-fi
-
-# This test needs to be run in a network namespace with in_netns.sh. Check if
-# this is the case and run it with in_netns.sh if it is being run in the root
-# namespace.
-if [[ -z $(ip netns identify $$) ]]; then
-	err=0
-	if bpftool="$(which bpftool)"; then
-		echo "Testing global flow dissector..."
-
-		$bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \
-			type flow_dissector
-
-		if ! unshare --net $bpftool prog attach pinned \
-			/sys/fs/bpf/flow/_dissect flow_dissector; then
-			echo "Unexpected unsuccessful attach in namespace" >&2
-			err=1
-		fi
-
-		$bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \
-			flow_dissector
-
-		if unshare --net $bpftool prog attach pinned \
-			/sys/fs/bpf/flow/_dissect flow_dissector; then
-			echo "Unexpected successful attach in namespace" >&2
-			err=1
-		fi
-
-		if ! $bpftool prog detach pinned \
-			/sys/fs/bpf/flow/_dissect flow_dissector; then
-			echo "Failed to detach flow dissector" >&2
-			err=1
-		fi
-
-		rm -rf /sys/fs/bpf/flow
-	else
-		echo "Skipping root flow dissector test, bpftool not found" >&2
-	fi
-
-	# Run the rest of the tests in a net namespace.
-	../net/in_netns.sh "$0" "$@"
-	err=$(( $err + $? ))
-
-	if (( $err == 0 )); then
-		echo "selftests: $TESTNAME [PASS]";
-	else
-		echo "selftests: $TESTNAME [FAILED]";
-	fi
-
-	exit $err
-fi
-
-# Determine selftest success via shell exit code
-exit_handler()
-{
-	set +e
-
-	# Cleanup
-	tc filter del dev lo ingress pref 1337 2> /dev/null
-	tc qdisc del dev lo ingress 2> /dev/null
-	./flow_dissector_load -d 2> /dev/null
-	if [ $unmount -ne 0 ]; then
-		umount bpffs 2> /dev/null
-	fi
-}
-
-# Exit script immediately (well catched by trap handler) if any
-# program/thing exits with a non-zero status.
-set -e
-
-# (Use 'trap -l' to list meaning of numbers)
-trap exit_handler 0 2 3 6 9
-
-# Mount BPF file system
-if /bin/mount | grep /sys/fs/bpf > /dev/null; then
-	echo "bpffs already mounted"
-else
-	echo "bpffs not mounted. Mounting..."
-	unmount=1
-	/bin/mount bpffs /sys/fs/bpf -t bpf
-fi
-
-# Attach BPF program
-./flow_dissector_load -p $BPF_FILE -s _dissect
-
-# Setup
-tc qdisc add dev lo ingress
-echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter
-echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
-echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter
-
-echo "Testing IPv4..."
-# Drops all IP/UDP packets coming from port 9
-tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
-	udp src_port 9 action drop
-
-# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
-./test_flow_dissector -i 4 -f 8
-# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 4 -f 9 -F
-# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
-./test_flow_dissector -i 4 -f 10
-
-echo "Testing IPv4 from 127.0.0.127 (fallback to generic dissector)..."
-# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
-./test_flow_dissector -i 4 -S 127.0.0.127 -f 8
-# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 4 -S 127.0.0.127 -f 9 -F
-# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
-./test_flow_dissector -i 4 -S 127.0.0.127 -f 10
-
-echo "Testing IPIP..."
-# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
-	-D 192.168.0.1 -S 1.1.1.1 -f 8
-# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
-	-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
-# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
-	-D 192.168.0.1 -S 1.1.1.1 -f 10
-
-echo "Testing IPv4 + GRE..."
-# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
-	-D 192.168.0.1 -S 1.1.1.1 -f 8
-# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
-	-D 192.168.0.1 -S 1.1.1.1 -f 9 -F
-# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
-	-D 192.168.0.1 -S 1.1.1.1 -f 10
-
-tc filter del dev lo ingress pref 1337
-
-echo "Testing port range..."
-# Drops all IP/UDP packets coming from port 8-10
-tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
-	udp src_port 8-10 action drop
-
-# Send 10 IPv4/UDP packets from port 7. Filter should not drop any.
-./test_flow_dissector -i 4 -f 7
-# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 4 -f 9 -F
-# Send 10 IPv4/UDP packets from port 11. Filter should not drop any.
-./test_flow_dissector -i 4 -f 11
-
-tc filter del dev lo ingress pref 1337
-
-echo "Testing IPv6..."
-# Drops all IPv6/UDP packets coming from port 9
-tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
-	udp src_port 9 action drop
-
-# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
-./test_flow_dissector -i 6 -f 8
-# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 6 -f 9 -F
-# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
-./test_flow_dissector -i 6 -f 10
-
-exit 0
diff --git a/tools/testing/selftests/bpf/bpf_testmod/.gitignore b/tools/testing/selftests/bpf/test_kmods/.gitignore
index ded513777281..ded513777281 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/.gitignore
+++ b/tools/testing/selftests/bpf/test_kmods/.gitignore
diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile
new file mode 100644
index 000000000000..d4e50c4509c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/Makefile
@@ -0,0 +1,21 @@
+TEST_KMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(TEST_KMOD_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
+	bpf_test_modorder_y.ko
+
+$(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o)))
+
+CFLAGS_bpf_testmod.o = -I$(src)
+
+all:
+	$(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) modules
+
+clean:
+	$(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) clean
diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c
index 0cc747fa912f..0cc747fa912f 100644
--- a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c
diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c
index c627ee085d13..c627ee085d13 100644
--- a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c
index 948eb3962732..948eb3962732 100644
--- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
index aeef86b3da74..aeef86b3da74 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index cc9dde507aba..cc9dde507aba 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
index 356803d1c10e..356803d1c10e 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
index b58817938deb..b58817938deb 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6088d8222d59..c9e745d49493 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1282,6 +1282,21 @@ void crash_handler(int signum)
 	backtrace_symbols_fd(bt, sz, STDERR_FILENO);
 }
 
+void hexdump(const char *prefix, const void *buf, size_t len)
+{
+	for (int i = 0; i < len; i++) {
+		if (!(i % 16)) {
+			if (i)
+				fprintf(stdout, "\n");
+			fprintf(stdout, "%s", prefix);
+		}
+		if (i && !(i % 8) && (i % 16))
+			fprintf(stdout, "\t");
+		fprintf(stdout, "%02X ", ((uint8_t *)(buf))[i]);
+	}
+	fprintf(stdout, "\n");
+}
+
 static void sigint_handler(int signum)
 {
 	int i;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 74de33ae37e5..404d0d4915d5 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -185,6 +185,7 @@ void test__end_subtest(void);
 void test__skip(void);
 void test__fail(void);
 int test__join_cgroup(const char *path);
+void hexdump(const char *prefix, const void *buf, size_t len);
 
 #define PRINT_FAIL(format...)                                                  \
 	({                                                                     \
@@ -344,6 +345,20 @@ int test__join_cgroup(const char *path);
 	___ok;								\
 })
 
+#define ASSERT_MEMEQ(actual, expected, len, name) ({			\
+	static int duration = 0;					\
+	const void *__act = actual;					\
+	const void *__exp = expected;					\
+	int __len = len;						\
+	bool ___ok = memcmp(__act, __exp, __len) == 0;			\
+	CHECK(!___ok, (name), "unexpected memory mismatch\n");		\
+	fprintf(stdout, "actual:\n");					\
+	hexdump("\t", __act, __len);					\
+	fprintf(stdout, "expected:\n");					\
+	hexdump("\t", __exp, __len);					\
+	___ok;								\
+})
+
 #define ASSERT_OK(res, name) ({						\
 	static int duration = 0;					\
 	long long ___res = (res);					\
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 7989ec608454..cb55a908bb0d 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -305,6 +305,7 @@ else
 	client_connect
 	verify_data
 	server_listen
+	wait_for_port ${port} ${netcat_opt}
 fi
 
 # serverside, use BPF for decap
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
deleted file mode 100755
index 2740322c1878..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-BPF_FILE="test_xdp_meta.bpf.o"
-# Kselftest framework requirement - SKIP code is 4.
-readonly KSFT_SKIP=4
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-
-cleanup()
-{
-	if [ "$?" = "0" ]; then
-		echo "selftests: test_xdp_meta [PASS]";
-	else
-		echo "selftests: test_xdp_meta [FAILED]";
-	fi
-
-	set +e
-	ip link del veth1 2> /dev/null
-	ip netns del ${NS1} 2> /dev/null
-	ip netns del ${NS2} 2> /dev/null
-}
-
-ip link set dev lo xdp off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
-	echo "selftests: [SKIP] Could not run test without the ip xdp support"
-	exit $KSFT_SKIP
-fi
-set -e
-
-ip netns add ${NS1}
-ip netns add ${NS2}
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-
-ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1
-ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2
-
-ip netns exec ${NS1} tc qdisc add dev veth1 clsact
-ip netns exec ${NS2} tc qdisc add dev veth2 clsact
-
-ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t
-ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t
-
-ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x
-ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x
-
-ip netns exec ${NS1} ip link set dev veth1 up
-ip netns exec ${NS2} ip link set dev veth2 up
-
-ip netns exec ${NS1} ping -c 1 10.1.1.22
-ip netns exec ${NS2} ping -c 1 10.1.1.11
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
deleted file mode 100755
index 0746a4fde9d3..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-# Create 2 namespaces with two veth peers, and
-# forward packets in-between using generic XDP
-#
-# NS1(veth11)     NS2(veth22)
-#     |               |
-#     |               |
-#   (veth1, ------ (veth2,
-#   id:111)         id:222)
-#     | xdp forwarding |
-#     ------------------
-
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-ret=0
-
-setup()
-{
-
-	local xdpmode=$1
-
-	ip netns add ${NS1}
-	ip netns add ${NS2}
-
-	ip link add veth1 index 111 type veth peer name veth11 netns ${NS1}
-	ip link add veth2 index 222 type veth peer name veth22 netns ${NS2}
-
-	ip link set veth1 up
-	ip link set veth2 up
-	ip -n ${NS1} link set dev veth11 up
-	ip -n ${NS2} link set dev veth22 up
-
-	ip -n ${NS1} addr add 10.1.1.11/24 dev veth11
-	ip -n ${NS2} addr add 10.1.1.22/24 dev veth22
-}
-
-cleanup()
-{
-	ip link del veth1 2> /dev/null
-	ip link del veth2 2> /dev/null
-	ip netns del ${NS1} 2> /dev/null
-	ip netns del ${NS2} 2> /dev/null
-}
-
-test_xdp_redirect()
-{
-	local xdpmode=$1
-
-	setup
-
-	ip link set dev veth1 $xdpmode off &> /dev/null
-	if [ $? -ne 0 ];then
-		echo "selftests: test_xdp_redirect $xdpmode [SKIP]"
-		return 0
-	fi
-
-	ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null
-	ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null
-	ip link set dev veth1 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_222 &> /dev/null
-	ip link set dev veth2 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_111 &> /dev/null
-
-	if ip netns exec ${NS1} ping -c 1 10.1.1.22 &> /dev/null &&
-	   ip netns exec ${NS2} ping -c 1 10.1.1.11 &> /dev/null; then
-		echo "selftests: test_xdp_redirect $xdpmode [PASS]";
-	else
-		ret=1
-		echo "selftests: test_xdp_redirect $xdpmode [FAILED]";
-	fi
-
-	cleanup
-}
-
-set -e
-trap cleanup 2 3 6 9
-
-test_xdp_redirect xdpgeneric
-test_xdp_redirect xdpdrv
-
-exit $ret
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 7afc2619ab14..18596ae0b0c1 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -2252,7 +2252,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 7 },
-	.errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8",
+	.errstr_unpriv = "invalid read from stack R2 off -8+0 size 8",
 	.result_unpriv = REJECT,
 	/* in privileged mode reads from uninitialized stack locations are permitted */
 	.result = ACCEPT,
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
index f420c0312aa0..4b39f8472f9b 100644
--- a/tools/testing/selftests/bpf/verifier/map_kptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -373,7 +373,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.fixup_map_kptr = { 1 },
 	.result = REJECT,
-	.errstr = "Unreleased reference id=5 alloc_insn=20",
+	.errstr = "Unreleased reference id=4 alloc_insn=20",
 	.fixup_kfunc_btf_id = {
 		{ "bpf_kfunc_call_test_acquire", 15 },
 	}
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index e12ef953fba8..06af5029885b 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -21,11 +21,20 @@
 #include <gelf.h>
 #include <float.h>
 #include <math.h>
+#include <limits.h>
 
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 #endif
 
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
 enum stat_id {
 	VERDICT,
 	DURATION,
@@ -34,6 +43,11 @@ enum stat_id {
 	PEAK_STATES,
 	MAX_STATES_PER_INSN,
 	MARK_READ_MAX_LEN,
+	SIZE,
+	JITED_SIZE,
+	STACK,
+	PROG_TYPE,
+	ATTACH_TYPE,
 
 	FILE_NAME,
 	PROG_NAME,
@@ -203,7 +217,8 @@ const char argp_program_doc[] =
 "\n"
 "USAGE: veristat <obj-file> [<obj-file>...]\n"
 "   OR: veristat -C <baseline.csv> <comparison.csv>\n"
-"   OR: veristat -R <results.csv>\n";
+"   OR: veristat -R <results.csv>\n"
+"   OR: veristat -vl2 <to_analyze.bpf.o>\n";
 
 enum {
 	OPT_LOG_FIXED = 1000,
@@ -215,7 +230,7 @@ static const struct argp_option opts[] = {
 	{ "version", 'V', NULL, 0, "Print version" },
 	{ "verbose", 'v', NULL, 0, "Verbose mode" },
 	{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
-	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
+	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode, 2 for full verification log)" },
 	{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
 	{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
 	{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
@@ -640,19 +655,21 @@ cleanup:
 }
 
 static const struct stat_specs default_output_spec = {
-	.spec_cnt = 7,
+	.spec_cnt = 8,
 	.ids = {
 		FILE_NAME, PROG_NAME, VERDICT, DURATION,
-		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
+		TOTAL_INSNS, TOTAL_STATES, SIZE, JITED_SIZE
 	},
 };
 
 static const struct stat_specs default_csv_output_spec = {
-	.spec_cnt = 9,
+	.spec_cnt = 14,
 	.ids = {
 		FILE_NAME, PROG_NAME, VERDICT, DURATION,
 		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
 		MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
+		SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
+		STACK,
 	},
 };
 
@@ -688,6 +705,11 @@ static struct stat_def {
 	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
 	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
 	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
+	[SIZE] = { "Program size", {"prog_size"}, },
+	[JITED_SIZE] = { "Jited size", {"prog_size_jited"}, },
+	[STACK] = {"Stack depth", {"stack_depth", "stack"}, },
+	[PROG_TYPE] = { "Program type", {"prog_type"}, },
+	[ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
 };
 
 static bool parse_stat_id_var(const char *name, size_t len, int *id,
@@ -835,7 +857,8 @@ static char verif_log_buf[64 * 1024];
 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
 {
 	const char *cur;
-	int pos, lines;
+	int pos, lines, sub_stack, cnt = 0;
+	char *state = NULL, *token, stack[512];
 
 	buf[buf_sz - 1] = '\0';
 
@@ -853,15 +876,22 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
 
 		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
 			continue;
-		if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
+		if (5 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
 				&s->stats[TOTAL_INSNS],
 				&s->stats[MAX_STATES_PER_INSN],
 				&s->stats[TOTAL_STATES],
 				&s->stats[PEAK_STATES],
 				&s->stats[MARK_READ_MAX_LEN]))
 			continue;
-	}
 
+		if (1 == sscanf(cur, "stack depth %511s", stack))
+			continue;
+	}
+	while ((token = strtok_r(cnt++ ? NULL : stack, "+", &state))) {
+		if (sscanf(token, "%d", &sub_stack) == 0)
+			break;
+		s->stats[STACK] += sub_stack;
+	}
 	return 0;
 }
 
@@ -884,7 +914,7 @@ static int line_cnt_cmp(const void *a, const void *b)
 	const struct line_cnt *b_cnt = (const struct line_cnt *)b;
 
 	if (a_cnt->cnt != b_cnt->cnt)
-		return a_cnt->cnt < b_cnt->cnt ? -1 : 1;
+		return a_cnt->cnt > b_cnt->cnt ? -1 : 1;
 	return strcmp(a_cnt->line, b_cnt->line);
 }
 
@@ -1032,6 +1062,41 @@ static int guess_prog_type_by_ctx_name(const char *ctx_name,
 	return -ESRCH;
 }
 
+/* Make sure only target program is referenced from struct_ops map,
+ * otherwise libbpf would automatically set autocreate for all
+ * referenced programs.
+ * See libbpf.c:bpf_object_adjust_struct_ops_autoload.
+ */
+static void mask_unrelated_struct_ops_progs(struct bpf_object *obj,
+					    struct bpf_map *map,
+					    struct bpf_program *prog)
+{
+	struct btf *btf = bpf_object__btf(obj);
+	const struct btf_type *t, *mt;
+	struct btf_member *m;
+	int i, moff;
+	size_t data_sz, ptr_sz = sizeof(void *);
+	void *data;
+
+	t = btf__type_by_id(btf, bpf_map__btf_value_type_id(map));
+	if (!btf_is_struct(t))
+		return;
+
+	data = bpf_map__initial_value(map, &data_sz);
+	for (i = 0; i < btf_vlen(t); i++) {
+		m = &btf_members(t)[i];
+		mt = btf__type_by_id(btf, m->type);
+		if (!btf_is_ptr(mt))
+			continue;
+		moff = m->offset / 8;
+		if (moff + ptr_sz > data_sz)
+			continue;
+		if (memcmp(data + moff, &prog, ptr_sz) == 0)
+			continue;
+		memset(data + moff, 0, ptr_sz);
+	}
+}
+
 static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
 {
 	struct bpf_map *map;
@@ -1047,6 +1112,9 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
 		case BPF_MAP_TYPE_INODE_STORAGE:
 		case BPF_MAP_TYPE_CGROUP_STORAGE:
 			break;
+		case BPF_MAP_TYPE_STRUCT_OPS:
+			mask_unrelated_struct_ops_progs(obj, map, prog);
+			break;
 		default:
 			if (bpf_map__max_entries(map) == 0)
 				bpf_map__set_max_entries(map, 1);
@@ -1146,8 +1214,11 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 	char *buf;
 	int buf_sz, log_level;
 	struct verif_stats *stats;
+	struct bpf_prog_info info;
+	__u32 info_len = sizeof(info);
 	int err = 0;
 	void *tmp;
+	int fd;
 
 	if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
 		env.progs_skipped++;
@@ -1196,6 +1267,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 	stats->file_name = strdup(base_filename);
 	stats->prog_name = strdup(bpf_program__name(prog));
 	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
+	stats->stats[SIZE] = bpf_program__insn_cnt(prog);
+	stats->stats[PROG_TYPE] = bpf_program__type(prog);
+	stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
+
+	memset(&info, 0, info_len);
+	fd = bpf_program__fd(prog);
+	if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0)
+		stats->stats[JITED_SIZE] = info.jited_prog_len;
+
 	parse_verif_log(buf, buf_sz, stats);
 
 	if (env.verbose) {
@@ -1309,6 +1389,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
 	case PROG_NAME:
 		cmp = strcmp(s1->prog_name, s2->prog_name);
 		break;
+	case ATTACH_TYPE:
+	case PROG_TYPE:
+	case SIZE:
+	case JITED_SIZE:
+	case STACK:
 	case VERDICT:
 	case DURATION:
 	case TOTAL_INSNS:
@@ -1523,12 +1608,27 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id,
 		else
 			*str = s->stats[VERDICT] ? "success" : "failure";
 		break;
+	case ATTACH_TYPE:
+		if (!s)
+			*str = "N/A";
+		else
+			*str = libbpf_bpf_attach_type_str(s->stats[ATTACH_TYPE]) ?: "N/A";
+		break;
+	case PROG_TYPE:
+		if (!s)
+			*str = "N/A";
+		else
+			*str = libbpf_bpf_prog_type_str(s->stats[PROG_TYPE]) ?: "N/A";
+		break;
 	case DURATION:
 	case TOTAL_INSNS:
 	case TOTAL_STATES:
 	case PEAK_STATES:
 	case MAX_STATES_PER_INSN:
 	case MARK_READ_MAX_LEN:
+	case STACK:
+	case SIZE:
+	case JITED_SIZE:
 		*val = s ? s->stats[id] : 0;
 		break;
 	default:
@@ -1612,7 +1712,10 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats
 	case TOTAL_STATES:
 	case PEAK_STATES:
 	case MAX_STATES_PER_INSN:
-	case MARK_READ_MAX_LEN: {
+	case MARK_READ_MAX_LEN:
+	case SIZE:
+	case JITED_SIZE:
+	case STACK: {
 		long val;
 		int err, n;
 
@@ -1625,6 +1728,42 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats
 		st->stats[id] = val;
 		break;
 	}
+	case PROG_TYPE: {
+		enum bpf_prog_type prog_type = 0;
+		const char *type;
+
+		while ((type = libbpf_bpf_prog_type_str(prog_type)))  {
+			if (strcmp(type, str) == 0) {
+				st->stats[id] = prog_type;
+				break;
+			}
+			prog_type++;
+		}
+
+		if (!type) {
+			fprintf(stderr, "Unrecognized prog type %s\n", str);
+			return -EINVAL;
+		}
+		break;
+	}
+	case ATTACH_TYPE: {
+		enum bpf_attach_type attach_type = 0;
+		const char *type;
+
+		while ((type = libbpf_bpf_attach_type_str(attach_type)))  {
+			if (strcmp(type, str) == 0) {
+				st->stats[id] = attach_type;
+				break;
+			}
+			attach_type++;
+		}
+
+		if (!type) {
+			fprintf(stderr, "Unrecognized attach type %s\n", str);
+			return -EINVAL;
+		}
+		break;
+	}
 	default:
 		fprintf(stderr, "Unrecognized stat #%d\n", id);
 		return -EINVAL;
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 6f9956eed797..6f7b15d6c6ed 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -27,7 +27,7 @@
 #include <linux/errqueue.h>
 #include <linux/if_link.h>
 #include <linux/net_tstamp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
 #include <linux/sockios.h>
 #include <linux/if_xdp.h>
 #include <sys/mman.h>
@@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
 		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
-		.flags = XSK_UMEM__DEFAULT_FLAGS,
+		.flags = XDP_UMEM_TX_METADATA_LEN,
 		.tx_metadata_len = sizeof(struct xsk_tx_metadata),
 	};
 	__u32 idx = 0;
@@ -551,6 +551,7 @@ static void hwtstamp_enable(const char *ifname)
 {
 	struct hwtstamp_config cfg = {
 		.rx_filter = HWTSTAMP_FILTER_ALL,
+		.tx_type = HWTSTAMP_TX_ON,
 	};
 
 	hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
index 3f45512fb512..7406c24be1ac 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 #
 # Test the special cpuset v1 hotplug case where a cpuset become empty of
diff --git a/tools/testing/selftests/cpufreq/.gitignore b/tools/testing/selftests/cpufreq/.gitignore
new file mode 100644
index 000000000000..67604e91e068
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+cpufreq_selftest.*
diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile
index c86ca8342222..9b2ccb10b0cf 100644
--- a/tools/testing/selftests/cpufreq/Makefile
+++ b/tools/testing/selftests/cpufreq/Makefile
@@ -3,6 +3,7 @@ all:
 
 TEST_PROGS := main.sh
 TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh special-tests.sh
+EXTRA_CLEAN := cpufreq_selftest.dmesg_cpufreq.txt cpufreq_selftest.dmesg_full.txt cpufreq_selftest.txt
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore
index 2ab675fecb6b..2f0297657c81 100644
--- a/tools/testing/selftests/damon/.gitignore
+++ b/tools/testing/selftests/damon/.gitignore
@@ -1,6 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
-huge_count_read_write
-debugfs_target_ids_read_before_terminate_race
-debugfs_target_ids_pid_leak
 access_memory
 access_memory_even
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 812f656260fb..ecbf07afc6dd 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -1,15 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for damon selftests
 
-TEST_GEN_FILES += huge_count_read_write
-TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race
-TEST_GEN_FILES += debugfs_target_ids_pid_leak
 TEST_GEN_FILES += access_memory access_memory_even
 
-TEST_FILES = _chk_dependency.sh _debugfs_common.sh _damon_sysfs.py
+TEST_FILES = _chk_dependency.sh _damon_sysfs.py
 
 # functionality tests
-TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
 TEST_PROGS += sysfs.sh
 TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
 TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py
@@ -17,11 +13,6 @@ TEST_PROGS += damos_tried_regions.py damon_nr_regions.py
 TEST_PROGS += reclaim.sh lru_sort.sh
 
 # regression tests (reproducers of previously found bugs)
-TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
-TEST_PROGS += debugfs_duplicate_context_creation.sh
-TEST_PROGS += debugfs_rm_non_contexts.sh
-TEST_PROGS += debugfs_target_ids_read_before_terminate_race.sh
-TEST_PROGS += debugfs_target_ids_pid_leak.sh
 TEST_PROGS += sysfs_update_removed_scheme_dir.sh
 TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
 
diff --git a/tools/testing/selftests/damon/config b/tools/testing/selftests/damon/config
index 0daf38974eb0..a68a9fead5dc 100644
--- a/tools/testing/selftests/damon/config
+++ b/tools/testing/selftests/damon/config
@@ -1,6 +1,5 @@
 CONFIG_DAMON=y
 CONFIG_DAMON_SYSFS=y
-CONFIG_DAMON_DBGFS=y
 CONFIG_DAMON_PADDR=y
 CONFIG_DAMON_VADDR=y
 CONFIG_DAMON_RECLAIM=y
diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py
index 2e8a74aff543..58f3291fed12 100755
--- a/tools/testing/selftests/damon/damon_nr_regions.py
+++ b/tools/testing/selftests/damon/damon_nr_regions.py
@@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions):
 
     test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % (
             real_nr_regions, min_nr_regions, max_nr_regions)
+    collected_nr_regions.sort()
     if (collected_nr_regions[0] < min_nr_regions or
         collected_nr_regions[-1] > max_nr_regions):
         print('fail %s' % test_name)
@@ -109,6 +110,7 @@ def main():
     attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs
     attrs.min_nr_regions = 3
     attrs.max_nr_regions = 7
+    attrs.update_us = 100000
     err = kdamonds.kdamonds[0].commit()
     if err is not None:
         proc.terminate()
diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py
index 7d4c6bb2e3cd..57c4937aaed2 100755
--- a/tools/testing/selftests/damon/damos_quota.py
+++ b/tools/testing/selftests/damon/damos_quota.py
@@ -51,16 +51,19 @@ def main():
         nr_quota_exceeds = scheme.stats.qt_exceeds
 
     wss_collected.sort()
+    nr_expected_quota_exceeds = 0
     for wss in wss_collected:
         if wss > sz_quota:
             print('quota is not kept: %s > %s' % (wss, sz_quota))
             print('collected samples are as below')
             print('\n'.join(['%d' % wss for wss in wss_collected]))
             exit(1)
+        if wss == sz_quota:
+            nr_expected_quota_exceeds += 1
 
-    if nr_quota_exceeds < len(wss_collected):
-        print('quota is not always exceeded: %d > %d' %
-              (len(wss_collected), nr_quota_exceeds))
+    if nr_quota_exceeds < nr_expected_quota_exceeds:
+        print('quota is exceeded less than expected: %d < %d' %
+              (nr_quota_exceeds, nr_expected_quota_exceeds))
         exit(1)
 
 if __name__ == '__main__':
diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py
index 18246f3b62f7..f76e0412b564 100755
--- a/tools/testing/selftests/damon/damos_quota_goal.py
+++ b/tools/testing/selftests/damon/damos_quota_goal.py
@@ -63,6 +63,9 @@ def main():
             if last_effective_bytes != 0 else -1.0))
 
         if last_effective_bytes == goal.effective_bytes:
+            # effective quota was already minimum that cannot be more reduced
+            if expect_increase is False and last_effective_bytes == 1:
+                continue
             print('efective bytes not changed: %d' % goal.effective_bytes)
             exit(1)
 
diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh
deleted file mode 100755
index 902e312bca89..000000000000
--- a/tools/testing/selftests/damon/debugfs_attrs.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test attrs file
-# ===============
-
-file="$DBGFS/attrs"
-orig_content=$(cat "$file")
-
-test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input"
-test_write_fail "$file" "1 2 3 4" "$orig_content" "no enough fields"
-test_write_fail "$file" "1 2 3 5 4" "$orig_content" \
-	"min_nr_regions > max_nr_regions"
-test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written"
-echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
deleted file mode 100755
index bd6c22d96ead..000000000000
--- a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test duplicated context creation
-# ================================
-
-if ! echo foo > "$DBGFS/mk_contexts"
-then
-	echo "context creation failed"
-	exit 1
-fi
-
-if echo foo > "$DBGFS/mk_contexts" 2> /dev/null
-then
-	echo "duplicate context creation success"
-	exit 1
-fi
-
-if ! echo foo > "$DBGFS/rm_contexts"
-then
-	echo "context deletion failed"
-	exit 1
-fi
-
-exit 0
diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh
deleted file mode 100755
index effbea33dc16..000000000000
--- a/tools/testing/selftests/damon/debugfs_empty_targets.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test empty targets case
-# =======================
-
-orig_target_ids=$(cat "$DBGFS/target_ids")
-echo "" > "$DBGFS/target_ids"
-
-if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
-then
-	monitor_on_file="$DBGFS/monitor_on_DEPRECATED"
-else
-	monitor_on_file="$DBGFS/monitor_on"
-fi
-
-orig_monitor_on=$(cat "$monitor_on_file")
-test_write_fail "$monitor_on_file" "on" "orig_monitor_on" "empty target ids"
-echo "$orig_target_ids" > "$DBGFS/target_ids"
diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
deleted file mode 100755
index 922cadac2950..000000000000
--- a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test huge count read write
-# ==========================
-
-dmesg -C
-
-for file in "$DBGFS/"*
-do
-	./huge_count_read_write "$file"
-done
-
-if dmesg | grep -q WARNING
-then
-	dmesg
-	exit 1
-else
-	exit 0
-fi
diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
deleted file mode 100755
index f3ffeb1343cf..000000000000
--- a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test putting non-ctx files/dirs to rm_contexts file
-# ===================================================
-
-dmesg -C
-
-for file in "$DBGFS/"*
-do
-	(echo "$(basename "$f")" > "$DBGFS/rm_contexts") &> /dev/null
-	if dmesg | grep -q BUG
-	then
-		dmesg
-		exit 1
-	fi
-done
diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh
deleted file mode 100755
index 5b39ab44731c..000000000000
--- a/tools/testing/selftests/damon/debugfs_schemes.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test schemes file
-# =================
-
-file="$DBGFS/schemes"
-orig_content=$(cat "$file")
-
-test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \
-	"$orig_content" "valid input"
-test_write_fail "$file" "1 2
-3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines"
-test_write_succ "$file" "" "$orig_content" "disabling"
-test_write_fail "$file" "2 1 2 1 10 1 3 10 1 1 1 1 1 1 1 1 2 3" \
-	"$orig_content" "wrong condition ranges"
-echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh
deleted file mode 100755
index 49aeabdb0aae..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source _debugfs_common.sh
-
-# Test target_ids file
-# ====================
-
-file="$DBGFS/target_ids"
-orig_content=$(cat "$file")
-
-test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input"
-test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input"
-test_content "$file" "$orig_content" "1 2" "non-integer was there"
-test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input"
-test_content "$file" "$orig_content" "" "wrong input written"
-test_write_succ "$file" "" "$orig_content" "empty input"
-test_content "$file" "$orig_content" "" "empty input written"
-echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c
deleted file mode 100644
index 0cc2eef7d142..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Author: SeongJae Park <sj@kernel.org>
- */
-
-#define _GNU_SOURCE
-
-#include <fcntl.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/time.h>
-#include <unistd.h>
-
-#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids"
-
-static void write_targetid_exit(void)
-{
-	int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
-	char pid_str[128];
-
-	snprintf(pid_str, sizeof(pid_str), "%d", getpid());
-	write(target_ids_fd, pid_str, sizeof(pid_str));
-	close(target_ids_fd);
-	exit(0);
-}
-
-unsigned long msec_timestamp(void)
-{
-	struct timeval tv;
-
-	gettimeofday(&tv, NULL);
-	return tv.tv_sec * 1000UL + tv.tv_usec / 1000;
-}
-
-int main(int argc, char *argv[])
-{
-	unsigned long start_ms;
-	int time_to_run, nr_forks = 0;
-
-	if (argc != 2) {
-		fprintf(stderr, "Usage: %s <msecs to run>\n", argv[0]);
-		exit(1);
-	}
-	time_to_run = atoi(argv[1]);
-
-	start_ms = msec_timestamp();
-	while (true) {
-		int pid = fork();
-
-		if (pid < 0) {
-			fprintf(stderr, "fork() failed\n");
-			exit(1);
-		}
-		if (pid == 0)
-			write_targetid_exit();
-		wait(NULL);
-		nr_forks++;
-
-		if (msec_timestamp() - start_ms > time_to_run)
-			break;
-	}
-	printf("%d\n", nr_forks);
-	return 0;
-}
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh
deleted file mode 100755
index 31fe33c2b032..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-before=$(grep "^pid " /proc/slabinfo | awk '{print $2}')
-
-nr_leaks=$(./debugfs_target_ids_pid_leak 1000)
-expected_after_max=$((before + nr_leaks / 2))
-
-after=$(grep "^pid " /proc/slabinfo | awk '{print $2}')
-
-echo > /sys/kernel/debug/damon/target_ids
-
-echo "tried $nr_leaks pid leak"
-echo "number of active pid slabs: $before -> $after"
-echo "(up to $expected_after_max expected)"
-if [ $after -gt $expected_after_max ]
-then
-	echo "maybe pids are leaking"
-	exit 1
-else
-	exit 0
-fi
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c
deleted file mode 100644
index b06f52a8ce2d..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Author: SeongJae Park <sj@kernel.org>
- */
-#define _GNU_SOURCE
-
-#include <fcntl.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <time.h>
-#include <unistd.h>
-
-#define DBGFS_MONITOR_ON "/sys/kernel/debug/damon/monitor_on_DEPRECATED"
-#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids"
-
-static void turn_damon_on_exit(void)
-{
-	int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
-	int monitor_on_fd = open(DBGFS_MONITOR_ON, O_RDWR);
-	char pid_str[128];
-
-	snprintf(pid_str, sizeof(pid_str), "%d", getpid());
-	write(target_ids_fd, pid_str, sizeof(pid_str));
-	write(monitor_on_fd, "on\n", 3);
-	close(target_ids_fd);
-	close(monitor_on_fd);
-	usleep(1000);
-	exit(0);
-}
-
-static void try_race(void)
-{
-	int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
-	int pid = fork();
-	int buf[256];
-
-	if (pid < 0) {
-		fprintf(stderr, "fork() failed\n");
-		exit(1);
-	}
-	if (pid == 0)
-		turn_damon_on_exit();
-	while (true) {
-		int status;
-
-		read(target_ids_fd, buf, sizeof(buf));
-		if (waitpid(-1, &status, WNOHANG) == pid)
-			break;
-	}
-	close(target_ids_fd);
-}
-
-static inline uint64_t ts_to_ms(struct timespec *ts)
-{
-	return (uint64_t)ts->tv_sec * 1000 + (uint64_t)ts->tv_nsec / 1000000;
-}
-
-int main(int argc, char *argv[])
-{
-	struct timespec start_time, now;
-	int runtime_ms;
-
-	if (argc != 2) {
-		fprintf(stderr, "Usage: %s <runtime in ms>\n", argv[0]);
-		exit(1);
-	}
-	runtime_ms = atoi(argv[1]);
-	clock_gettime(CLOCK_MONOTONIC, &start_time);
-	while (true) {
-		try_race();
-		clock_gettime(CLOCK_MONOTONIC, &now);
-		if (ts_to_ms(&now) - ts_to_ms(&start_time) > runtime_ms)
-			break;
-	}
-	return 0;
-}
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh
deleted file mode 100755
index fc793c4c9aea..000000000000
--- a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-dmesg -C
-
-./debugfs_target_ids_read_before_terminate_race 5000
-
-if dmesg | grep -q dbgfs_target_ids_read
-then
-	dmesg
-	exit 1
-else
-	exit 0
-fi
diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c
deleted file mode 100644
index 53e69a669668..000000000000
--- a/tools/testing/selftests/damon/huge_count_read_write.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Author: SeongJae Park <sj@kernel.org>
- */
-
-#include <fcntl.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdio.h>
-
-#pragma GCC diagnostic push
-#if __GNUC__ >= 11 && __GNUC_MINOR__ >= 1
-/* Ignore read(2) overflow and write(2) overread compile warnings */
-#pragma GCC diagnostic ignored "-Wstringop-overread"
-#pragma GCC diagnostic ignored "-Wstringop-overflow"
-#endif
-
-void write_read_with_huge_count(char *file)
-{
-	int filedesc = open(file, O_RDWR);
-	char buf[256];
-	int ret;
-
-	printf("%s %s\n", __func__, file);
-	if (filedesc < 0) {
-		fprintf(stderr, "failed opening %s\n", file);
-		exit(1);
-	}
-
-	write(filedesc, "", 0xfffffffful);
-	ret = read(filedesc, buf, 0xfffffffful);
-	close(filedesc);
-}
-
-#pragma GCC diagnostic pop
-
-int main(int argc, char *argv[])
-{
-	if (argc != 2) {
-		fprintf(stderr, "Usage: %s <file>\n", argv[0]);
-		exit(1);
-	}
-	write_read_with_huge_count(argv[1]);
-
-	return 0;
-}
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 0fec8f9801ad..137470bdee0c 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -1,15 +1,18 @@
 # SPDX-License-Identifier: GPL-2.0
 
 TEST_INCLUDES := $(wildcard lib/py/*.py) \
+		 $(wildcard lib/sh/*.sh) \
 		 ../../net/net_helper.sh \
 		 ../../net/lib.sh \
 
 TEST_PROGS := \
 	netcons_basic.sh \
+	netcons_overflow.sh \
 	ping.py \
 	queues.py \
 	stats.py \
 	shaper.py \
+	hds.py \
 # end of TEST_PROGS
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 03a089165d3f..2b10854e4b1e 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -10,7 +10,7 @@ TEST_PROGS := \
 	mode-2-recovery-updelay.sh \
 	bond_options.sh \
 	bond-eth-type-change.sh \
-	bond_macvlan.sh
+	bond_macvlan_ipvlan.sh
 
 TEST_FILES := \
 	lag_lib.sh \
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
deleted file mode 100755
index b609fb6231f4..000000000000
--- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test macvlan over balance-alb
-
-lib_dir=$(dirname "$0")
-source ${lib_dir}/bond_topo_2d1c.sh
-
-m1_ns="m1-$(mktemp -u XXXXXX)"
-m2_ns="m1-$(mktemp -u XXXXXX)"
-m1_ip4="192.0.2.11"
-m1_ip6="2001:db8::11"
-m2_ip4="192.0.2.12"
-m2_ip6="2001:db8::12"
-
-cleanup()
-{
-	ip -n ${m1_ns} link del macv0
-	ip netns del ${m1_ns}
-	ip -n ${m2_ns} link del macv0
-	ip netns del ${m2_ns}
-
-	client_destroy
-	server_destroy
-	gateway_destroy
-}
-
-check_connection()
-{
-	local ns=${1}
-	local target=${2}
-	local message=${3:-"macvlan_over_bond"}
-	RET=0
-
-
-	ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
-	check_err $? "ping failed"
-	log_test "$mode: $message"
-}
-
-macvlan_over_bond()
-{
-	local param="$1"
-	RET=0
-
-	# setup new bond mode
-	bond_reset "${param}"
-
-	ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
-	ip -n ${s_ns} link set macv0 netns ${m1_ns}
-	ip -n ${m1_ns} link set dev macv0 up
-	ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0
-	ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0
-
-	ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
-	ip -n ${s_ns} link set macv0 netns ${m2_ns}
-	ip -n ${m2_ns} link set dev macv0 up
-	ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0
-	ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0
-
-	sleep 2
-
-	check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
-	check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
-	check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1"
-	check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1"
-	check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2"
-	check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2"
-	check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2"
-	check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2"
-
-
-	sleep 5
-
-	check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
-	check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
-	check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client"
-	check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client"
-	check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client"
-	check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client"
-	check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_2"
-	check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_2"
-
-	ip -n ${c_ns} neigh flush dev eth0
-}
-
-trap cleanup EXIT
-
-setup_prepare
-ip netns add ${m1_ns}
-ip netns add ${m2_ns}
-
-modes="active-backup balance-tlb balance-alb"
-
-for mode in $modes; do
-	macvlan_over_bond "mode $mode"
-done
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
new file mode 100755
index 000000000000..c4711272fe45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan/ipvlan over bond
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+xvlan1_ns="xvlan1-$(mktemp -u XXXXXX)"
+xvlan2_ns="xvlan2-$(mktemp -u XXXXXX)"
+xvlan1_ip4="192.0.2.11"
+xvlan1_ip6="2001:db8::11"
+xvlan2_ip4="192.0.2.12"
+xvlan2_ip6="2001:db8::12"
+
+cleanup()
+{
+	client_destroy
+	server_destroy
+	gateway_destroy
+
+	ip netns del ${xvlan1_ns}
+	ip netns del ${xvlan2_ns}
+}
+
+check_connection()
+{
+	local ns=${1}
+	local target=${2}
+	local message=${3}
+	RET=0
+
+	ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+	check_err $? "ping failed"
+	log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}"
+}
+
+xvlan_over_bond()
+{
+	local param="$1"
+	local xvlan_type="$2"
+	local xvlan_mode="$3"
+	RET=0
+
+	# setup new bond mode
+	bond_reset "${param}"
+
+	ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+	ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan1_ns}
+	ip -n ${xvlan1_ns} link set dev ${xvlan_type}0 up
+	ip -n ${xvlan1_ns} addr add ${xvlan1_ip4}/24 dev ${xvlan_type}0
+	ip -n ${xvlan1_ns} addr add ${xvlan1_ip6}/24 dev ${xvlan_type}0
+
+	ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+	ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan2_ns}
+	ip -n ${xvlan2_ns} link set dev ${xvlan_type}0 up
+	ip -n ${xvlan2_ns} addr add ${xvlan2_ip4}/24 dev ${xvlan_type}0
+	ip -n ${xvlan2_ns} addr add ${xvlan2_ip6}/24 dev ${xvlan_type}0
+
+	sleep 2
+
+	check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+	check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+	check_connection "${c_ns}" "${xvlan1_ip4}" "IPv4: client->${xvlan_type}_1"
+	check_connection "${c_ns}" "${xvlan1_ip6}" "IPv6: client->${xvlan_type}_1"
+	check_connection "${c_ns}" "${xvlan2_ip4}" "IPv4: client->${xvlan_type}_2"
+	check_connection "${c_ns}" "${xvlan2_ip6}" "IPv6: client->${xvlan_type}_2"
+	check_connection "${xvlan1_ns}" "${xvlan2_ip4}" "IPv4: ${xvlan_type}_1->${xvlan_type}_2"
+	check_connection "${xvlan1_ns}" "${xvlan2_ip6}" "IPv6: ${xvlan_type}_1->${xvlan_type}_2"
+
+	check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+	check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+	check_connection "${xvlan1_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_1->client"
+	check_connection "${xvlan1_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_1->client"
+	check_connection "${xvlan2_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_2->client"
+	check_connection "${xvlan2_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_2->client"
+	check_connection "${xvlan2_ns}" "${xvlan1_ip4}" "IPv4: ${xvlan_type}_2->${xvlan_type}_1"
+	check_connection "${xvlan2_ns}" "${xvlan1_ip6}" "IPv6: ${xvlan_type}_2->${xvlan_type}_1"
+
+	ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${xvlan1_ns}
+ip netns add ${xvlan2_ns}
+
+bond_modes="active-backup balance-tlb balance-alb"
+
+for bond_mode in ${bond_modes}; do
+	xvlan_over_bond "mode ${bond_mode}" macvlan bridge
+	xvlan_over_bond "mode ${bond_mode}" ipvlan  l2
+done
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index edc56e2cc606..7bc148889ca7 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -11,8 +11,8 @@ ALL_TESTS="
 
 lib_dir=$(dirname "$0")
 source ${lib_dir}/bond_topo_3d1c.sh
-c_maddr="33:33:00:00:00:10"
-g_maddr="33:33:00:00:02:54"
+c_maddr="33:33:ff:00:00:10"
+g_maddr="33:33:ff:00:02:54"
 
 skip_prio()
 {
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
index 899d7fb6ea8e..dad4e5fda4db 100644
--- a/tools/testing/selftests/drivers/net/bonding/config
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -3,6 +3,7 @@ CONFIG_BRIDGE=y
 CONFIG_DUMMY=y
 CONFIG_IPV6=y
 CONFIG_MACVLAN=y
+CONFIG_IPVLAN=y
 CONFIG_NET_ACT_GACT=y
 CONFIG_NET_CLS_FLOWER=y
 CONFIG_NET_SCH_INGRESS=y
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
new file mode 100755
index 000000000000..873f5219e41d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import errno
+import os
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
+from lib.py import CmdExitFailure, EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+from lib.py import defer, ethtool, ip
+
+
+def _get_hds_mode(cfg, netnl) -> str:
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'tcp-data-split' not in rings:
+        raise KsftSkipEx('tcp-data-split not supported by device')
+    return rings['tcp-data-split']
+
+
+def _xdp_onoff(cfg):
+    test_dir = os.path.dirname(os.path.realpath(__file__))
+    prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+    ip("link set dev %s xdp obj %s sec xdp" %
+       (cfg.ifname, prog))
+    ip("link set dev %s xdp off" % cfg.ifname)
+
+
+def _ioctl_ringparam_modify(cfg, netnl) -> None:
+    """
+    Helper for performing a hopefully unimportant IOCTL SET.
+    IOCTL does not support HDS, so it should not affect the HDS config.
+    """
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+
+    if 'tx' not in rings:
+        raise KsftSkipEx('setting Tx ring size not supported')
+
+    try:
+        ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}")
+    except CmdExitFailure as e:
+        ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}")
+    defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}")
+
+
+def get_hds(cfg, netnl) -> None:
+    _get_hds_mode(cfg, netnl)
+
+
+def get_hds_thresh(cfg, netnl) -> None:
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'hds-thresh' not in rings:
+        raise KsftSkipEx('hds-thresh not supported by device')
+
+def set_hds_enable(cfg, netnl) -> None:
+    try:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'})
+    except NlError as e:
+        if e.error == errno.EINVAL:
+            raise KsftSkipEx("disabling of HDS not supported by the device")
+        elif e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("ring-set not supported by the device")
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'tcp-data-split' not in rings:
+        raise KsftSkipEx('tcp-data-split not supported by device')
+
+    ksft_eq('enabled', rings['tcp-data-split'])
+
+def set_hds_disable(cfg, netnl) -> None:
+    try:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'})
+    except NlError as e:
+        if e.error == errno.EINVAL:
+            raise KsftSkipEx("disabling of HDS not supported by the device")
+        elif e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("ring-set not supported by the device")
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'tcp-data-split' not in rings:
+        raise KsftSkipEx('tcp-data-split not supported by device')
+
+    ksft_eq('disabled', rings['tcp-data-split'])
+
+def set_hds_thresh_zero(cfg, netnl) -> None:
+    try:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0})
+    except NlError as e:
+        if e.error == errno.EINVAL:
+            raise KsftSkipEx("hds-thresh-set not supported by the device")
+        elif e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("ring-set not supported by the device")
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'hds-thresh' not in rings:
+        raise KsftSkipEx('hds-thresh not supported by device')
+
+    ksft_eq(0, rings['hds-thresh'])
+
+def set_hds_thresh_max(cfg, netnl) -> None:
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'hds-thresh' not in rings:
+        raise KsftSkipEx('hds-thresh not supported by device')
+    try:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']})
+    except NlError as e:
+        if e.error == errno.EINVAL:
+            raise KsftSkipEx("hds-thresh-set not supported by the device")
+        elif e.error == errno.EOPNOTSUPP:
+            raise KsftSkipEx("ring-set not supported by the device")
+    rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    ksft_eq(rings['hds-thresh'], rings['hds-thresh-max'])
+
+def set_hds_thresh_gt(cfg, netnl) -> None:
+    try:
+        rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+    except NlError as e:
+        raise KsftSkipEx('ring-get not supported by device')
+    if 'hds-thresh' not in rings:
+        raise KsftSkipEx('hds-thresh not supported by device')
+    if 'hds-thresh-max' not in rings:
+        raise KsftSkipEx('hds-thresh-max not defined by device')
+    hds_gt = rings['hds-thresh-max'] + 1
+    with ksft_raises(NlError) as e:
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt})
+    ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+    """
+    mode = _get_hds_mode(cfg, netnl)
+    if mode == 'enabled':
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                         'tcp-data-split': 'unknown'})
+
+    _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "enabled" mode, XDP installation should not work.
+    """
+    _get_hds_mode(cfg, netnl)
+    netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                     'tcp-data-split': 'enabled'})
+
+    defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+                            'tcp-data-split': 'unknown'})
+
+    with ksft_raises(CmdExitFailure) as e:
+        _xdp_onoff(cfg)
+
+
+def set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+    """
+    mode = _get_hds_mode(cfg, netnl)
+    if mode == 'enabled':
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                         'tcp-data-split': 'unknown'})
+
+    _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "enabled" mode, XDP installation should not work.
+    """
+    _get_hds_mode(cfg, netnl)  # Trigger skip if not supported
+
+    netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                     'tcp-data-split': 'enabled'})
+    defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+                            'tcp-data-split': 'unknown'})
+
+    with ksft_raises(CmdExitFailure) as e:
+        _xdp_onoff(cfg)
+
+
+def ioctl(cfg, netnl) -> None:
+    mode1 = _get_hds_mode(cfg, netnl)
+    _ioctl_ringparam_modify(cfg, netnl)
+    mode2 = _get_hds_mode(cfg, netnl)
+
+    ksft_eq(mode1, mode2)
+
+
+def ioctl_set_xdp(cfg, netnl) -> None:
+    """
+    Like set_xdp(), but we perturb the settings via the legacy ioctl.
+    """
+    mode = _get_hds_mode(cfg, netnl)
+    if mode == 'enabled':
+        netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                         'tcp-data-split': 'unknown'})
+
+    _ioctl_ringparam_modify(cfg, netnl)
+
+    _xdp_onoff(cfg)
+
+
+def ioctl_enabled_set_xdp(cfg, netnl) -> None:
+    """
+    Enable single-buffer XDP on the device.
+    When HDS is in "enabled" mode, XDP installation should not work.
+    """
+    _get_hds_mode(cfg, netnl)  # Trigger skip if not supported
+
+    netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+                     'tcp-data-split': 'enabled'})
+    defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+                            'tcp-data-split': 'unknown'})
+
+    with ksft_raises(CmdExitFailure) as e:
+        _xdp_onoff(cfg)
+
+
+def main() -> None:
+    with NetDrvEnv(__file__, queue_count=3) as cfg:
+        ksft_run([get_hds,
+                  get_hds_thresh,
+                  set_hds_disable,
+                  set_hds_enable,
+                  set_hds_thresh_zero,
+                  set_hds_thresh_max,
+                  set_hds_thresh_gt,
+                  set_xdp,
+                  enabled_set_xdp,
+                  ioctl,
+                  ioctl_set_xdp,
+                  ioctl_enabled_set_xdp],
+                 args=(cfg, EthtoolFamily()))
+    ksft_exit()
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 8e502a1f8f9b..19a6969643f4 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -619,9 +619,6 @@ int do_server(struct memory_buffer *mem)
 	fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
 		page_aligned_frags, non_page_aligned_frags);
 
-	fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
-		page_aligned_frags, non_page_aligned_frags);
-
 cleanup:
 
 	free(tmp_mem);
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
index 05b6fbb3fcdd..ad192fef3117 100755
--- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -21,9 +21,9 @@ def _enable_pp_allocation_fail():
     if not os.path.exists("/sys/kernel/debug/fail_function"):
         raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
 
-    if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"):
+    if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
         with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
-            fp.write("page_pool_alloc_pages\n")
+            fp.write("page_pool_alloc_netmems\n")
 
     _write_fail_config({
         "verbose": 0,
@@ -37,7 +37,7 @@ def _disable_pp_allocation_fail():
     if not os.path.exists("/sys/kernel/debug/fail_function"):
         return
 
-    if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"):
+    if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
         with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
             fp.write("\n")
 
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index ca8a7edff3dd..319aaa004c40 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -252,6 +252,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True):
         try:
             # this targets queue 4, which doesn't exist
             ntuple2 = ethtool_create(cfg, "-N", flow)
+            defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
         except CmdExitFailure:
             pass
         else:
@@ -259,7 +260,13 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True):
         # change the table to target queues 0 and 2
         ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0")
         # ntuple rule therefore targets queues 1 and 3
-        ntuple2 = ethtool_create(cfg, "-N", flow)
+        try:
+            ntuple2 = ethtool_create(cfg, "-N", flow)
+        except CmdExitFailure:
+            ksft_pr("Driver does not support rss + queue offset")
+            return
+
+        defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}")
         # should replace existing filter
         ksft_eq(ntuple, ntuple2)
         _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3),
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 1ea9bb695e94..987e452d3a45 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -5,7 +5,7 @@ import time
 from pathlib import Path
 from lib.py import KsftSkipEx, KsftXfailEx
 from lib.py import ksft_setup
-from lib.py import cmd, ethtool, ip
+from lib.py import cmd, ethtool, ip, CmdExitFailure
 from lib.py import NetNS, NetdevSimDev
 from .remote import Remote
 
@@ -48,6 +48,7 @@ class NetDrvEnv:
         else:
             self._ns = NetdevSimDev(**kwargs)
             self.dev = self._ns.nsims[0].dev
+        self.ifname = self.dev['ifname']
         self.ifindex = self.dev['ifindex']
 
     def __enter__(self):
@@ -234,7 +235,12 @@ class NetDrvEpEnv:
         Good drivers will tell us via ethtool what their sync period is.
         """
         if self._stats_settle_time is None:
-            data = ethtool("-c " + self.ifname, json=True)[0]
+            data = {}
+            try:
+                data = ethtool("-c " + self.ifname, json=True)[0]
+            except CmdExitFailure as e:
+                if "Operation not supported" not in e.cmd.stderr:
+                    raise
 
             self._stats_settle_time = 0.025 + \
                 data.get('stats-block-usecs', 0) / 1000 / 1000
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
new file mode 100644
index 000000000000..3acaba41ac7b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -0,0 +1,225 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This file contains functions and helpers to support the netconsole
+# selftests
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+SRCIF="" # to be populated later
+SRCIP=192.0.2.1
+DSTIF="" # to be populated later
+DSTIP=192.0.2.2
+
+PORT="6666"
+MSG="netconsole selftest"
+USERDATA_KEY="key"
+USERDATA_VALUE="value"
+TARGET=$(mktemp -u netcons_XXXXX)
+DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
+NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
+NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+# NAMESPACE will be populated by setup_ns with a random value
+NAMESPACE=""
+
+# IDs for netdevsim
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
+
+# Used to create and delete namespaces
+source "${LIBDIR}"/../../../../net/lib.sh
+source "${LIBDIR}"/../../../../net/net_helper.sh
+
+# Create netdevsim interfaces
+create_ifaces() {
+
+	echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
+	echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
+	udevadm settle 2> /dev/null || true
+
+	local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
+	local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
+
+	# These are global variables
+	SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
+		-path "$NSIM1"/net -exec basename {} \;)
+	DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
+		-path "$NSIM2"/net -exec basename {} \;)
+}
+
+link_ifaces() {
+	local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+	local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
+	local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
+
+	exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
+	exec {INITNS_FD}</proc/self/ns/net
+
+	# Bind the dst interface to namespace
+	ip link set "${DSTIF}" netns "${NAMESPACE}"
+
+	# Linking one device to the other one (on the other namespace}
+	if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX"  > $NSIM_DEV_SYS_LINK
+	then
+		echo "linking netdevsim1 with netdevsim2 should succeed"
+		cleanup
+		exit "${ksft_skip}"
+	fi
+}
+
+function configure_ip() {
+	# Configure the IPs for both interfaces
+	ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
+	ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
+
+	ip addr add "${SRCIP}"/24 dev "${SRCIF}"
+	ip link set "${SRCIF}" up
+}
+
+function set_network() {
+	# setup_ns function is coming from lib.sh
+	setup_ns NAMESPACE
+
+	# Create both interfaces, and assign the destination to a different
+	# namespace
+	create_ifaces
+
+	# Link both interfaces back to back
+	link_ifaces
+
+	configure_ip
+}
+
+function create_dynamic_target() {
+	DSTMAC=$(ip netns exec "${NAMESPACE}" \
+		 ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+
+	# Create a dynamic target
+	mkdir "${NETCONS_PATH}"
+
+	echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
+	echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
+	echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
+	echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+
+	echo 1 > "${NETCONS_PATH}"/enabled
+}
+
+function cleanup() {
+	local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
+
+	# delete netconsole dynamic reconfiguration
+	echo 0 > "${NETCONS_PATH}"/enabled
+	# Remove all the keys that got created during the selftest
+	find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+	# Remove the configfs entry
+	rmdir "${NETCONS_PATH}"
+
+	# Delete netdevsim devices
+	echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
+	echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
+
+	# this is coming from lib.sh
+	cleanup_all_ns
+
+	# Restoring printk configurations
+	echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
+}
+
+function set_user_data() {
+	if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
+	then
+		echo "Userdata path not available in ${NETCONS_PATH}/userdata"
+		exit "${ksft_skip}"
+	fi
+
+	KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}"
+	mkdir -p "${KEY_PATH}"
+	VALUE_PATH="${KEY_PATH}""/value"
+	echo "${USERDATA_VALUE}" > "${VALUE_PATH}"
+}
+
+function listen_port_and_save_to() {
+	local OUTPUT=${1}
+	# Just wait for 2 seconds
+	timeout 2 ip netns exec "${NAMESPACE}" \
+		socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+}
+
+function validate_result() {
+	local TMPFILENAME="$1"
+
+	# TMPFILENAME will contain something like:
+	# 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+	#  key=value
+
+	# Check if the file exists
+	if [ ! -f "$TMPFILENAME" ]; then
+		echo "FAIL: File was not generated." >&2
+		exit "${ksft_fail}"
+	fi
+
+	if ! grep -q "${MSG}" "${TMPFILENAME}"; then
+		echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
+		cat "${TMPFILENAME}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
+		echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
+		cat "${TMPFILENAME}" >&2
+		exit "${ksft_fail}"
+	fi
+
+	# Delete the file once it is validated, otherwise keep it
+	# for debugging purposes
+	rm "${TMPFILENAME}"
+	exit "${ksft_pass}"
+}
+
+function check_for_dependencies() {
+	if [ "$(id -u)" -ne 0 ]; then
+		echo "This test must be run as root" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ! which socat > /dev/null ; then
+		echo "SKIP: socat(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ! which ip > /dev/null ; then
+		echo "SKIP: ip(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ! which udevadm > /dev/null ; then
+		echo "SKIP: udevadm(1) is not available" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
+		echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if [ ! -d "${NETCONS_CONFIGFS}" ]; then
+		echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ip link show "${DSTIF}" 2> /dev/null; then
+		echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
+		exit "${ksft_skip}"
+	fi
+
+	if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
+		echo "SKIP: IPs already in use. Skipping it" >&2
+		exit "${ksft_skip}"
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
index b79542a4dcc7..4a11bf1d514a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
@@ -12,6 +12,7 @@ ALL_TESTS="
 	bridge_rif_remaster_port
 "
 
+REQUIRE_TEAMD="yes"
 NUM_NETIFS=2
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
index e28f978104f3..b8bbe94f4736 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
@@ -10,6 +10,7 @@ ALL_TESTS="
 	lag_rif_nomaster_addr
 "
 
+REQUIRE_TEAMD="yes"
 NUM_NETIFS=2
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
index 6318cfa6434c..d1a9d379eaf3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
@@ -10,6 +10,7 @@ ALL_TESTS="
 	lag_rif_nomaster_addr
 "
 
+REQUIRE_TEAMD="yes"
 NUM_NETIFS=2
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
index b175f4d966e5..fe765da498e8 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -18,224 +18,8 @@ set -euo pipefail
 
 SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
 
-# Simple script to test dynamic targets in netconsole
-SRCIF="" # to be populated later
-SRCIP=192.0.2.1
-DSTIF="" # to be populated later
-DSTIP=192.0.2.2
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
 
-PORT="6666"
-MSG="netconsole selftest"
-USERDATA_KEY="key"
-USERDATA_VALUE="value"
-TARGET=$(mktemp -u netcons_XXXXX)
-DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
-NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
-NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
-KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}"
-# NAMESPACE will be populated by setup_ns with a random value
-NAMESPACE=""
-
-# IDs for netdevsim
-NSIM_DEV_1_ID=$((256 + RANDOM % 256))
-NSIM_DEV_2_ID=$((512 + RANDOM % 256))
-NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
-
-# Used to create and delete namespaces
-source "${SCRIPTDIR}"/../../net/lib.sh
-source "${SCRIPTDIR}"/../../net/net_helper.sh
-
-# Create netdevsim interfaces
-create_ifaces() {
-
-	echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
-	echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
-	udevadm settle 2> /dev/null || true
-
-	local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
-	local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
-
-	# These are global variables
-	SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
-		-path "$NSIM1"/net -exec basename {} \;)
-	DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
-		-path "$NSIM2"/net -exec basename {} \;)
-}
-
-link_ifaces() {
-	local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
-	local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
-	local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
-
-	exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
-	exec {INITNS_FD}</proc/self/ns/net
-
-	# Bind the dst interface to namespace
-	ip link set "${DSTIF}" netns "${NAMESPACE}"
-
-	# Linking one device to the other one (on the other namespace}
-	if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX"  > $NSIM_DEV_SYS_LINK
-	then
-		echo "linking netdevsim1 with netdevsim2 should succeed"
-		cleanup
-		exit "${ksft_skip}"
-	fi
-}
-
-function configure_ip() {
-	# Configure the IPs for both interfaces
-	ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
-	ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
-
-	ip addr add "${SRCIP}"/24 dev "${SRCIF}"
-	ip link set "${SRCIF}" up
-}
-
-function set_network() {
-	# setup_ns function is coming from lib.sh
-	setup_ns NAMESPACE
-
-	# Create both interfaces, and assign the destination to a different
-	# namespace
-	create_ifaces
-
-	# Link both interfaces back to back
-	link_ifaces
-
-	configure_ip
-}
-
-function create_dynamic_target() {
-	DSTMAC=$(ip netns exec "${NAMESPACE}" \
-		 ip link show "${DSTIF}" | awk '/ether/ {print $2}')
-
-	# Create a dynamic target
-	mkdir "${NETCONS_PATH}"
-
-	echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
-	echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
-	echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
-	echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
-
-	echo 1 > "${NETCONS_PATH}"/enabled
-}
-
-function cleanup() {
-	local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
-
-	# delete netconsole dynamic reconfiguration
-	echo 0 > "${NETCONS_PATH}"/enabled
-	# Remove key
-	rmdir "${KEY_PATH}"
-	# Remove the configfs entry
-	rmdir "${NETCONS_PATH}"
-
-	# Delete netdevsim devices
-	echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
-	echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
-
-	# this is coming from lib.sh
-	cleanup_all_ns
-
-	# Restoring printk configurations
-	echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
-}
-
-function set_user_data() {
-	if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
-	then
-		echo "Userdata path not available in ${NETCONS_PATH}/userdata"
-		exit "${ksft_skip}"
-	fi
-
-	mkdir -p "${KEY_PATH}"
-	VALUE_PATH="${KEY_PATH}""/value"
-	echo "${USERDATA_VALUE}" > "${VALUE_PATH}"
-}
-
-function listen_port_and_save_to() {
-	local OUTPUT=${1}
-	# Just wait for 2 seconds
-	timeout 2 ip netns exec "${NAMESPACE}" \
-		socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
-}
-
-function validate_result() {
-	local TMPFILENAME="$1"
-
-	# TMPFILENAME will contain something like:
-	# 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
-	#  key=value
-
-	# Check if the file exists
-	if [ ! -f "$TMPFILENAME" ]; then
-		echo "FAIL: File was not generated." >&2
-		exit "${ksft_fail}"
-	fi
-
-	if ! grep -q "${MSG}" "${TMPFILENAME}"; then
-		echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
-		cat "${TMPFILENAME}" >&2
-		exit "${ksft_fail}"
-	fi
-
-	if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
-		echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
-		cat "${TMPFILENAME}" >&2
-		exit "${ksft_fail}"
-	fi
-
-	# Delete the file once it is validated, otherwise keep it
-	# for debugging purposes
-	rm "${TMPFILENAME}"
-	exit "${ksft_pass}"
-}
-
-function check_for_dependencies() {
-	if [ "$(id -u)" -ne 0 ]; then
-		echo "This test must be run as root" >&2
-		exit "${ksft_skip}"
-	fi
-
-	if ! which socat > /dev/null ; then
-		echo "SKIP: socat(1) is not available" >&2
-		exit "${ksft_skip}"
-	fi
-
-	if ! which ip > /dev/null ; then
-		echo "SKIP: ip(1) is not available" >&2
-		exit "${ksft_skip}"
-	fi
-
-	if ! which udevadm > /dev/null ; then
-		echo "SKIP: udevadm(1) is not available" >&2
-		exit "${ksft_skip}"
-	fi
-
-	if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
-		echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
-		exit "${ksft_skip}"
-	fi
-
-	if [ ! -d "${NETCONS_CONFIGFS}" ]; then
-		echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
-		exit "${ksft_skip}"
-	fi
-
-	if ip link show "${DSTIF}" 2> /dev/null; then
-		echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
-		exit "${ksft_skip}"
-	fi
-
-	if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
-		echo "SKIP: IPs already in use. Skipping it" >&2
-		exit "${ksft_skip}"
-	fi
-}
-
-# ========== #
-# Start here #
-# ========== #
 modprobe netdevsim 2> /dev/null || true
 modprobe netconsole 2> /dev/null || true
 
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh
new file mode 100755
index 000000000000..29bad56448a2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test verifies that users can successfully create up to
+# MAX_USERDATA_ITEMS userdata entries without encountering any failures.
+#
+# Additionally, it tests for expected failure when attempting to exceed this
+# maximum limit.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+# This is coming from netconsole code. Check for it in drivers/net/netconsole.c
+MAX_USERDATA_ITEMS=16
+
+# Function to create userdata entries
+function create_userdata_max_entries() {
+	# All these keys should be created without any error
+	for i in $(seq $MAX_USERDATA_ITEMS)
+	do
+		# USERDATA_KEY is used by set_user_data
+		USERDATA_KEY="key"${i}
+		set_user_data
+	done
+}
+
+# Function to verify the entry limit
+function verify_entry_limit() {
+	# Allowing the test to fail without exiting, since the next command
+	# will fail
+	set +e
+	mkdir "${NETCONS_PATH}/userdata/key_that_will_fail" 2> /dev/null
+	ret="$?"
+	set -e
+	if [ "$ret" -eq 0 ];
+	then
+		echo "Adding more than ${MAX_USERDATA_ITEMS} entries in userdata should fail, but it didn't" >&2
+		ls "${NETCONS_PATH}/userdata/" >&2
+		exit "${ksft_fail}"
+	fi
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# populate the maximum number of supported keys in userdata
+create_userdata_max_entries
+# Verify an additional entry is not allowed
+verify_entry_limit
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index 384cfa3d38a6..92c2f0376c08 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -142,7 +142,7 @@ function pre_ethtool {
 }
 
 function check_table {
-    local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+    local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
     local -n expected=$2
     local last=$3
 
@@ -212,7 +212,7 @@ function check_tables {
 }
 
 function print_table {
-    local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+    local path=$NSIM_DEV_DFS/ports/$port/udp_ports/table$1
     read -a have < $path
 
     tree $NSIM_DEV_DFS/
@@ -641,7 +641,7 @@ for port in 0 1; do
     NSIM_NETDEV=`get_netdev_name old_netdevs`
     ip link set dev $NSIM_NETDEV up
 
-    echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+    echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
 
     msg="1 - create VxLANs v6"
     exp0=( 0 0 0 0 )
@@ -663,7 +663,7 @@ for port in 0 1; do
     new_geneve gnv0 20000
 
     msg="2 - destroy GENEVE"
-    echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+    echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports/inject_error
     exp1=( `mke 20000 2` 0 0 0 )
     del_dev gnv0
 
@@ -764,7 +764,7 @@ for port in 0 1; do
     msg="create VxLANs v4"
     new_vxlan vxlan0 10000 $NSIM_NETDEV
 
-    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
     check_tables
 
     msg="NIC device goes down"
@@ -775,7 +775,7 @@ for port in 0 1; do
     fi
     check_tables
 
-    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
     check_tables
 
     msg="NIC device goes up again"
@@ -789,7 +789,7 @@ for port in 0 1; do
     del_dev vxlan0
     check_tables
 
-    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+    echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
     check_tables
 
     msg="destroy NIC"
@@ -896,7 +896,7 @@ msg="vacate VxLAN in overflow table"
 exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
 del_dev vxlan2
 
-echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports/reset
 check_tables
 
 msg="tunnels destroyed 2"
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index eb83e7b48797..fc69bfcc37c4 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -1,49 +1,219 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
+import os
+import random, string, time
 from lib.py import ksft_run, ksft_exit
-from lib.py import ksft_eq
-from lib.py import NetDrvEpEnv
+from lib.py import ksft_eq, KsftSkipEx, KsftFailEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
 from lib.py import bkg, cmd, wait_port_listen, rand_port
+from lib.py import defer, ethtool, ip
 
+remote_ifname=""
+no_sleep=False
 
-def test_v4(cfg) -> None:
+def _test_v4(cfg) -> None:
     cfg.require_v4()
 
     cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}")
     cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
+    cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v4}")
+    cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
 
-
-def test_v6(cfg) -> None:
+def _test_v6(cfg) -> None:
     cfg.require_v6()
 
-    cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}")
-    cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
-
+    cmd(f"ping -c 1 -W5 {cfg.remote_v6}")
+    cmd(f"ping -c 1 -W5 {cfg.v6}", host=cfg.remote)
+    cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v6}")
+    cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
 
-def test_tcp(cfg) -> None:
+def _test_tcp(cfg) -> None:
     cfg.require_cmd("socat", remote=True)
 
     port = rand_port()
     listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
 
+    test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
     with bkg(listen_cmd, exit_wait=True) as nc:
         wait_port_listen(port)
 
-        cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
+        cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
             shell=True, host=cfg.remote)
-    ksft_eq(nc.stdout.strip(), "ping")
+    ksft_eq(nc.stdout.strip(), test_string)
 
+    test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
     with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
         wait_port_listen(port, host=cfg.remote)
 
-        cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
-    ksft_eq(nc.stdout.strip(), "ping")
-
+        cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
+    ksft_eq(nc.stdout.strip(), test_string)
+
+def _set_offload_checksum(cfg, netnl, on) -> None:
+    try:
+        ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
+    except:
+        return
+
+def _set_xdp_generic_sb_on(cfg) -> None:
+    test_dir = os.path.dirname(os.path.realpath(__file__))
+    prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+    cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True)
+    defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off")
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def _set_xdp_generic_mb_on(cfg) -> None:
+    test_dir = os.path.dirname(os.path.realpath(__file__))
+    prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+    defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+    ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog))
+    defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off")
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def _set_xdp_native_sb_on(cfg) -> None:
+    test_dir = os.path.dirname(os.path.realpath(__file__))
+    prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+    cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True)
+    defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+    xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+    if xdp_info['xdp']['mode'] != 1:
+        """
+        If the interface doesn't support native-mode, it falls back to generic mode.
+        The mode value 1 is native and 2 is generic.
+        So it raises an exception if mode is not 1(native mode).
+        """
+        raise KsftSkipEx('device does not support native-XDP')
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def _set_xdp_native_mb_on(cfg) -> None:
+    test_dir = os.path.dirname(os.path.realpath(__file__))
+    prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+    defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+    try:
+        cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True)
+        defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+    except Exception as e:
+        raise KsftSkipEx('device does not support native-multi-buffer XDP')
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def _set_xdp_offload_on(cfg) -> None:
+    test_dir = os.path.dirname(os.path.realpath(__file__))
+    prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+    cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+    try:
+        cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True)
+    except Exception as e:
+        raise KsftSkipEx('device does not support offloaded XDP')
+    defer(ip, f"link set dev {cfg.ifname} xdpoffload off")
+    cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+    if no_sleep != True:
+        time.sleep(10)
+
+def get_interface_info(cfg) -> None:
+    global remote_ifname
+    global no_sleep
+
+    remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_v4} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout
+    remote_ifname = remote_info.rstrip('\n')
+    if remote_ifname == "":
+        raise KsftFailEx('Can not get remote interface')
+    local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+    if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim":
+        no_sleep=True
+    if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth":
+        no_sleep=True
+
+def set_interface_init(cfg) -> None:
+    cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+    cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True)
+    cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True)
+    cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True)
+    cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+def test_default(cfg, netnl) -> None:
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_generic_sb(cfg, netnl) -> None:
+    _set_xdp_generic_sb_on(cfg)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_generic_mb(cfg, netnl) -> None:
+    _set_xdp_generic_mb_on(cfg)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_native_sb(cfg, netnl) -> None:
+    _set_xdp_native_sb_on(cfg)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_native_mb(cfg, netnl) -> None:
+    _set_xdp_native_mb_on(cfg)
+    _set_offload_checksum(cfg, netnl, "off")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+    _set_offload_checksum(cfg, netnl, "on")
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
+
+def test_xdp_offload(cfg, netnl) -> None:
+    _set_xdp_offload_on(cfg)
+    _test_v4(cfg)
+    _test_v6(cfg)
+    _test_tcp(cfg)
 
 def main() -> None:
     with NetDrvEpEnv(__file__) as cfg:
-        ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+        get_interface_info(cfg)
+        set_interface_init(cfg)
+        ksft_run([test_default,
+                  test_xdp_generic_sb,
+                  test_xdp_generic_mb,
+                  test_xdp_native_sb,
+                  test_xdp_native_mb,
+                  test_xdp_offload],
+                 args=(cfg, EthtoolFamily()))
     ksft_exit()
 
 
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 38303da957ee..8a518905a9f9 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -45,10 +45,9 @@ def addremove_queues(cfg, nl) -> None:
 
     netnl = EthtoolFamily()
     channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
-    if channels['combined-count'] == 0:
-        rx_type = 'rx'
-    else:
-        rx_type = 'combined'
+    rx_type = 'rx'
+    if channels.get('combined-count', 0) > 0:
+            rx_type = 'combined'
 
     expected = curr_queues - 1
     cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 031ac9def6c0..efcc1e10575b 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -2,12 +2,15 @@
 # SPDX-License-Identifier: GPL-2.0
 
 import errno
+import subprocess
+import time
 from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
+from lib.py import KsftSkipEx, KsftXfailEx
 from lib.py import ksft_disruptive
 from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
 from lib.py import NetDrvEnv
-from lib.py import ip, defer
+from lib.py import cmd, ip, defer
 
 ethnl = EthtoolFamily()
 netfam = NetdevFamily()
@@ -174,10 +177,95 @@ def check_down(cfg) -> None:
     netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
 
 
+def __run_inf_loop(body):
+    body = body.strip()
+    if body[-1] != ';':
+        body += ';'
+
+    return subprocess.Popen(f"while true; do {body} done", shell=True,
+                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+
+def __stats_increase_sanely(old, new) -> None:
+    for k in old.keys():
+        ksft_ge(new[k], old[k])
+        ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error")
+
+
+def procfs_hammer(cfg) -> None:
+    """
+    Reading stats via procfs only holds the RCU lock, which is not an exclusive
+    lock, make sure drivers can handle parallel reads of stats.
+    """
+    one = __run_inf_loop("cat /proc/net/dev")
+    defer(one.kill)
+    two = __run_inf_loop("cat /proc/net/dev")
+    defer(two.kill)
+
+    time.sleep(1)
+    # Make sure the processes are running
+    ksft_is(one.poll(), None)
+    ksft_is(two.poll(), None)
+
+    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+    time.sleep(2)
+    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+    __stats_increase_sanely(rtstat1, rtstat2)
+    # defers will kill the loops
+
+
+@ksft_disruptive
+def procfs_downup_hammer(cfg) -> None:
+    """
+    Reading stats via procfs only holds the RCU lock, drivers often try
+    to sleep when reading the stats, or don't protect against races.
+    """
+    # Max out the queues, we'll flip between max and 1
+    channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+    if channels['combined-count'] == 0:
+        rx_type = 'rx'
+    else:
+        rx_type = 'combined'
+    cur_queue_cnt = channels[f'{rx_type}-count']
+    max_queue_cnt = channels[f'{rx_type}-max']
+
+    cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+    defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+    # Real test stats
+    stats = __run_inf_loop("cat /proc/net/dev")
+    defer(stats.kill)
+
+    ipset = f"ip link set dev {cfg.ifname}"
+    defer(ip, f"link set dev {cfg.ifname} up")
+    # The "echo -n 1" lets us count iterations below
+    updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \
+             f"ethtool -L {cfg.ifname} {rx_type} 1; " + \
+             f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \
+              "echo -n 1"
+    updown = __run_inf_loop(updown)
+    kill_updown = defer(updown.kill)
+
+    time.sleep(1)
+    # Make sure the processes are running
+    ksft_is(stats.poll(), None)
+    ksft_is(updown.poll(), None)
+
+    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+    # We're looking for crashes, give it extra time
+    time.sleep(9)
+    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+    __stats_increase_sanely(rtstat1, rtstat2)
+
+    kill_updown.exec()
+    stdout, _ = updown.communicate(timeout=5)
+    ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))
+
+
 def main() -> None:
     with NetDrvEnv(__file__, queue_count=100) as cfg:
         ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
-                  check_down],
+                  check_down, procfs_hammer, procfs_downup_hammer],
                  args=(cfg, ))
     ksft_exit()
 
diff --git a/tools/testing/selftests/drivers/ntsync/.gitignore b/tools/testing/selftests/drivers/ntsync/.gitignore
new file mode 100644
index 000000000000..848573a3d3ea
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/.gitignore
@@ -0,0 +1 @@
+ntsync
diff --git a/tools/testing/selftests/drivers/ntsync/Makefile b/tools/testing/selftests/drivers/ntsync/Makefile
new file mode 100644
index 000000000000..dbf2b055c0b2
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/Makefile
@@ -0,0 +1,7 @@
+# SPDX-LICENSE-IDENTIFIER: GPL-2.0-only
+TEST_GEN_PROGS := ntsync
+
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lpthread
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/ntsync/config b/tools/testing/selftests/drivers/ntsync/config
new file mode 100644
index 000000000000..60539c826d06
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/config
@@ -0,0 +1 @@
+CONFIG_WINESYNC=y
diff --git a/tools/testing/selftests/drivers/ntsync/ntsync.c b/tools/testing/selftests/drivers/ntsync/ntsync.c
new file mode 100644
index 000000000000..3aad311574c4
--- /dev/null
+++ b/tools/testing/selftests/drivers/ntsync/ntsync.c
@@ -0,0 +1,1343 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Various unit tests for the "ntsync" synchronization primitive driver.
+ *
+ * Copyright (C) 2021-2022 Elizabeth Figura <zfigura@codeweavers.com>
+ */
+
+#define _GNU_SOURCE
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/ntsync.h>
+#include "../../kselftest_harness.h"
+
+static int read_sem_state(int sem, __u32 *count, __u32 *max)
+{
+	struct ntsync_sem_args args;
+	int ret;
+
+	memset(&args, 0xcc, sizeof(args));
+	ret = ioctl(sem, NTSYNC_IOC_SEM_READ, &args);
+	*count = args.count;
+	*max = args.max;
+	return ret;
+}
+
+#define check_sem_state(sem, count, max) \
+	({ \
+		__u32 __count, __max; \
+		int ret = read_sem_state((sem), &__count, &__max); \
+		EXPECT_EQ(0, ret); \
+		EXPECT_EQ((count), __count); \
+		EXPECT_EQ((max), __max); \
+	})
+
+static int release_sem(int sem, __u32 *count)
+{
+	return ioctl(sem, NTSYNC_IOC_SEM_RELEASE, count);
+}
+
+static int read_mutex_state(int mutex, __u32 *count, __u32 *owner)
+{
+	struct ntsync_mutex_args args;
+	int ret;
+
+	memset(&args, 0xcc, sizeof(args));
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &args);
+	*count = args.count;
+	*owner = args.owner;
+	return ret;
+}
+
+#define check_mutex_state(mutex, count, owner) \
+	({ \
+		__u32 __count, __owner; \
+		int ret = read_mutex_state((mutex), &__count, &__owner); \
+		EXPECT_EQ(0, ret); \
+		EXPECT_EQ((count), __count); \
+		EXPECT_EQ((owner), __owner); \
+	})
+
+static int unlock_mutex(int mutex, __u32 owner, __u32 *count)
+{
+	struct ntsync_mutex_args args;
+	int ret;
+
+	args.owner = owner;
+	args.count = 0xdeadbeef;
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_UNLOCK, &args);
+	*count = args.count;
+	return ret;
+}
+
+static int read_event_state(int event, __u32 *signaled, __u32 *manual)
+{
+	struct ntsync_event_args args;
+	int ret;
+
+	memset(&args, 0xcc, sizeof(args));
+	ret = ioctl(event, NTSYNC_IOC_EVENT_READ, &args);
+	*signaled = args.signaled;
+	*manual = args.manual;
+	return ret;
+}
+
+#define check_event_state(event, signaled, manual) \
+	({ \
+		__u32 __signaled, __manual; \
+		int ret = read_event_state((event), &__signaled, &__manual); \
+		EXPECT_EQ(0, ret); \
+		EXPECT_EQ((signaled), __signaled); \
+		EXPECT_EQ((manual), __manual); \
+	})
+
+static int wait_objs(int fd, unsigned long request, __u32 count,
+		     const int *objs, __u32 owner, int alert, __u32 *index)
+{
+	struct ntsync_wait_args args = {0};
+	struct timespec timeout;
+	int ret;
+
+	clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+	args.timeout = timeout.tv_sec * 1000000000 + timeout.tv_nsec;
+	args.count = count;
+	args.objs = (uintptr_t)objs;
+	args.owner = owner;
+	args.index = 0xdeadbeef;
+	args.alert = alert;
+	ret = ioctl(fd, request, &args);
+	*index = args.index;
+	return ret;
+}
+
+static int wait_any(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
+{
+	return wait_objs(fd, NTSYNC_IOC_WAIT_ANY, count, objs, owner, 0, index);
+}
+
+static int wait_all(int fd, __u32 count, const int *objs, __u32 owner, __u32 *index)
+{
+	return wait_objs(fd, NTSYNC_IOC_WAIT_ALL, count, objs, owner, 0, index);
+}
+
+static int wait_any_alert(int fd, __u32 count, const int *objs,
+			  __u32 owner, int alert, __u32 *index)
+{
+	return wait_objs(fd, NTSYNC_IOC_WAIT_ANY,
+			 count, objs, owner, alert, index);
+}
+
+static int wait_all_alert(int fd, __u32 count, const int *objs,
+			  __u32 owner, int alert, __u32 *index)
+{
+	return wait_objs(fd, NTSYNC_IOC_WAIT_ALL,
+			 count, objs, owner, alert, index);
+}
+
+TEST(semaphore_state)
+{
+	struct ntsync_sem_args sem_args;
+	struct timespec timeout;
+	__u32 count, index;
+	int fd, ret, sem;
+
+	clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	sem_args.count = 3;
+	sem_args.max = 2;
+	sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_EQ(-1, sem);
+	EXPECT_EQ(EINVAL, errno);
+
+	sem_args.count = 2;
+	sem_args.max = 2;
+	sem = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_LE(0, sem);
+	check_sem_state(sem, 2, 2);
+
+	count = 0;
+	ret = release_sem(sem, &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(2, count);
+	check_sem_state(sem, 2, 2);
+
+	count = 1;
+	ret = release_sem(sem, &count);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOVERFLOW, errno);
+	check_sem_state(sem, 2, 2);
+
+	ret = wait_any(fd, 1, &sem, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(sem, 1, 2);
+
+	ret = wait_any(fd, 1, &sem, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(sem, 0, 2);
+
+	ret = wait_any(fd, 1, &sem, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	count = 3;
+	ret = release_sem(sem, &count);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOVERFLOW, errno);
+	check_sem_state(sem, 0, 2);
+
+	count = 2;
+	ret = release_sem(sem, &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, count);
+	check_sem_state(sem, 2, 2);
+
+	ret = wait_any(fd, 1, &sem, 123, &index);
+	EXPECT_EQ(0, ret);
+	ret = wait_any(fd, 1, &sem, 123, &index);
+	EXPECT_EQ(0, ret);
+
+	count = 1;
+	ret = release_sem(sem, &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, count);
+	check_sem_state(sem, 1, 2);
+
+	count = ~0u;
+	ret = release_sem(sem, &count);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOVERFLOW, errno);
+	check_sem_state(sem, 1, 2);
+
+	close(sem);
+
+	close(fd);
+}
+
+TEST(mutex_state)
+{
+	struct ntsync_mutex_args mutex_args;
+	__u32 owner, count, index;
+	struct timespec timeout;
+	int fd, ret, mutex;
+
+	clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	mutex_args.owner = 123;
+	mutex_args.count = 0;
+	mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_EQ(-1, mutex);
+	EXPECT_EQ(EINVAL, errno);
+
+	mutex_args.owner = 0;
+	mutex_args.count = 2;
+	mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_EQ(-1, mutex);
+	EXPECT_EQ(EINVAL, errno);
+
+	mutex_args.owner = 123;
+	mutex_args.count = 2;
+	mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_LE(0, mutex);
+	check_mutex_state(mutex, 2, 123);
+
+	ret = unlock_mutex(mutex, 0, &count);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+
+	ret = unlock_mutex(mutex, 456, &count);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EPERM, errno);
+	check_mutex_state(mutex, 2, 123);
+
+	ret = unlock_mutex(mutex, 123, &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(2, count);
+	check_mutex_state(mutex, 1, 123);
+
+	ret = unlock_mutex(mutex, 123, &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, count);
+	check_mutex_state(mutex, 0, 0);
+
+	ret = unlock_mutex(mutex, 123, &count);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EPERM, errno);
+
+	ret = wait_any(fd, 1, &mutex, 456, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_mutex_state(mutex, 1, 456);
+
+	ret = wait_any(fd, 1, &mutex, 456, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_mutex_state(mutex, 2, 456);
+
+	ret = unlock_mutex(mutex, 456, &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(2, count);
+	check_mutex_state(mutex, 1, 456);
+
+	ret = wait_any(fd, 1, &mutex, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	owner = 0;
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+
+	owner = 123;
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EPERM, errno);
+	check_mutex_state(mutex, 1, 456);
+
+	owner = 456;
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+	EXPECT_EQ(0, ret);
+
+	memset(&mutex_args, 0xcc, sizeof(mutex_args));
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOWNERDEAD, errno);
+	EXPECT_EQ(0, mutex_args.count);
+	EXPECT_EQ(0, mutex_args.owner);
+
+	memset(&mutex_args, 0xcc, sizeof(mutex_args));
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOWNERDEAD, errno);
+	EXPECT_EQ(0, mutex_args.count);
+	EXPECT_EQ(0, mutex_args.owner);
+
+	ret = wait_any(fd, 1, &mutex, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOWNERDEAD, errno);
+	EXPECT_EQ(0, index);
+	check_mutex_state(mutex, 1, 123);
+
+	owner = 123;
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_KILL, &owner);
+	EXPECT_EQ(0, ret);
+
+	memset(&mutex_args, 0xcc, sizeof(mutex_args));
+	ret = ioctl(mutex, NTSYNC_IOC_MUTEX_READ, &mutex_args);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOWNERDEAD, errno);
+	EXPECT_EQ(0, mutex_args.count);
+	EXPECT_EQ(0, mutex_args.owner);
+
+	ret = wait_any(fd, 1, &mutex, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOWNERDEAD, errno);
+	EXPECT_EQ(0, index);
+	check_mutex_state(mutex, 1, 123);
+
+	close(mutex);
+
+	mutex_args.owner = 0;
+	mutex_args.count = 0;
+	mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_LE(0, mutex);
+	check_mutex_state(mutex, 0, 0);
+
+	ret = wait_any(fd, 1, &mutex, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_mutex_state(mutex, 1, 123);
+
+	close(mutex);
+
+	mutex_args.owner = 123;
+	mutex_args.count = ~0u;
+	mutex = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_LE(0, mutex);
+	check_mutex_state(mutex, ~0u, 123);
+
+	ret = wait_any(fd, 1, &mutex, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	close(mutex);
+
+	close(fd);
+}
+
+TEST(manual_event_state)
+{
+	struct ntsync_event_args event_args;
+	__u32 index, signaled;
+	int fd, event, ret;
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	event_args.manual = 1;
+	event_args.signaled = 0;
+	event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, event);
+	check_event_state(event, 0, 1);
+
+	signaled = 0xdeadbeef;
+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+	check_event_state(event, 1, 1);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, signaled);
+	check_event_state(event, 1, 1);
+
+	ret = wait_any(fd, 1, &event, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_event_state(event, 1, 1);
+
+	signaled = 0xdeadbeef;
+	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, signaled);
+	check_event_state(event, 0, 1);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+	check_event_state(event, 0, 1);
+
+	ret = wait_any(fd, 1, &event, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, signaled);
+	check_event_state(event, 0, 1);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+	check_event_state(event, 0, 1);
+
+	close(event);
+
+	close(fd);
+}
+
+TEST(auto_event_state)
+{
+	struct ntsync_event_args event_args;
+	__u32 index, signaled;
+	int fd, event, ret;
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	event_args.manual = 0;
+	event_args.signaled = 1;
+	event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, event);
+
+	check_event_state(event, 1, 0);
+
+	signaled = 0xdeadbeef;
+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, signaled);
+	check_event_state(event, 1, 0);
+
+	ret = wait_any(fd, 1, &event, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_event_state(event, 0, 0);
+
+	signaled = 0xdeadbeef;
+	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+	check_event_state(event, 0, 0);
+
+	ret = wait_any(fd, 1, &event, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, signaled);
+	check_event_state(event, 0, 0);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_PULSE, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+	check_event_state(event, 0, 0);
+
+	close(event);
+
+	close(fd);
+}
+
+TEST(test_wait_any)
+{
+	int objs[NTSYNC_MAX_WAIT_COUNT + 1], fd, ret;
+	struct ntsync_mutex_args mutex_args = {0};
+	struct ntsync_sem_args sem_args = {0};
+	__u32 owner, index, count, i;
+	struct timespec timeout;
+
+	clock_gettime(CLOCK_MONOTONIC, &timeout);
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	sem_args.count = 2;
+	sem_args.max = 3;
+	objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_LE(0, objs[0]);
+
+	mutex_args.owner = 0;
+	mutex_args.count = 0;
+	objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_LE(0, objs[1]);
+
+	ret = wait_any(fd, 2, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(objs[0], 1, 3);
+	check_mutex_state(objs[1], 0, 0);
+
+	ret = wait_any(fd, 2, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(objs[0], 0, 3);
+	check_mutex_state(objs[1], 0, 0);
+
+	ret = wait_any(fd, 2, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, index);
+	check_sem_state(objs[0], 0, 3);
+	check_mutex_state(objs[1], 1, 123);
+
+	count = 1;
+	ret = release_sem(objs[0], &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, count);
+
+	ret = wait_any(fd, 2, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(objs[0], 0, 3);
+	check_mutex_state(objs[1], 1, 123);
+
+	ret = wait_any(fd, 2, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, index);
+	check_sem_state(objs[0], 0, 3);
+	check_mutex_state(objs[1], 2, 123);
+
+	ret = wait_any(fd, 2, objs, 456, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	owner = 123;
+	ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_any(fd, 2, objs, 456, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOWNERDEAD, errno);
+	EXPECT_EQ(1, index);
+
+	ret = wait_any(fd, 2, objs, 456, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, index);
+
+	close(objs[1]);
+
+	/* test waiting on the same object twice */
+
+	count = 2;
+	ret = release_sem(objs[0], &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, count);
+
+	objs[1] = objs[0];
+	ret = wait_any(fd, 2, objs, 456, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(objs[0], 1, 3);
+
+	ret = wait_any(fd, 0, NULL, 456, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	for (i = 1; i < NTSYNC_MAX_WAIT_COUNT + 1; ++i)
+		objs[i] = objs[0];
+
+	ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+
+	ret = wait_any(fd, NTSYNC_MAX_WAIT_COUNT + 1, objs, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+
+	ret = wait_any(fd, -1, objs, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+
+	close(objs[0]);
+
+	close(fd);
+}
+
+TEST(test_wait_all)
+{
+	struct ntsync_event_args event_args = {0};
+	struct ntsync_mutex_args mutex_args = {0};
+	struct ntsync_sem_args sem_args = {0};
+	__u32 owner, index, count;
+	int objs[2], fd, ret;
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	sem_args.count = 2;
+	sem_args.max = 3;
+	objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_LE(0, objs[0]);
+
+	mutex_args.owner = 0;
+	mutex_args.count = 0;
+	objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_LE(0, objs[1]);
+
+	ret = wait_all(fd, 2, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(objs[0], 1, 3);
+	check_mutex_state(objs[1], 1, 123);
+
+	ret = wait_all(fd, 2, objs, 456, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+	check_sem_state(objs[0], 1, 3);
+	check_mutex_state(objs[1], 1, 123);
+
+	ret = wait_all(fd, 2, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(objs[0], 0, 3);
+	check_mutex_state(objs[1], 2, 123);
+
+	ret = wait_all(fd, 2, objs, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+	check_sem_state(objs[0], 0, 3);
+	check_mutex_state(objs[1], 2, 123);
+
+	count = 3;
+	ret = release_sem(objs[0], &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, count);
+
+	ret = wait_all(fd, 2, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(objs[0], 2, 3);
+	check_mutex_state(objs[1], 3, 123);
+
+	owner = 123;
+	ret = ioctl(objs[1], NTSYNC_IOC_MUTEX_KILL, &owner);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_all(fd, 2, objs, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EOWNERDEAD, errno);
+	check_sem_state(objs[0], 1, 3);
+	check_mutex_state(objs[1], 1, 123);
+
+	close(objs[1]);
+
+	event_args.manual = true;
+	event_args.signaled = true;
+	objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, objs[1]);
+
+	ret = wait_all(fd, 2, objs, 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+	check_sem_state(objs[0], 0, 3);
+	check_event_state(objs[1], 1, 1);
+
+	close(objs[1]);
+
+	/* test waiting on the same object twice */
+	objs[1] = objs[0];
+	ret = wait_all(fd, 2, objs, 123, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+
+	close(objs[0]);
+
+	close(fd);
+}
+
+struct wake_args {
+	int fd;
+	int obj;
+};
+
+struct wait_args {
+	int fd;
+	unsigned long request;
+	struct ntsync_wait_args *args;
+	int ret;
+	int err;
+};
+
+static void *wait_thread(void *arg)
+{
+	struct wait_args *args = arg;
+
+	args->ret = ioctl(args->fd, args->request, args->args);
+	args->err = errno;
+	return NULL;
+}
+
+static __u64 get_abs_timeout(unsigned int ms)
+{
+	struct timespec timeout;
+	clock_gettime(CLOCK_MONOTONIC, &timeout);
+	return (timeout.tv_sec * 1000000000) + timeout.tv_nsec + (ms * 1000000);
+}
+
+static int wait_for_thread(pthread_t thread, unsigned int ms)
+{
+	struct timespec timeout;
+
+	clock_gettime(CLOCK_REALTIME, &timeout);
+	timeout.tv_nsec += ms * 1000000;
+	timeout.tv_sec += (timeout.tv_nsec / 1000000000);
+	timeout.tv_nsec %= 1000000000;
+	return pthread_timedjoin_np(thread, NULL, &timeout);
+}
+
+TEST(wake_any)
+{
+	struct ntsync_event_args event_args = {0};
+	struct ntsync_mutex_args mutex_args = {0};
+	struct ntsync_wait_args wait_args = {0};
+	struct ntsync_sem_args sem_args = {0};
+	struct wait_args thread_args;
+	__u32 count, index, signaled;
+	int objs[2], fd, ret;
+	pthread_t thread;
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	sem_args.count = 0;
+	sem_args.max = 3;
+	objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_LE(0, objs[0]);
+
+	mutex_args.owner = 123;
+	mutex_args.count = 1;
+	objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_LE(0, objs[1]);
+
+	/* test waking the semaphore */
+
+	wait_args.timeout = get_abs_timeout(1000);
+	wait_args.objs = (uintptr_t)objs;
+	wait_args.count = 2;
+	wait_args.owner = 456;
+	wait_args.index = 0xdeadbeef;
+	thread_args.fd = fd;
+	thread_args.args = &wait_args;
+	thread_args.request = NTSYNC_IOC_WAIT_ANY;
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	count = 1;
+	ret = release_sem(objs[0], &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, count);
+	check_sem_state(objs[0], 0, 3);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, thread_args.ret);
+	EXPECT_EQ(0, wait_args.index);
+
+	/* test waking the mutex */
+
+	/* first grab it again for owner 123 */
+	ret = wait_any(fd, 1, &objs[1], 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+
+	wait_args.timeout = get_abs_timeout(1000);
+	wait_args.owner = 456;
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	ret = unlock_mutex(objs[1], 123, &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(2, count);
+
+	ret = pthread_tryjoin_np(thread, NULL);
+	EXPECT_EQ(EBUSY, ret);
+
+	ret = unlock_mutex(objs[1], 123, &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, mutex_args.count);
+	check_mutex_state(objs[1], 1, 456);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, thread_args.ret);
+	EXPECT_EQ(1, wait_args.index);
+
+	close(objs[1]);
+
+	/* test waking events */
+
+	event_args.manual = false;
+	event_args.signaled = false;
+	objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, objs[1]);
+
+	wait_args.timeout = get_abs_timeout(1000);
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+	check_event_state(objs[1], 0, 0);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, thread_args.ret);
+	EXPECT_EQ(1, wait_args.index);
+
+	wait_args.timeout = get_abs_timeout(1000);
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+	check_event_state(objs[1], 0, 0);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, thread_args.ret);
+	EXPECT_EQ(1, wait_args.index);
+
+	close(objs[1]);
+
+	event_args.manual = true;
+	event_args.signaled = false;
+	objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, objs[1]);
+
+	wait_args.timeout = get_abs_timeout(1000);
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	ret = ioctl(objs[1], NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+	check_event_state(objs[1], 1, 1);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, thread_args.ret);
+	EXPECT_EQ(1, wait_args.index);
+
+	ret = ioctl(objs[1], NTSYNC_IOC_EVENT_RESET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, signaled);
+
+	wait_args.timeout = get_abs_timeout(1000);
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	ret = ioctl(objs[1], NTSYNC_IOC_EVENT_PULSE, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+	check_event_state(objs[1], 0, 1);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, thread_args.ret);
+	EXPECT_EQ(1, wait_args.index);
+
+	/* delete an object while it's being waited on */
+
+	wait_args.timeout = get_abs_timeout(200);
+	wait_args.owner = 123;
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	close(objs[0]);
+	close(objs[1]);
+
+	ret = wait_for_thread(thread, 200);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(-1, thread_args.ret);
+	EXPECT_EQ(ETIMEDOUT, thread_args.err);
+
+	close(fd);
+}
+
+TEST(wake_all)
+{
+	struct ntsync_event_args manual_event_args = {0};
+	struct ntsync_event_args auto_event_args = {0};
+	struct ntsync_mutex_args mutex_args = {0};
+	struct ntsync_wait_args wait_args = {0};
+	struct ntsync_sem_args sem_args = {0};
+	struct wait_args thread_args;
+	__u32 count, index, signaled;
+	int objs[4], fd, ret;
+	pthread_t thread;
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	sem_args.count = 0;
+	sem_args.max = 3;
+	objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_LE(0, objs[0]);
+
+	mutex_args.owner = 123;
+	mutex_args.count = 1;
+	objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_LE(0, objs[1]);
+
+	manual_event_args.manual = true;
+	manual_event_args.signaled = true;
+	objs[2] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &manual_event_args);
+	EXPECT_LE(0, objs[2]);
+
+	auto_event_args.manual = false;
+	auto_event_args.signaled = true;
+	objs[3] = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &auto_event_args);
+	EXPECT_EQ(0, objs[3]);
+
+	wait_args.timeout = get_abs_timeout(1000);
+	wait_args.objs = (uintptr_t)objs;
+	wait_args.count = 4;
+	wait_args.owner = 456;
+	thread_args.fd = fd;
+	thread_args.args = &wait_args;
+	thread_args.request = NTSYNC_IOC_WAIT_ALL;
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	count = 1;
+	ret = release_sem(objs[0], &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, count);
+
+	ret = pthread_tryjoin_np(thread, NULL);
+	EXPECT_EQ(EBUSY, ret);
+
+	check_sem_state(objs[0], 1, 3);
+
+	ret = wait_any(fd, 1, &objs[0], 123, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+
+	ret = unlock_mutex(objs[1], 123, &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, count);
+
+	ret = pthread_tryjoin_np(thread, NULL);
+	EXPECT_EQ(EBUSY, ret);
+
+	check_mutex_state(objs[1], 0, 0);
+
+	ret = ioctl(objs[2], NTSYNC_IOC_EVENT_RESET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, signaled);
+
+	count = 2;
+	ret = release_sem(objs[0], &count);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, count);
+	check_sem_state(objs[0], 2, 3);
+
+	ret = ioctl(objs[3], NTSYNC_IOC_EVENT_RESET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, signaled);
+
+	ret = ioctl(objs[2], NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+
+	ret = ioctl(objs[3], NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, signaled);
+
+	check_sem_state(objs[0], 1, 3);
+	check_mutex_state(objs[1], 1, 456);
+	check_event_state(objs[2], 1, 1);
+	check_event_state(objs[3], 0, 0);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, thread_args.ret);
+
+	/* delete an object while it's being waited on */
+
+	wait_args.timeout = get_abs_timeout(200);
+	wait_args.owner = 123;
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	close(objs[0]);
+	close(objs[1]);
+	close(objs[2]);
+	close(objs[3]);
+
+	ret = wait_for_thread(thread, 200);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(-1, thread_args.ret);
+	EXPECT_EQ(ETIMEDOUT, thread_args.err);
+
+	close(fd);
+}
+
+TEST(alert_any)
+{
+	struct ntsync_event_args event_args = {0};
+	struct ntsync_wait_args wait_args = {0};
+	struct ntsync_sem_args sem_args = {0};
+	__u32 index, count, signaled;
+	struct wait_args thread_args;
+	int objs[2], event, fd, ret;
+	pthread_t thread;
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	sem_args.count = 0;
+	sem_args.max = 2;
+	objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_LE(0, objs[0]);
+
+	sem_args.count = 1;
+	sem_args.max = 2;
+	objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_LE(0, objs[1]);
+
+	event_args.manual = true;
+	event_args.signaled = true;
+	event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, event);
+
+	ret = wait_any_alert(fd, 0, NULL, 123, event, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_any_alert(fd, 0, NULL, 123, event, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(1, index);
+
+	ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(2, index);
+
+	/* test wakeup via alert */
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+	EXPECT_EQ(0, ret);
+
+	wait_args.timeout = get_abs_timeout(1000);
+	wait_args.objs = (uintptr_t)objs;
+	wait_args.count = 2;
+	wait_args.owner = 123;
+	wait_args.index = 0xdeadbeef;
+	wait_args.alert = event;
+	thread_args.fd = fd;
+	thread_args.args = &wait_args;
+	thread_args.request = NTSYNC_IOC_WAIT_ANY;
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, thread_args.ret);
+	EXPECT_EQ(2, wait_args.index);
+
+	close(event);
+
+	/* test with an auto-reset event */
+
+	event_args.manual = false;
+	event_args.signaled = true;
+	event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, event);
+
+	count = 1;
+	ret = release_sem(objs[0], &count);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+
+	ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(2, index);
+
+	ret = wait_any_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	close(event);
+
+	close(objs[0]);
+	close(objs[1]);
+
+	close(fd);
+}
+
+TEST(alert_all)
+{
+	struct ntsync_event_args event_args = {0};
+	struct ntsync_wait_args wait_args = {0};
+	struct ntsync_sem_args sem_args = {0};
+	struct wait_args thread_args;
+	__u32 index, count, signaled;
+	int objs[2], event, fd, ret;
+	pthread_t thread;
+
+	fd = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd);
+
+	sem_args.count = 2;
+	sem_args.max = 2;
+	objs[0] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_LE(0, objs[0]);
+
+	sem_args.count = 1;
+	sem_args.max = 2;
+	objs[1] = ioctl(fd, NTSYNC_IOC_CREATE_SEM, &sem_args);
+	EXPECT_LE(0, objs[1]);
+
+	event_args.manual = true;
+	event_args.signaled = true;
+	event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, event);
+
+	ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+
+	ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(2, index);
+
+	/* test wakeup via alert */
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_RESET, &signaled);
+	EXPECT_EQ(0, ret);
+
+	wait_args.timeout = get_abs_timeout(1000);
+	wait_args.objs = (uintptr_t)objs;
+	wait_args.count = 2;
+	wait_args.owner = 123;
+	wait_args.index = 0xdeadbeef;
+	wait_args.alert = event;
+	thread_args.fd = fd;
+	thread_args.args = &wait_args;
+	thread_args.request = NTSYNC_IOC_WAIT_ALL;
+	ret = pthread_create(&thread, NULL, wait_thread, &thread_args);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(ETIMEDOUT, ret);
+
+	ret = ioctl(event, NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_for_thread(thread, 100);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, thread_args.ret);
+	EXPECT_EQ(2, wait_args.index);
+
+	close(event);
+
+	/* test with an auto-reset event */
+
+	event_args.manual = false;
+	event_args.signaled = true;
+	event = ioctl(fd, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, event);
+
+	count = 2;
+	ret = release_sem(objs[1], &count);
+	EXPECT_EQ(0, ret);
+
+	ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, index);
+
+	ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(2, index);
+
+	ret = wait_all_alert(fd, 2, objs, 123, event, &index);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(ETIMEDOUT, errno);
+
+	close(event);
+
+	close(objs[0]);
+	close(objs[1]);
+
+	close(fd);
+}
+
+#define STRESS_LOOPS 10000
+#define STRESS_THREADS 4
+
+static unsigned int stress_counter;
+static int stress_device, stress_start_event, stress_mutex;
+
+static void *stress_thread(void *arg)
+{
+	struct ntsync_wait_args wait_args = {0};
+	__u32 index, count, i;
+	int ret;
+
+	wait_args.timeout = UINT64_MAX;
+	wait_args.count = 1;
+	wait_args.objs = (uintptr_t)&stress_start_event;
+	wait_args.owner = gettid();
+	wait_args.index = 0xdeadbeef;
+
+	ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
+
+	wait_args.objs = (uintptr_t)&stress_mutex;
+
+	for (i = 0; i < STRESS_LOOPS; ++i) {
+		ioctl(stress_device, NTSYNC_IOC_WAIT_ANY, &wait_args);
+
+		++stress_counter;
+
+		unlock_mutex(stress_mutex, wait_args.owner, &count);
+	}
+
+	return NULL;
+}
+
+TEST(stress_wait)
+{
+	struct ntsync_event_args event_args;
+	struct ntsync_mutex_args mutex_args;
+	pthread_t threads[STRESS_THREADS];
+	__u32 signaled, i;
+	int ret;
+
+	stress_device = open("/dev/ntsync", O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, stress_device);
+
+	mutex_args.owner = 0;
+	mutex_args.count = 0;
+	stress_mutex = ioctl(stress_device, NTSYNC_IOC_CREATE_MUTEX, &mutex_args);
+	EXPECT_LE(0, stress_mutex);
+
+	event_args.manual = 1;
+	event_args.signaled = 0;
+	stress_start_event = ioctl(stress_device, NTSYNC_IOC_CREATE_EVENT, &event_args);
+	EXPECT_LE(0, stress_start_event);
+
+	for (i = 0; i < STRESS_THREADS; ++i)
+		pthread_create(&threads[i], NULL, stress_thread, NULL);
+
+	ret = ioctl(stress_start_event, NTSYNC_IOC_EVENT_SET, &signaled);
+	EXPECT_EQ(0, ret);
+
+	for (i = 0; i < STRESS_THREADS; ++i) {
+		ret = pthread_join(threads[i], NULL);
+		EXPECT_EQ(0, ret);
+	}
+
+	EXPECT_EQ(STRESS_LOOPS * STRESS_THREADS, stress_counter);
+
+	close(stress_start_event);
+	close(stress_mutex);
+	close(stress_device);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
index d374878cc0ba..c62544b966ae 100755
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -76,11 +76,11 @@ test_create_empty()
 
 	: > $file
 
-	if [ ! -e $file ]; then
-		echo "$file can not be created without writing" >&2
+	if [ -e $file ]; then
+		echo "$file can be created without writing" >&2
+		file_cleanup $file
 		exit 1
 	fi
-	file_cleanup $file
 }
 
 test_create_read()
@@ -89,10 +89,13 @@ test_create_read()
 	./create-read $file
 	if [ $? -ne 0 ]; then
 		echo "create and read $file failed"
+		exit 1
+	fi
+	if [ -e $file ]; then
+		echo "file still exists and should not"
 		file_cleanup $file
 		exit 1
 	fi
-	file_cleanup $file
 }
 
 test_delete()
@@ -202,6 +205,158 @@ test_invalid_filenames()
 	exit $ret
 }
 
+test_no_set_size()
+{
+	local attrs='\x07\x00\x00\x00'
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+	local ret=0
+
+	printf "$attrs\x00" > $file
+	[ -e $file -a -s $file ] || exit 1
+	chattr -i $file
+	: > $file
+	if [ $? != 0 ]; then
+		echo "variable file failed to accept truncation"
+		ret=1
+	elif [ -e $file -a ! -s $file ]; then
+		echo "file can be truncated to zero size"
+		ret=1
+	fi
+	rm $file || exit 1
+
+	exit $ret
+}
+
+setup_test_multiple()
+{
+       ##
+       # we're going to do multi-threaded tests, so create a set of
+       # pipes for synchronization.  We use pipes 1..3 to start the
+       # stalled shell job and pipes 4..6 as indicators that the job
+       # has started.  If you need more than 3 jobs the two +3's below
+       # need increasing
+       ##
+
+       declare -ag p
+
+       # empty is because arrays number from 0 but jobs number from 1
+       p[0]=""
+
+       for f in 1 2 3 4 5 6; do
+               p[$f]=/tmp/efivarfs_pipe${f}
+               mknod ${p[$f]} p
+       done
+
+       declare -g var=$efivarfs_mount/test_multiple-$test_guid
+
+       cleanup() {
+               for f in ${p[@]}; do
+                       rm -f ${f}
+               done
+               if [ -e $var ]; then
+                       file_cleanup $var
+               fi
+       }
+       trap cleanup exit
+
+       waitstart() {
+               cat ${p[$[$1+3]]} > /dev/null
+       }
+
+       waitpipe() {
+               echo 1 > ${p[$[$1+3]]}
+               cat ${p[$1]} > /dev/null
+       }
+
+       endjob() {
+               echo 1 > ${p[$1]}
+               wait -n %$1
+       }
+}
+
+test_multiple_zero_size()
+{
+       ##
+       # check for remove on last close, set up three threads all
+       # holding the variable (one write and two reads) and then
+       # close them sequentially (waiting for completion) and check
+       # the state of the variable
+       ##
+
+       { waitpipe 1; echo 1; } > $var 2> /dev/null &
+       waitstart 1
+       # zero length file should exist
+       [ -e $var ] || exit 1
+       # second and third delayed close
+       { waitpipe 2; } < $var &
+       waitstart 2
+       { waitpipe 3; } < $var &
+       waitstart 3
+       # close first fd
+       endjob 1
+       # var should only be deleted on last close
+       [ -e $var ] || exit 1
+       # close second fd
+       endjob 2
+       [ -e $var ] || exit 1
+       # file should go on last close
+       endjob 3
+       [ ! -e $var ] || exit 1
+}
+
+test_multiple_create()
+{
+       ##
+       # set multiple threads to access the variable but delay
+       # the final write to check the close of 2 and 3.  The
+       # final write should succeed in creating the variable
+       ##
+       { waitpipe 1; printf '\x07\x00\x00\x00\x54'; } > $var &
+       waitstart 1
+       [ -e $var -a ! -s $var ] || exit 1
+       { waitpipe 2; } < $var &
+       waitstart 2
+       { waitpipe 3; } < $var &
+       waitstart 3
+       # close second and third fds
+       endjob 2
+       # var should only be created (have size) on last close
+       [ -e $var -a ! -s $var ] || exit 1
+       endjob 3
+       [ -e $var -a ! -s $var ] || exit 1
+       # close first fd
+       endjob 1
+       # variable should still exist
+       [ -s $var ] || exit 1
+       file_cleanup $var
+}
+
+test_multiple_delete_on_write() {
+       ##
+       # delete the variable on final write; seqencing similar
+       # to test_multiple_create()
+       ##
+       printf '\x07\x00\x00\x00\x54' > $var
+       chattr -i $var
+       { waitpipe 1; printf '\x07\x00\x00\x00'; } > $var &
+       waitstart 1
+       [ -e $var -a -s $var ] || exit 1
+       { waitpipe 2; } < $var &
+       waitstart 2
+       { waitpipe 3; } < $var &
+       waitstart 3
+       # close first fd; write should set variable size to zero
+       endjob 1
+       # var should only be deleted on last close
+       [ -e $var -a ! -s $var ] || exit 1
+       endjob 2
+       [ -e $var ] || exit 1
+       # close last fd
+       endjob 3
+       # variable should now be removed
+       [ ! -e $var ] || exit 1
+}
+
 check_prereqs
 
 rc=0
@@ -214,5 +369,10 @@ run_test test_zero_size_delete
 run_test test_open_unlink
 run_test test_valid_filenames
 run_test test_invalid_filenames
+run_test test_no_set_size
+setup_test_multiple
+run_test test_multiple_zero_size
+run_test test_multiple_create
+run_test test_multiple_delete_on_write
 
 exit $rc
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index a0dc5d4bf733..7f3d1ae762ec 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -9,9 +9,13 @@ execveat.ephemeral
 execveat.denatured
 non-regular
 null-argv
+/check-exec
+/false
+/inc
 /load_address.*
 !load_address.c
 /recursion-depth
+/set-exec
 xxxxxxxx*
 pipe
 S_I*.test
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index ba012bc5aab9..45a3cfc435cf 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,26 +1,33 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS = -Wall
 CFLAGS += -Wno-nonnull
+CFLAGS += $(KHDR_INCLUDES)
+
+LDLIBS += -lcap
 
 ALIGNS := 0x1000 0x200000 0x1000000
 ALIGN_PIES        := $(patsubst %,load_address.%,$(ALIGNS))
 ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS))
 ALIGNMENT_TESTS   := $(ALIGN_PIES) $(ALIGN_STATIC_PIES)
 
-TEST_PROGS := binfmt_script.py
+TEST_PROGS := binfmt_script.py check-exec-tests.sh
 TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS)
+TEST_GEN_PROGS_EXTENDED := false inc set-exec script-exec.inc script-noexec.inc
 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
 # Makefile is a run-time dependency, since it's accessed by the execveat test
 TEST_FILES := Makefile
 
 TEST_GEN_PROGS += recursion-depth
 TEST_GEN_PROGS += null-argv
+TEST_GEN_PROGS += check-exec
 
 EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*	\
 	       $(OUTPUT)/S_I*.test
 
 include ../lib.mk
 
+CHECK_EXEC_SAMPLES := $(top_srcdir)/samples/check-exec
+
 $(OUTPUT)/subdir:
 	mkdir -p $@
 $(OUTPUT)/script: Makefile
@@ -38,3 +45,13 @@ $(OUTPUT)/load_address.0x%: load_address.c
 $(OUTPUT)/load_address.static.0x%: load_address.c
 	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \
 		-fPIE -static-pie $< -o $@
+$(OUTPUT)/false: false.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -static $< -o $@
+$(OUTPUT)/inc: $(CHECK_EXEC_SAMPLES)/inc.c
+	$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+$(OUTPUT)/set-exec: $(CHECK_EXEC_SAMPLES)/set-exec.c
+	$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+$(OUTPUT)/script-exec.inc: $(CHECK_EXEC_SAMPLES)/script-exec.inc
+	cp $< $@
+$(OUTPUT)/script-noexec.inc: $(CHECK_EXEC_SAMPLES)/script-noexec.inc
+	cp $< $@
diff --git a/tools/testing/selftests/exec/check-exec-tests.sh b/tools/testing/selftests/exec/check-exec-tests.sh
new file mode 100755
index 000000000000..87102906ae3c
--- /dev/null
+++ b/tools/testing/selftests/exec/check-exec-tests.sh
@@ -0,0 +1,205 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test the "inc" interpreter.
+#
+# See include/uapi/linux/securebits.h, include/uapi/linux/fcntl.h and
+# samples/check-exec/inc.c
+#
+# Copyright © 2024 Microsoft Corporation
+
+set -u -e -o pipefail
+
+EXPECTED_OUTPUT="1"
+exec 2>/dev/null
+
+DIR="$(dirname $(readlink -f "$0"))"
+source "${DIR}"/../kselftest/ktap_helpers.sh
+
+exec_direct() {
+	local expect="$1"
+	local script="$2"
+	shift 2
+	local ret=0
+	local out
+
+	# Updates PATH for `env` to execute the `inc` interpreter.
+	out="$(PATH="." "$@" "${script}")" || ret=$?
+
+	if [[ ${ret} -ne ${expect} ]]; then
+		echo "ERROR: Wrong expectation for direct file execution: ${ret}"
+		return 1
+	fi
+	if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+		echo "ERROR: Wrong output for direct file execution: ${out}"
+		return 1
+	fi
+}
+
+exec_indirect() {
+	local expect="$1"
+	local script="$2"
+	shift 2
+	local ret=0
+	local out
+
+	# Script passed as argument.
+	out="$("$@" ./inc "${script}")" || ret=$?
+
+	if [[ ${ret} -ne ${expect} ]]; then
+		echo "ERROR: Wrong expectation for indirect file execution: ${ret}"
+		return 1
+	fi
+	if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+		echo "ERROR: Wrong output for indirect file execution: ${out}"
+		return 1
+	fi
+}
+
+exec_stdin_reg() {
+	local expect="$1"
+	local script="$2"
+	shift 2
+	local ret=0
+	local out
+
+	# Executing stdin must be allowed if the related file is executable.
+	out="$("$@" ./inc -i < "${script}")" || ret=$?
+
+	if [[ ${ret} -ne ${expect} ]]; then
+		echo "ERROR: Wrong expectation for stdin regular file execution: ${ret}"
+		return 1
+	fi
+	if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+		echo "ERROR: Wrong output for stdin regular file execution: ${out}"
+		return 1
+	fi
+}
+
+exec_stdin_pipe() {
+	local expect="$1"
+	shift
+	local ret=0
+	local out
+
+	# A pipe is not executable.
+	out="$(cat script-exec.inc | "$@" ./inc -i)" || ret=$?
+
+	if [[ ${ret} -ne ${expect} ]]; then
+		echo "ERROR: Wrong expectation for stdin pipe execution: ${ret}"
+		return 1
+	fi
+}
+
+exec_argument() {
+	local expect="$1"
+	local ret=0
+	shift
+	local out
+
+	# Script not coming from a file must not be executed.
+	out="$("$@" ./inc -c "$(< script-exec.inc)")" || ret=$?
+
+	if [[ ${ret} -ne ${expect} ]]; then
+		echo "ERROR: Wrong expectation for arbitrary argument execution: ${ret}"
+		return 1
+	fi
+	if [[ ${ret} -eq 0 && "${out}" != "${EXPECTED_OUTPUT}" ]]; then
+		echo "ERROR: Wrong output for arbitrary argument execution: ${out}"
+		return 1
+	fi
+}
+
+exec_interactive() {
+	exec_stdin_pipe "$@"
+	exec_argument "$@"
+}
+
+ktap_test() {
+	ktap_test_result "$*" "$@"
+}
+
+ktap_print_header
+ktap_set_plan 28
+
+# Without secbit configuration, nothing is changed.
+
+ktap_print_msg "By default, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc
+ktap_test exec_indirect 0 script-exec.inc
+
+ktap_print_msg "By default, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc
+
+ktap_print_msg "By default, non-executable scripts are allowed to be interpreted, but not directly executed."
+# We get 126 because of direct execution by Bash.
+ktap_test exec_direct 126 script-noexec.inc
+ktap_test exec_indirect 0 script-noexec.inc
+
+ktap_print_msg "By default, non-executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-noexec.inc
+
+ktap_print_msg "By default, interactive commands are allowed to be interpreted."
+ktap_test exec_interactive 0
+
+# With only file restriction: protect non-malicious users from inadvertent errors (e.g. python ~/Downloads/*.py).
+
+ktap_print_msg "With -f, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc ./set-exec -f --
+ktap_test exec_indirect 0 script-exec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, non-executable scripts are not allowed to be executed nor interpreted."
+# Direct execution of non-executable script is alwayse denied by the kernel.
+ktap_test exec_direct 1 script-noexec.inc ./set-exec -f --
+ktap_test exec_indirect 1 script-noexec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, non-executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-noexec.inc ./set-exec -f --
+
+ktap_print_msg "With -f, interactive commands are allowed to be interpreted."
+ktap_test exec_interactive 0 ./set-exec -f --
+
+# With only denied interactive commands: check or monitor script content (e.g. with LSM).
+
+ktap_print_msg "With -i, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc ./set-exec -i --
+ktap_test exec_indirect 0 script-exec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, non-executable scripts are allowed to be interpreted, but not directly executed."
+# Direct execution of non-executable script is alwayse denied by the kernel.
+ktap_test exec_direct 1 script-noexec.inc ./set-exec -i --
+ktap_test exec_indirect 0 script-noexec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, non-executable stdin is not allowed to be interpreted."
+ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -i --
+
+ktap_print_msg "With -i, interactive commands are not allowed to be interpreted."
+ktap_test exec_interactive 1 ./set-exec -i --
+
+# With both file restriction and denied interactive commands: only allow executable scripts.
+
+ktap_print_msg "With -fi, executable scripts are allowed to be interpreted and executed."
+ktap_test exec_direct 0 script-exec.inc ./set-exec -fi --
+ktap_test exec_indirect 0 script-exec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, executable stdin is allowed to be interpreted."
+ktap_test exec_stdin_reg 0 script-exec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, non-executable scripts are not allowed to be interpreted nor executed."
+# Direct execution of non-executable script is alwayse denied by the kernel.
+ktap_test exec_direct 1 script-noexec.inc ./set-exec -fi --
+ktap_test exec_indirect 1 script-noexec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, non-executable stdin is not allowed to be interpreted."
+ktap_test exec_stdin_reg 1 script-noexec.inc ./set-exec -fi --
+
+ktap_print_msg "With -fi, interactive commands are not allowed to be interpreted."
+ktap_test exec_interactive 1 ./set-exec -fi --
+
+ktap_finished
diff --git a/tools/testing/selftests/exec/check-exec.c b/tools/testing/selftests/exec/check-exec.c
new file mode 100644
index 000000000000..55bce47e56b7
--- /dev/null
+++ b/tools/testing/selftests/exec/check-exec.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test execveat(2) with AT_EXECVE_CHECK, and prctl(2) with
+ * SECBIT_EXEC_RESTRICT_FILE, SECBIT_EXEC_DENY_INTERACTIVE, and their locked
+ * counterparts.
+ *
+ * Copyright © 2018-2020 ANSSI
+ * Copyright © 2024 Microsoft Corporation
+ *
+ * Author: Mickaël Salaün <mic@digikod.net>
+ */
+
+#include <asm-generic/unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/prctl.h>
+#include <linux/securebits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/capability.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+
+/* Defines AT_EXECVE_CHECK without type conflicts. */
+#define _ASM_GENERIC_FCNTL_H
+#include <linux/fcntl.h>
+
+#include "../kselftest_harness.h"
+
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+			char *const envp[], int flags)
+{
+	return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
+static void drop_privileges(struct __test_metadata *const _metadata)
+{
+	const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;
+	cap_t cap_p;
+
+	if ((cap_get_secbits() & noroot) != noroot)
+		EXPECT_EQ(0, cap_set_secbits(noroot));
+
+	cap_p = cap_get_proc();
+	EXPECT_NE(NULL, cap_p);
+	EXPECT_NE(-1, cap_clear(cap_p));
+
+	/*
+	 * Drops everything, especially CAP_SETPCAP, CAP_DAC_OVERRIDE, and
+	 * CAP_DAC_READ_SEARCH.
+	 */
+	EXPECT_NE(-1, cap_set_proc(cap_p));
+	EXPECT_NE(-1, cap_free(cap_p));
+}
+
+static int test_secbits_set(const unsigned int secbits)
+{
+	int err;
+
+	err = prctl(PR_SET_SECUREBITS, secbits);
+	if (err)
+		return errno;
+	return 0;
+}
+
+FIXTURE(access)
+{
+	int memfd, pipefd;
+	int pipe_fds[2], socket_fds[2];
+};
+
+FIXTURE_VARIANT(access)
+{
+	const bool mount_exec;
+	const bool file_exec;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_exec_file_exec) {
+	/* clang-format on */
+	.mount_exec = true,
+	.file_exec = true,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_exec_file_noexec) {
+	/* clang-format on */
+	.mount_exec = true,
+	.file_exec = false,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_noexec_file_exec) {
+	/* clang-format on */
+	.mount_exec = false,
+	.file_exec = true,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(access, mount_noexec_file_noexec) {
+	/* clang-format on */
+	.mount_exec = false,
+	.file_exec = false,
+};
+
+static const char binary_path[] = "./false";
+static const char workdir_path[] = "./test-mount";
+static const char reg_file_path[] = "./test-mount/regular_file";
+static const char dir_path[] = "./test-mount/directory";
+static const char block_dev_path[] = "./test-mount/block_device";
+static const char char_dev_path[] = "./test-mount/character_device";
+static const char fifo_path[] = "./test-mount/fifo";
+
+FIXTURE_SETUP(access)
+{
+	int procfd_path_size;
+	static const char path_template[] = "/proc/self/fd/%d";
+	char procfd_path[sizeof(path_template) + 10];
+
+	/* Makes sure we are not already restricted nor locked. */
+	EXPECT_EQ(0, test_secbits_set(0));
+
+	/*
+	 * Cleans previous workspace if any error previously happened (don't
+	 * check errors).
+	 */
+	umount(workdir_path);
+	rmdir(workdir_path);
+
+	/* Creates a clean mount point. */
+	ASSERT_EQ(0, mkdir(workdir_path, 00700));
+	ASSERT_EQ(0, mount("test", workdir_path, "tmpfs",
+			   MS_MGC_VAL | (variant->mount_exec ? 0 : MS_NOEXEC),
+			   "mode=0700,size=9m"));
+
+	/* Creates a regular file. */
+	ASSERT_EQ(0, mknod(reg_file_path,
+			   S_IFREG | (variant->file_exec ? 0700 : 0600), 0));
+	/* Creates a directory. */
+	ASSERT_EQ(0, mkdir(dir_path, variant->file_exec ? 0700 : 0600));
+	/* Creates a character device: /dev/null. */
+	ASSERT_EQ(0, mknod(char_dev_path, S_IFCHR | 0400, makedev(1, 3)));
+	/* Creates a block device: /dev/loop0 */
+	ASSERT_EQ(0, mknod(block_dev_path, S_IFBLK | 0400, makedev(7, 0)));
+	/* Creates a fifo. */
+	ASSERT_EQ(0, mknod(fifo_path, S_IFIFO | 0600, 0));
+
+	/* Creates a regular file without user mount point. */
+	self->memfd = memfd_create("test-exec-probe", MFD_CLOEXEC);
+	ASSERT_LE(0, self->memfd);
+	/* Sets mode, which must be ignored by the exec check. */
+	ASSERT_EQ(0, fchmod(self->memfd, variant->file_exec ? 0700 : 0600));
+
+	/* Creates a pipefs file descriptor. */
+	ASSERT_EQ(0, pipe(self->pipe_fds));
+	procfd_path_size = snprintf(procfd_path, sizeof(procfd_path),
+				    path_template, self->pipe_fds[0]);
+	ASSERT_LT(procfd_path_size, sizeof(procfd_path));
+	self->pipefd = open(procfd_path, O_RDWR | O_CLOEXEC);
+	ASSERT_LE(0, self->pipefd);
+	ASSERT_EQ(0, fchmod(self->pipefd, variant->file_exec ? 0700 : 0600));
+
+	/* Creates a socket file descriptor. */
+	ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0,
+				self->socket_fds));
+}
+
+FIXTURE_TEARDOWN_PARENT(access)
+{
+	/* There is no need to unlink the test files. */
+	EXPECT_EQ(0, umount(workdir_path));
+	EXPECT_EQ(0, rmdir(workdir_path));
+}
+
+static void fill_exec_fd(struct __test_metadata *_metadata, const int fd_out)
+{
+	char buf[1024];
+	size_t len;
+	int fd_in;
+
+	fd_in = open(binary_path, O_CLOEXEC | O_RDONLY);
+	ASSERT_LE(0, fd_in);
+	/* Cannot use copy_file_range(2) because of EXDEV. */
+	len = read(fd_in, buf, sizeof(buf));
+	EXPECT_LE(0, len);
+	while (len > 0) {
+		EXPECT_EQ(len, write(fd_out, buf, len))
+		{
+			TH_LOG("Failed to write: %s (%d)", strerror(errno),
+			       errno);
+		}
+		len = read(fd_in, buf, sizeof(buf));
+		EXPECT_LE(0, len);
+	}
+	EXPECT_EQ(0, close(fd_in));
+}
+
+static void fill_exec_path(struct __test_metadata *_metadata,
+			   const char *const path)
+{
+	int fd_out;
+
+	fd_out = open(path, O_CLOEXEC | O_WRONLY);
+	ASSERT_LE(0, fd_out)
+	{
+		TH_LOG("Failed to open %s: %s", path, strerror(errno));
+	}
+	fill_exec_fd(_metadata, fd_out);
+	EXPECT_EQ(0, close(fd_out));
+}
+
+static void test_exec_fd(struct __test_metadata *_metadata, const int fd,
+			 const int err_code)
+{
+	char *const argv[] = { "", NULL };
+	int access_ret, access_errno;
+
+	/*
+	 * If we really execute fd, filled with the "false" binary, the current
+	 * thread will exits with an error, which will be interpreted by the
+	 * test framework as an error.  With AT_EXECVE_CHECK, we only check a
+	 * potential successful execution.
+	 */
+	access_ret = sys_execveat(fd, "", argv, NULL,
+				  AT_EMPTY_PATH | AT_EXECVE_CHECK);
+	access_errno = errno;
+	if (err_code) {
+		EXPECT_EQ(-1, access_ret);
+		EXPECT_EQ(err_code, access_errno)
+		{
+			TH_LOG("Wrong error for execveat(2): %s (%d)",
+			       strerror(access_errno), errno);
+		}
+	} else {
+		EXPECT_EQ(0, access_ret)
+		{
+			TH_LOG("Access denied: %s", strerror(access_errno));
+		}
+	}
+}
+
+static void test_exec_path(struct __test_metadata *_metadata,
+			   const char *const path, const int err_code)
+{
+	int flags = O_CLOEXEC;
+	int fd;
+
+	/* Do not block on pipes. */
+	if (path == fifo_path)
+		flags |= O_NONBLOCK;
+
+	fd = open(path, flags | O_RDONLY);
+	ASSERT_LE(0, fd)
+	{
+		TH_LOG("Failed to open %s: %s", path, strerror(errno));
+	}
+	test_exec_fd(_metadata, fd, err_code);
+	EXPECT_EQ(0, close(fd));
+}
+
+/* Tests that we don't get ENOEXEC. */
+TEST_F(access, regular_file_empty)
+{
+	const int exec = variant->mount_exec && variant->file_exec;
+
+	test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+
+	drop_privileges(_metadata);
+	test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+}
+
+TEST_F(access, regular_file_elf)
+{
+	const int exec = variant->mount_exec && variant->file_exec;
+
+	fill_exec_path(_metadata, reg_file_path);
+
+	test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+
+	drop_privileges(_metadata);
+	test_exec_path(_metadata, reg_file_path, exec ? 0 : EACCES);
+}
+
+/* Tests that we don't get ENOEXEC. */
+TEST_F(access, memfd_empty)
+{
+	const int exec = variant->file_exec;
+
+	test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+
+	drop_privileges(_metadata);
+	test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+}
+
+TEST_F(access, memfd_elf)
+{
+	const int exec = variant->file_exec;
+
+	fill_exec_fd(_metadata, self->memfd);
+
+	test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+
+	drop_privileges(_metadata);
+	test_exec_fd(_metadata, self->memfd, exec ? 0 : EACCES);
+}
+
+TEST_F(access, non_regular_files)
+{
+	test_exec_path(_metadata, dir_path, EACCES);
+	test_exec_path(_metadata, block_dev_path, EACCES);
+	test_exec_path(_metadata, char_dev_path, EACCES);
+	test_exec_path(_metadata, fifo_path, EACCES);
+	test_exec_fd(_metadata, self->socket_fds[0], EACCES);
+	test_exec_fd(_metadata, self->pipefd, EACCES);
+}
+
+/* clang-format off */
+FIXTURE(secbits) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(secbits)
+{
+	const bool is_privileged;
+	const int error;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(secbits, priv) {
+	/* clang-format on */
+	.is_privileged = true,
+	.error = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(secbits, unpriv) {
+	/* clang-format on */
+	.is_privileged = false,
+	.error = EPERM,
+};
+
+FIXTURE_SETUP(secbits)
+{
+	/* Makes sure no exec bits are set. */
+	EXPECT_EQ(0, test_secbits_set(0));
+	EXPECT_EQ(0, prctl(PR_GET_SECUREBITS));
+
+	if (!variant->is_privileged)
+		drop_privileges(_metadata);
+}
+
+FIXTURE_TEARDOWN(secbits)
+{
+}
+
+TEST_F(secbits, legacy)
+{
+	EXPECT_EQ(variant->error, test_secbits_set(0));
+}
+
+#define CHILD(...)                     \
+	do {                           \
+		pid_t child = vfork(); \
+		EXPECT_LE(0, child);   \
+		if (child == 0) {      \
+			__VA_ARGS__;   \
+			_exit(0);      \
+		}                      \
+	} while (0)
+
+TEST_F(secbits, exec)
+{
+	unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+	secbits |= SECBIT_EXEC_RESTRICT_FILE;
+	EXPECT_EQ(0, test_secbits_set(secbits));
+	EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS));
+	CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)));
+
+	secbits |= SECBIT_EXEC_DENY_INTERACTIVE;
+	EXPECT_EQ(0, test_secbits_set(secbits));
+	EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS));
+	CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)));
+
+	secbits &= ~(SECBIT_EXEC_RESTRICT_FILE | SECBIT_EXEC_DENY_INTERACTIVE);
+	EXPECT_EQ(0, test_secbits_set(secbits));
+	EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS));
+	CHILD(EXPECT_EQ(secbits, prctl(PR_GET_SECUREBITS)));
+}
+
+TEST_F(secbits, check_locked_set)
+{
+	unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+	secbits |= SECBIT_EXEC_RESTRICT_FILE;
+	EXPECT_EQ(0, test_secbits_set(secbits));
+	secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED;
+	EXPECT_EQ(0, test_secbits_set(secbits));
+
+	/* Checks lock set but unchanged. */
+	EXPECT_EQ(variant->error, test_secbits_set(secbits));
+	CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+	secbits &= ~SECBIT_EXEC_RESTRICT_FILE;
+	EXPECT_EQ(EPERM, test_secbits_set(0));
+	CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_F(secbits, check_locked_unset)
+{
+	unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+	secbits |= SECBIT_EXEC_RESTRICT_FILE_LOCKED;
+	EXPECT_EQ(0, test_secbits_set(secbits));
+
+	/* Checks lock unset but unchanged. */
+	EXPECT_EQ(variant->error, test_secbits_set(secbits));
+	CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+	secbits &= ~SECBIT_EXEC_RESTRICT_FILE;
+	EXPECT_EQ(EPERM, test_secbits_set(0));
+	CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_F(secbits, restrict_locked_set)
+{
+	unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+	secbits |= SECBIT_EXEC_DENY_INTERACTIVE;
+	EXPECT_EQ(0, test_secbits_set(secbits));
+	secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED;
+	EXPECT_EQ(0, test_secbits_set(secbits));
+
+	/* Checks lock set but unchanged. */
+	EXPECT_EQ(variant->error, test_secbits_set(secbits));
+	CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+	secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE;
+	EXPECT_EQ(EPERM, test_secbits_set(0));
+	CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_F(secbits, restrict_locked_unset)
+{
+	unsigned int secbits = prctl(PR_GET_SECUREBITS);
+
+	secbits |= SECBIT_EXEC_DENY_INTERACTIVE_LOCKED;
+	EXPECT_EQ(0, test_secbits_set(secbits));
+
+	/* Checks lock unset but unchanged. */
+	EXPECT_EQ(variant->error, test_secbits_set(secbits));
+	CHILD(EXPECT_EQ(variant->error, test_secbits_set(secbits)));
+
+	secbits &= ~SECBIT_EXEC_DENY_INTERACTIVE;
+	EXPECT_EQ(EPERM, test_secbits_set(0));
+	CHILD(EXPECT_EQ(EPERM, test_secbits_set(0)));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/exec/config b/tools/testing/selftests/exec/config
new file mode 100644
index 000000000000..c308079867b3
--- /dev/null
+++ b/tools/testing/selftests/exec/config
@@ -0,0 +1,2 @@
+CONFIG_BLK_DEV=y
+CONFIG_BLK_DEV_LOOP=y
diff --git a/tools/testing/selftests/exec/false.c b/tools/testing/selftests/exec/false.c
new file mode 100644
index 000000000000..104383ec3a79
--- /dev/null
+++ b/tools/testing/selftests/exec/false.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+int main(void)
+{
+	return 1;
+}
diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore
new file mode 100644
index 000000000000..82a4846cbc4b
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/*_test
diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile
new file mode 100644
index 000000000000..10be0227b5ae
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+TEST_GEN_PROGS := mount-notify_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
new file mode 100644
index 000000000000..4a2d5c454fd1
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <linux/fanotify.h>
+#include <unistd.h>
+#include <sys/fanotify.h>
+#include <sys/syscall.h>
+
+#include "../../kselftest_harness.h"
+#include "../statmount/statmount.h"
+
+#ifndef FAN_MNT_ATTACH
+struct fanotify_event_info_mnt {
+	struct fanotify_event_info_header hdr;
+	__u64 mnt_id;
+};
+#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
+#endif
+
+#ifndef FAN_MNT_DETACH
+#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
+#endif
+
+#ifndef FAN_REPORT_MNT
+#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+#endif
+
+#ifndef FAN_MARK_MNTNS
+#define FAN_MARK_MNTNS 0x00000110
+#endif
+
+static uint64_t get_mnt_id(struct __test_metadata *const _metadata,
+			   const char *path)
+{
+	struct statx sx;
+
+	ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0);
+	ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE));
+	return sx.stx_mnt_id;
+}
+
+static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+
+FIXTURE(fanotify) {
+	int fan_fd;
+	char buf[256];
+	unsigned int rem;
+	void *next;
+	char root_mntpoint[sizeof(root_mntpoint_templ)];
+	int orig_root;
+	int ns_fd;
+	uint64_t root_id;
+};
+
+FIXTURE_SETUP(fanotify)
+{
+	int ret;
+
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+	self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+	ASSERT_GE(self->ns_fd, 0);
+
+	ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0);
+
+	strcpy(self->root_mntpoint, root_mntpoint_templ);
+	ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
+
+	self->orig_root = open("/", O_PATH | O_CLOEXEC);
+	ASSERT_GE(self->orig_root, 0);
+
+	ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
+
+	ASSERT_EQ(chroot(self->root_mntpoint), 0);
+
+	ASSERT_EQ(chdir("/"), 0);
+
+	ASSERT_EQ(mkdir("a", 0700), 0);
+
+	ASSERT_EQ(mkdir("b", 0700), 0);
+
+	self->root_id = get_mnt_id(_metadata, "/");
+	ASSERT_NE(self->root_id, 0);
+
+	self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0);
+	ASSERT_GE(self->fan_fd, 0);
+
+	ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS,
+			    FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL);
+	ASSERT_EQ(ret, 0);
+
+	self->rem = 0;
+}
+
+FIXTURE_TEARDOWN(fanotify)
+{
+	ASSERT_EQ(self->rem, 0);
+	close(self->fan_fd);
+
+	ASSERT_EQ(fchdir(self->orig_root), 0);
+
+	ASSERT_EQ(chroot("."), 0);
+
+	EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
+	EXPECT_EQ(chdir(self->root_mntpoint), 0);
+	EXPECT_EQ(chdir("/"), 0);
+	EXPECT_EQ(rmdir(self->root_mntpoint), 0);
+}
+
+static uint64_t expect_notify(struct __test_metadata *const _metadata,
+			      FIXTURE_DATA(fanotify) *self,
+			      uint64_t *mask)
+{
+	struct fanotify_event_metadata *meta;
+	struct fanotify_event_info_mnt *mnt;
+	unsigned int thislen;
+
+	if (!self->rem) {
+		ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf));
+		ASSERT_GT(len, 0);
+
+		self->rem = len;
+		self->next = (void *) self->buf;
+	}
+
+	meta = self->next;
+	ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
+
+	thislen = meta->event_len;
+	self->rem -= thislen;
+	self->next += thislen;
+
+	*mask = meta->mask;
+	thislen -= sizeof(*meta);
+
+	mnt = ((void *) meta) + meta->event_len - thislen;
+
+	ASSERT_EQ(thislen, sizeof(*mnt));
+
+	return mnt->mnt_id;
+}
+
+static void expect_notify_n(struct __test_metadata *const _metadata,
+				 FIXTURE_DATA(fanotify) *self,
+				 unsigned int n, uint64_t mask[], uint64_t mnts[])
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		mnts[i] = expect_notify(_metadata, self, &mask[i]);
+}
+
+static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
+				   FIXTURE_DATA(fanotify) *self,
+				   uint64_t expect_mask)
+{
+	uint64_t mntid, mask;
+
+	mntid = expect_notify(_metadata, self, &mask);
+	ASSERT_EQ(expect_mask, mask);
+
+	return mntid;
+}
+
+
+static void expect_notify_mask_n(struct __test_metadata *const _metadata,
+				 FIXTURE_DATA(fanotify) *self,
+				 uint64_t mask, unsigned int n, uint64_t mnts[])
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		mnts[i] = expect_notify_mask(_metadata, self, mask);
+}
+
+static void verify_mount_ids(struct __test_metadata *const _metadata,
+			     const uint64_t list1[], const uint64_t list2[],
+			     size_t num)
+{
+	unsigned int i, j;
+
+	// Check that neither list has any duplicates
+	for (i = 0; i < num; i++) {
+		for (j = 0; j < num; j++) {
+			if (i != j) {
+				ASSERT_NE(list1[i], list1[j]);
+				ASSERT_NE(list2[i], list2[j]);
+			}
+		}
+	}
+	// Check that all list1 memebers can be found in list2. Together with
+	// the above it means that the list1 and list2 represent the same sets.
+	for (i = 0; i < num; i++) {
+		for (j = 0; j < num; j++) {
+			if (list1[i] == list2[j])
+				break;
+		}
+		ASSERT_NE(j, num);
+	}
+}
+
+static void check_mounted(struct __test_metadata *const _metadata,
+			  const uint64_t mnts[], size_t num)
+{
+	ssize_t ret;
+	uint64_t *list;
+
+	list = malloc((num + 1) * sizeof(list[0]));
+	ASSERT_NE(list, NULL);
+
+	ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
+	ASSERT_EQ(ret, num);
+
+	verify_mount_ids(_metadata, mnts, list, num);
+
+	free(list);
+}
+
+static void setup_mount_tree(struct __test_metadata *const _metadata,
+			    int log2_num)
+{
+	int ret, i;
+
+	ret = mount("", "/", NULL, MS_SHARED, NULL);
+	ASSERT_EQ(ret, 0);
+
+	for (i = 0; i < log2_num; i++) {
+		ret = mount("/", "/", NULL, MS_BIND, NULL);
+		ASSERT_EQ(ret, 0);
+	}
+}
+
+TEST_F(fanotify, bind)
+{
+	int ret;
+	uint64_t mnts[2] = { self->root_id };
+
+	ret = mount("/", "/", NULL, MS_BIND, NULL);
+	ASSERT_EQ(ret, 0);
+
+	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+	ASSERT_NE(mnts[0], mnts[1]);
+
+	check_mounted(_metadata, mnts, 2);
+
+	// Cleanup
+	uint64_t detach_id;
+	ret = umount("/");
+	ASSERT_EQ(ret, 0);
+
+	detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+	ASSERT_EQ(detach_id, mnts[1]);
+
+	check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, move)
+{
+	int ret;
+	uint64_t mnts[2] = { self->root_id };
+	uint64_t move_id;
+
+	ret = mount("/", "/a", NULL, MS_BIND, NULL);
+	ASSERT_EQ(ret, 0);
+
+	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+	ASSERT_NE(mnts[0], mnts[1]);
+
+	check_mounted(_metadata, mnts, 2);
+
+	ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
+	ASSERT_EQ(ret, 0);
+
+	move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
+	ASSERT_EQ(move_id, mnts[1]);
+
+	// Cleanup
+	ret = umount("/b");
+	ASSERT_EQ(ret, 0);
+
+	check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, propagate)
+{
+	const unsigned int log2_num = 4;
+	const unsigned int num = (1 << log2_num);
+	uint64_t mnts[num];
+
+	setup_mount_tree(_metadata, log2_num);
+
+	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
+
+	mnts[0] = self->root_id;
+	check_mounted(_metadata, mnts, num);
+
+	// Cleanup
+	int ret;
+	uint64_t mnts2[num];
+	ret = umount2("/", MNT_DETACH);
+	ASSERT_EQ(ret, 0);
+
+	ret = mount("", "/", NULL, MS_PRIVATE, NULL);
+	ASSERT_EQ(ret, 0);
+
+	mnts2[0] = self->root_id;
+	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
+	verify_mount_ids(_metadata, mnts, mnts2, num);
+
+	check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, fsmount)
+{
+	int ret, fs, mnt;
+	uint64_t mnts[2] = { self->root_id };
+
+	fs = fsopen("tmpfs", 0);
+	ASSERT_GE(fs, 0);
+
+        ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
+	ASSERT_EQ(ret, 0);
+
+        mnt = fsmount(fs, 0, 0);
+	ASSERT_GE(mnt, 0);
+
+        close(fs);
+
+	ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
+	ASSERT_EQ(ret, 0);
+
+        close(mnt);
+
+	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+	ASSERT_NE(mnts[0], mnts[1]);
+
+	check_mounted(_metadata, mnts, 2);
+
+	// Cleanup
+	uint64_t detach_id;
+	ret = umount("/a");
+	ASSERT_EQ(ret, 0);
+
+	detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+	ASSERT_EQ(detach_id, mnts[1]);
+
+	check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, reparent)
+{
+	uint64_t mnts[6] = { self->root_id };
+	uint64_t dmnts[3];
+	uint64_t masks[3];
+	unsigned int i;
+	int ret;
+
+	// Create setup with a[1] -> b[2] propagation
+	ret = mount("/", "/a", NULL, MS_BIND, NULL);
+	ASSERT_EQ(ret, 0);
+
+	ret = mount("", "/a", NULL, MS_SHARED, NULL);
+	ASSERT_EQ(ret, 0);
+
+	ret = mount("/a", "/b", NULL, MS_BIND, NULL);
+	ASSERT_EQ(ret, 0);
+
+	ret = mount("", "/b", NULL, MS_SLAVE, NULL);
+	ASSERT_EQ(ret, 0);
+
+	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+	check_mounted(_metadata, mnts, 3);
+
+	// Mount on a[3], which is propagated to b[4]
+	ret = mount("/", "/a", NULL, MS_BIND, NULL);
+	ASSERT_EQ(ret, 0);
+
+	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
+
+	check_mounted(_metadata, mnts, 5);
+
+	// Mount on b[5], not propagated
+	ret = mount("/", "/b", NULL, MS_BIND, NULL);
+	ASSERT_EQ(ret, 0);
+
+	mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+	check_mounted(_metadata, mnts, 6);
+
+	// Umount a[3], which is propagated to b[4], but not b[5]
+	// This will result in b[5] "falling" on b[2]
+	ret = umount("/a");
+	ASSERT_EQ(ret, 0);
+
+	expect_notify_n(_metadata, self, 3, masks, dmnts);
+	verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
+
+	for (i = 0; i < 3; i++) {
+		if (dmnts[i] == mnts[5]) {
+			ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
+		} else {
+			ASSERT_EQ(masks[i], FAN_MNT_DETACH);
+		}
+	}
+
+	mnts[3] = mnts[5];
+	check_mounted(_metadata, mnts, 4);
+
+	// Cleanup
+	ret = umount("/b");
+	ASSERT_EQ(ret, 0);
+
+	ret = umount("/a");
+	ASSERT_EQ(ret, 0);
+
+	ret = umount("/b");
+	ASSERT_EQ(ret, 0);
+
+	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
+	verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
+
+	check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, rmdir)
+{
+	uint64_t mnts[3] = { self->root_id };
+	int ret;
+
+	ret = mount("/", "/a", NULL, MS_BIND, NULL);
+	ASSERT_EQ(ret, 0);
+
+	ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
+	ASSERT_EQ(ret, 0);
+
+	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+	check_mounted(_metadata, mnts, 3);
+
+	ret = chdir("/a");
+	ASSERT_EQ(ret, 0);
+
+	ret = fork();
+	ASSERT_GE(ret, 0);
+
+	if (ret == 0) {
+		chdir("/");
+		unshare(CLONE_NEWNS);
+		mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+		umount2("/a", MNT_DETACH);
+		// This triggers a detach in the other namespace
+		rmdir("/a");
+		exit(0);
+	}
+	wait(NULL);
+
+	expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
+	check_mounted(_metadata, mnts, 1);
+
+	// Cleanup
+	ret = chdir("/");
+	ASSERT_EQ(ret, 0);
+}
+
+TEST_F(fanotify, pivot_root)
+{
+	uint64_t mnts[3] = { self->root_id };
+	uint64_t mnts2[3];
+	int ret;
+
+	ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
+	ASSERT_EQ(ret, 0);
+
+	mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+	ret = mkdir("/a/new", 0700);
+	ASSERT_EQ(ret, 0);
+
+	ret = mkdir("/a/old", 0700);
+	ASSERT_EQ(ret, 0);
+
+	ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
+	ASSERT_EQ(ret, 0);
+
+	mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+	check_mounted(_metadata, mnts, 3);
+
+	ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
+	ASSERT_EQ(ret, 0);
+
+	expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
+	verify_mount_ids(_metadata, mnts, mnts2, 2);
+	check_mounted(_metadata, mnts, 3);
+
+	// Cleanup
+	ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
+	ASSERT_EQ(ret, 0);
+
+	ret = umount("/a/new");
+	ASSERT_EQ(ret, 0);
+
+	ret = umount("/a");
+	ASSERT_EQ(ret, 0);
+
+	check_mounted(_metadata, mnts, 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
index 457cf76f3c5f..a3d8015897e9 100644
--- a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
+++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -3,6 +3,8 @@
 
 #define _GNU_SOURCE
 #include <fcntl.h>
+#include <linux/auto_dev-ioctl.h>
+#include <linux/errno.h>
 #include <sched.h>
 #include <stdio.h>
 #include <string.h>
@@ -146,4 +148,16 @@ TEST_F(iterate_mount_namespaces, iterate_backward)
 	}
 }
 
+TEST_F(iterate_mount_namespaces, nfs_valid_ioctl)
+{
+	ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_OPENMOUNT, NULL), 0);
+	ASSERT_EQ(errno, ENOTTY);
+
+	ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_CLOSEMOUNT, NULL), 0);
+	ASSERT_EQ(errno, ENOTTY);
+
+	ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_READY, NULL), 0);
+	ASSERT_EQ(errno, ENOTTY);
+}
+
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile
index e8d1adb021af..6c661232b3b5 100644
--- a/tools/testing/selftests/filesystems/overlayfs/Makefile
+++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
@@ -1,7 +1,14 @@
 # SPDX-License-Identifier: GPL-2.0
 
-TEST_GEN_PROGS := dev_in_maps set_layers_via_fds
+CFLAGS += -Wall
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lcap
 
-CFLAGS := -Wall -Werror
+LOCAL_HDRS += wrappers.h log.h
+
+TEST_GEN_PROGS := dev_in_maps
+TEST_GEN_PROGS += set_layers_via_fds
 
 include ../../lib.mk
+
+$(OUTPUT)/set_layers_via_fds: ../utils.c
diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
index 1d0ae785a667..5074e64e74a8 100644
--- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -6,26 +6,40 @@
 #include <sched.h>
 #include <stdio.h>
 #include <string.h>
+#include <sys/socket.h>
 #include <sys/stat.h>
+#include <sys/sysmacros.h>
 #include <sys/mount.h>
 #include <unistd.h>
 
 #include "../../kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
 #include "log.h"
+#include "../utils.h"
 #include "wrappers.h"
 
 FIXTURE(set_layers_via_fds) {
+	int pidfd;
 };
 
 FIXTURE_SETUP(set_layers_via_fds)
 {
-	ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+	self->pidfd = -EBADF;
+	EXPECT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+	EXPECT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0);
 }
 
 FIXTURE_TEARDOWN(set_layers_via_fds)
 {
+	if (self->pidfd >= 0) {
+		EXPECT_EQ(sys_pidfd_send_signal(self->pidfd, SIGKILL, NULL, 0), 0);
+		EXPECT_EQ(close(self->pidfd), 0);
+	}
 	umount2("/set_layers_via_fds", 0);
-	ASSERT_EQ(rmdir("/set_layers_via_fds"), 0);
+	EXPECT_EQ(rmdir("/set_layers_via_fds"), 0);
+
+	umount2("/set_layers_via_fds_tmpfs", 0);
+	EXPECT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0);
 }
 
 TEST_F(set_layers_via_fds, set_layers_via_fds)
@@ -214,4 +228,493 @@ TEST_F(set_layers_via_fds, set_500_layers_via_fds)
 	ASSERT_EQ(close(fd_overlay), 0);
 }
 
+TEST_F(set_layers_via_fds, set_override_creds)
+{
+	int fd_context, fd_tmpfs, fd_overlay;
+	int layer_fds[] = { [0 ... 3] = -EBADF };
+	pid_t pid;
+	int pidfd;
+
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+	fd_context = sys_fsopen("tmpfs", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_tmpfs, 0);
+	ASSERT_EQ(close(fd_context), 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+	layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+	ASSERT_GE(layer_fds[0], 0);
+
+	layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+	ASSERT_GE(layer_fds[1], 0);
+
+	layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+	ASSERT_GE(layer_fds[2], 0);
+
+	layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+	ASSERT_GE(layer_fds[3], 0);
+
+	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	ASSERT_EQ(close(fd_tmpfs), 0);
+
+	fd_context = sys_fsopen("overlay", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir",   NULL, layer_fds[0]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir",  NULL, layer_fds[1]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+	pid = create_child(&pidfd, 0);
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+			TH_LOG("sys_fsconfig should have succeeded");
+			_exit(EXIT_FAILURE);
+		}
+
+		_exit(EXIT_SUCCESS);
+	}
+	ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+	ASSERT_GE(close(pidfd), 0);
+
+	pid = create_child(&pidfd, 0);
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "nooverride_creds", NULL, 0)) {
+			TH_LOG("sys_fsconfig should have succeeded");
+			_exit(EXIT_FAILURE);
+		}
+
+		_exit(EXIT_SUCCESS);
+	}
+	ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+	ASSERT_GE(close(pidfd), 0);
+
+	pid = create_child(&pidfd, 0);
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+			TH_LOG("sys_fsconfig should have succeeded");
+			_exit(EXIT_FAILURE);
+		}
+
+		_exit(EXIT_SUCCESS);
+	}
+	ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+	ASSERT_GE(close(pidfd), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	fd_overlay = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_overlay, 0);
+
+	ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	ASSERT_EQ(close(fd_context), 0);
+	ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_invalid)
+{
+	int fd_context, fd_tmpfs, fd_overlay, ret;
+	int layer_fds[] = { [0 ... 3] = -EBADF };
+	pid_t pid;
+	int fd_userns1, fd_userns2;
+	int ipc_sockets[2];
+	char c;
+	const unsigned int predictable_fd_context_nr = 123;
+
+	fd_userns1 = get_userns_fd(0, 0, 10000);
+	ASSERT_GE(fd_userns1, 0);
+
+	fd_userns2 = get_userns_fd(0, 1234, 10000);
+	ASSERT_GE(fd_userns2, 0);
+
+	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	ASSERT_GE(ret, 0);
+
+	pid = create_child(&self->pidfd, 0);
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		if (close(ipc_sockets[0])) {
+			TH_LOG("close should have succeeded");
+			_exit(EXIT_FAILURE);
+		}
+
+		if (!switch_userns(fd_userns2, 0, 0, false)) {
+			TH_LOG("switch_userns should have succeeded");
+			_exit(EXIT_FAILURE);
+		}
+
+		if (read_nointr(ipc_sockets[1], &c, 1) != 1) {
+			TH_LOG("read_nointr should have succeeded");
+			_exit(EXIT_FAILURE);
+		}
+
+		if (close(ipc_sockets[1])) {
+			TH_LOG("close should have succeeded");
+			_exit(EXIT_FAILURE);
+		}
+
+		if (!sys_fsconfig(predictable_fd_context_nr, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+			TH_LOG("sys_fsconfig should have failed");
+			_exit(EXIT_FAILURE);
+		}
+
+		_exit(EXIT_SUCCESS);
+	}
+
+	ASSERT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(switch_userns(fd_userns1, 0, 0, false), true);
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+	fd_context = sys_fsopen("tmpfs", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_tmpfs, 0);
+	ASSERT_EQ(close(fd_context), 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+	layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+	ASSERT_GE(layer_fds[0], 0);
+
+	layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+	ASSERT_GE(layer_fds[1], 0);
+
+	layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+	ASSERT_GE(layer_fds[2], 0);
+
+	layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+	ASSERT_GE(layer_fds[3], 0);
+
+	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	ASSERT_EQ(close(fd_tmpfs), 0);
+
+	fd_context = sys_fsopen("overlay", 0);
+	ASSERT_GE(fd_context, 0);
+	ASSERT_EQ(dup3(fd_context, predictable_fd_context_nr, 0), predictable_fd_context_nr);
+	ASSERT_EQ(close(fd_context), 0);
+	fd_context = predictable_fd_context_nr;
+	ASSERT_EQ(write_nointr(ipc_sockets[0], "1", 1), 1);
+	ASSERT_EQ(close(ipc_sockets[0]), 0);
+
+	ASSERT_EQ(wait_for_pid(pid), 0);
+	ASSERT_EQ(close(self->pidfd), 0);
+	self->pidfd = -EBADF;
+
+	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir",   NULL, layer_fds[0]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir",  NULL, layer_fds[1]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+	for (int i = 0; i < ARRAY_SIZE(layer_fds); i++)
+		ASSERT_EQ(close(layer_fds[i]), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	fd_overlay = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_overlay, 0);
+
+	ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	ASSERT_EQ(close(fd_context), 0);
+	ASSERT_EQ(close(fd_overlay), 0);
+	ASSERT_EQ(close(fd_userns1), 0);
+	ASSERT_EQ(close(fd_userns2), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_nomknod)
+{
+	int fd_context, fd_tmpfs, fd_overlay;
+	int layer_fds[] = { [0 ... 3] = -EBADF };
+	pid_t pid;
+	int pidfd;
+
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+	fd_context = sys_fsopen("tmpfs", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_tmpfs, 0);
+	ASSERT_EQ(close(fd_context), 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+	layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+	ASSERT_GE(layer_fds[0], 0);
+
+	layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+	ASSERT_GE(layer_fds[1], 0);
+
+	layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+	ASSERT_GE(layer_fds[2], 0);
+
+	layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+	ASSERT_GE(layer_fds[3], 0);
+
+	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	ASSERT_EQ(close(fd_tmpfs), 0);
+
+	fd_context = sys_fsopen("overlay", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir",   NULL, layer_fds[0]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir",  NULL, layer_fds[1]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+	pid = create_child(&pidfd, 0);
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		if (!cap_down(CAP_MKNOD))
+			_exit(EXIT_FAILURE);
+
+		if (!cap_down(CAP_SYS_ADMIN))
+			_exit(EXIT_FAILURE);
+
+		if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0))
+			_exit(EXIT_FAILURE);
+
+		_exit(EXIT_SUCCESS);
+	}
+	ASSERT_EQ(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+	ASSERT_GE(close(pidfd), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	fd_overlay = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_overlay, 0);
+
+	ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	ASSERT_EQ(mknodat(fd_overlay, "dev-zero", S_IFCHR | 0644, makedev(1, 5)), -1);
+	ASSERT_EQ(errno, EPERM);
+
+	ASSERT_EQ(close(fd_context), 0);
+	ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds)
+{
+	int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower;
+	int layer_fds[500] = { [0 ... 499] = -EBADF };
+
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+	fd_context = sys_fsopen("tmpfs", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_tmpfs, 0);
+	ASSERT_EQ(close(fd_context), 0);
+
+	for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+		char path[100];
+
+		sprintf(path, "l%d", i);
+		ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0);
+		layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY | O_PATH);
+		ASSERT_GE(layer_fds[i], 0);
+	}
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+	fd_work = openat(fd_tmpfs, "w", O_DIRECTORY | O_PATH);
+	ASSERT_GE(fd_work, 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+	fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY | O_PATH);
+	ASSERT_GE(fd_upper, 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0);
+	fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY | O_PATH);
+	ASSERT_GE(fd_lower, 0);
+
+	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	ASSERT_EQ(close(fd_tmpfs), 0);
+
+	fd_context = sys_fsopen("overlay", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir",   NULL, fd_work), 0);
+	ASSERT_EQ(close(fd_work), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir",  NULL, fd_upper), 0);
+	ASSERT_EQ(close(fd_upper), 0);
+
+	for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+		ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0);
+		ASSERT_EQ(close(layer_fds[i]), 0);
+	}
+
+	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0);
+	ASSERT_EQ(close(fd_lower), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	fd_overlay = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_overlay, 0);
+	ASSERT_EQ(close(fd_context), 0);
+	ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_layers_via_detached_mount_fds)
+{
+	int fd_context, fd_tmpfs, fd_overlay, fd_tmp;
+	int layer_fds[] = { [0 ... 8] = -EBADF };
+	bool layers_found[] = { [0 ... 8] =  false };
+	size_t len = 0;
+	char *line = NULL;
+	FILE *f_mountinfo;
+
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+	ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+	fd_context = sys_fsopen("tmpfs", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_tmpfs, 0);
+	ASSERT_EQ(close(fd_context), 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u/upper", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "u/work", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0);
+	ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0);
+
+	ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/set_layers_via_fds_tmpfs", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	fd_tmp = open_tree(fd_tmpfs, "u", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(fd_tmp, 0);
+
+	layer_fds[0] = openat(fd_tmp, "upper", O_CLOEXEC | O_DIRECTORY | O_PATH);
+	ASSERT_GE(layer_fds[0], 0);
+
+	layer_fds[1] = openat(fd_tmp, "work", O_CLOEXEC | O_DIRECTORY | O_PATH);
+	ASSERT_GE(layer_fds[1], 0);
+
+	layer_fds[2] = open_tree(fd_tmpfs, "l1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[2], 0);
+
+	layer_fds[3] = open_tree(fd_tmpfs, "l2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[3], 0);
+
+	layer_fds[4] = open_tree(fd_tmpfs, "l3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[4], 0);
+
+	layer_fds[5] = open_tree(fd_tmpfs, "l4", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[5], 0);
+
+	layer_fds[6] = open_tree(fd_tmpfs, "d1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[6], 0);
+
+	layer_fds[7] = open_tree(fd_tmpfs, "d2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[7], 0);
+
+	layer_fds[8] = open_tree(fd_tmpfs, "d3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(layer_fds[8], 0);
+
+	ASSERT_EQ(close(fd_tmpfs), 0);
+
+	fd_context = sys_fsopen("overlay", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir",  NULL, layer_fds[0]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir",   NULL, layer_fds[1]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+",  NULL, layer_fds[6]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+",  NULL, layer_fds[7]), 0);
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+",  NULL, layer_fds[8]), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	fd_overlay = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_overlay, 0);
+
+	ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	f_mountinfo = fopen("/proc/self/mountinfo", "r");
+	ASSERT_NE(f_mountinfo, NULL);
+
+	while (getline(&line, &len, f_mountinfo) != -1) {
+		char *haystack = line;
+
+		if (strstr(haystack, "workdir=/tmp/w"))
+			layers_found[0] = true;
+		if (strstr(haystack, "upperdir=/tmp/u"))
+			layers_found[1] = true;
+		if (strstr(haystack, "lowerdir+=/tmp/l1"))
+			layers_found[2] = true;
+		if (strstr(haystack, "lowerdir+=/tmp/l2"))
+			layers_found[3] = true;
+		if (strstr(haystack, "lowerdir+=/tmp/l3"))
+			layers_found[4] = true;
+		if (strstr(haystack, "lowerdir+=/tmp/l4"))
+			layers_found[5] = true;
+		if (strstr(haystack, "datadir+=/tmp/d1"))
+			layers_found[6] = true;
+		if (strstr(haystack, "datadir+=/tmp/d2"))
+			layers_found[7] = true;
+		if (strstr(haystack, "datadir+=/tmp/d3"))
+			layers_found[8] = true;
+	}
+	free(line);
+
+	for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+		ASSERT_EQ(layers_found[i], true);
+		ASSERT_EQ(close(layer_fds[i]), 0);
+	}
+
+	ASSERT_EQ(close(fd_context), 0);
+	ASSERT_EQ(close(fd_overlay), 0);
+	ASSERT_EQ(fclose(f_mountinfo), 0);
+}
+
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/overlayfs/wrappers.h
index 071b95fd2ac0..c38bc48e0cfa 100644
--- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h
+++ b/tools/testing/selftests/filesystems/overlayfs/wrappers.h
@@ -44,4 +44,21 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname,
 		       to_pathname, flags);
 }
 
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+	return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
 #endif
diff --git a/tools/testing/selftests/filesystems/statmount/.gitignore b/tools/testing/selftests/filesystems/statmount/.gitignore
index 82a4846cbc4b..973363ad66a2 100644
--- a/tools/testing/selftests/filesystems/statmount/.gitignore
+++ b/tools/testing/selftests/filesystems/statmount/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
+statmount_test_ns
 /*_test
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
index f4294bab9d73..a7a5289ddae9 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount.h
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -25,7 +25,7 @@ static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
 	return syscall(__NR_statmount, &req, buf, bufsize, flags);
 }
 
-static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
+static inline ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
 			 uint64_t last_mnt_id, uint64_t list[], size_t num,
 			 unsigned int flags)
 {
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index 8eb6aa606a0d..46d289611ce8 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -383,6 +383,10 @@ static void test_statmount_mnt_point(void)
 		return;
 	}
 
+	if (!(sm->mask & STATMOUNT_MNT_POINT)) {
+		ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n");
+		return;
+	}
 	if (strcmp(sm->str + sm->mnt_point, "/") != 0) {
 		ksft_test_result_fail("unexpected mount point: '%s' != '/'\n",
 				      sm->str + sm->mnt_point);
@@ -408,6 +412,10 @@ static void test_statmount_mnt_root(void)
 				      strerror(errno));
 		return;
 	}
+	if (!(sm->mask & STATMOUNT_MNT_ROOT)) {
+		ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n");
+		return;
+	}
 	mnt_root = sm->str + sm->mnt_root;
 	last_root = strrchr(mnt_root, '/');
 	if (last_root)
@@ -437,6 +445,10 @@ static void test_statmount_fs_type(void)
 				      strerror(errno));
 		return;
 	}
+	if (!(sm->mask & STATMOUNT_FS_TYPE)) {
+		ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n");
+		return;
+	}
 	fs_type = sm->str + sm->fs_type;
 	for (s = known_fs; s != NULL; s++) {
 		if (strcmp(fs_type, *s) == 0)
@@ -464,6 +476,11 @@ static void test_statmount_mnt_opts(void)
 		return;
 	}
 
+	if (!(sm->mask & STATMOUNT_MNT_BASIC)) {
+		ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n");
+		return;
+	}
+
 	while (getline(&line, &len, f_mountinfo) != -1) {
 		int i;
 		char *p, *p2;
@@ -514,7 +531,10 @@ static void test_statmount_mnt_opts(void)
 		if (p2)
 			*p2 = '\0';
 
-		statmount_opts = sm->str + sm->mnt_opts;
+		if (sm->mask & STATMOUNT_MNT_OPTS)
+			statmount_opts = sm->str + sm->mnt_opts;
+		else
+			statmount_opts = "";
 		if (strcmp(statmount_opts, p) != 0)
 			ksft_test_result_fail(
 				"unexpected mount options: '%s' != '%s'\n",
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
new file mode 100644
index 000000000000..e553c89c5b19
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <fcntl.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <grp.h>
+#include <linux/limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/fsuid.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/xattr.h>
+
+#include "utils.h"
+
+#define MAX_USERNS_LEVEL 32
+
+#define syserror(format, ...)                           \
+	({                                              \
+		fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
+		(-errno);                               \
+	})
+
+#define syserror_set(__ret__, format, ...)                    \
+	({                                                    \
+		typeof(__ret__) __internal_ret__ = (__ret__); \
+		errno = labs(__ret__);                        \
+		fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__);       \
+		__internal_ret__;                             \
+	})
+
+#define STRLITERALLEN(x) (sizeof(""x"") - 1)
+
+#define INTTYPE_TO_STRLEN(type)             \
+	(2 + (sizeof(type) <= 1             \
+		  ? 3                       \
+		  : sizeof(type) <= 2       \
+			? 5                 \
+			: sizeof(type) <= 4 \
+			      ? 10          \
+			      : sizeof(type) <= 8 ? 20 : sizeof(int[-2 * (sizeof(type) > 8)])))
+
+#define list_for_each(__iterator, __list) \
+	for (__iterator = (__list)->next; __iterator != __list; __iterator = __iterator->next)
+
+typedef enum idmap_type_t {
+	ID_TYPE_UID,
+	ID_TYPE_GID
+} idmap_type_t;
+
+struct id_map {
+	idmap_type_t map_type;
+	__u32 nsid;
+	__u32 hostid;
+	__u32 range;
+};
+
+struct list {
+	void *elem;
+	struct list *next;
+	struct list *prev;
+};
+
+struct userns_hierarchy {
+	int fd_userns;
+	int fd_event;
+	unsigned int level;
+	struct list id_map;
+};
+
+static inline void list_init(struct list *list)
+{
+	list->elem = NULL;
+	list->next = list->prev = list;
+}
+
+static inline int list_empty(const struct list *list)
+{
+	return list == list->next;
+}
+
+static inline void __list_add(struct list *new, struct list *prev, struct list *next)
+{
+	next->prev = new;
+	new->next = next;
+	new->prev = prev;
+	prev->next = new;
+}
+
+static inline void list_add_tail(struct list *head, struct list *list)
+{
+	__list_add(list, head->prev, head);
+}
+
+static inline void list_del(struct list *list)
+{
+	struct list *next, *prev;
+
+	next = list->next;
+	prev = list->prev;
+	next->prev = prev;
+	prev->next = next;
+}
+
+static ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+	ssize_t ret;
+
+	do {
+		ret = read(fd, buf, count);
+	} while (ret < 0 && errno == EINTR);
+
+	return ret;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+	ssize_t ret;
+
+	do {
+		ret = write(fd, buf, count);
+	} while (ret < 0 && errno == EINTR);
+
+	return ret;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+	void *stack;
+
+	stack = malloc(__STACK_SIZE);
+	if (!stack)
+		return -ENOMEM;
+
+#ifdef __ia64__
+	return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+	return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int get_userns_fd_cb(void *data)
+{
+	for (;;)
+		pause();
+	_exit(0);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+	int status, ret;
+
+again:
+	ret = waitpid(pid, &status, 0);
+	if (ret == -1) {
+		if (errno == EINTR)
+			goto again;
+
+		return -1;
+	}
+
+	if (!WIFEXITED(status))
+		return -1;
+
+	return WEXITSTATUS(status);
+}
+
+static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
+{
+	int fd = -EBADF, setgroups_fd = -EBADF;
+	int fret = -1;
+	int ret;
+	char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+		  STRLITERALLEN("/setgroups") + 1];
+
+	if (geteuid() != 0 && map_type == ID_TYPE_GID) {
+		ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
+		if (ret < 0 || ret >= sizeof(path))
+			goto out;
+
+		setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
+		if (setgroups_fd < 0 && errno != ENOENT) {
+			syserror("Failed to open \"%s\"", path);
+			goto out;
+		}
+
+		if (setgroups_fd >= 0) {
+			ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
+			if (ret != STRLITERALLEN("deny\n")) {
+				syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
+				goto out;
+			}
+		}
+	}
+
+	ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
+	if (ret < 0 || ret >= sizeof(path))
+		goto out;
+
+	fd = open(path, O_WRONLY | O_CLOEXEC);
+	if (fd < 0) {
+		syserror("Failed to open \"%s\"", path);
+		goto out;
+	}
+
+	ret = write_nointr(fd, buf, buf_size);
+	if (ret != buf_size) {
+		syserror("Failed to write %cid mapping to \"%s\"",
+			 map_type == ID_TYPE_UID ? 'u' : 'g', path);
+		goto out;
+	}
+
+	fret = 0;
+out:
+	close(fd);
+	close(setgroups_fd);
+
+	return fret;
+}
+
+static int map_ids_from_idmap(struct list *idmap, pid_t pid)
+{
+	int fill, left;
+	char mapbuf[4096] = {};
+	bool had_entry = false;
+	idmap_type_t map_type, u_or_g;
+
+	if (list_empty(idmap))
+		return 0;
+
+	for (map_type = ID_TYPE_UID, u_or_g = 'u';
+	     map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
+		char *pos = mapbuf;
+		int ret;
+		struct list *iterator;
+
+
+		list_for_each(iterator, idmap) {
+			struct id_map *map = iterator->elem;
+			if (map->map_type != map_type)
+				continue;
+
+			had_entry = true;
+
+			left = 4096 - (pos - mapbuf);
+			fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
+			/*
+			 * The kernel only takes <= 4k for writes to
+			 * /proc/<pid>/{g,u}id_map
+			 */
+			if (fill <= 0 || fill >= left)
+				return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
+
+			pos += fill;
+		}
+		if (!had_entry)
+			continue;
+
+		ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
+		if (ret < 0)
+			return syserror("Failed to write mapping: %s", mapbuf);
+
+		memset(mapbuf, 0, sizeof(mapbuf));
+	}
+
+	return 0;
+}
+
+static int get_userns_fd_from_idmap(struct list *idmap)
+{
+	int ret;
+	pid_t pid;
+	char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+		     STRLITERALLEN("/ns/user") + 1];
+
+	pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
+	if (pid < 0)
+		return -errno;
+
+	ret = map_ids_from_idmap(idmap, pid);
+	if (ret < 0)
+		return ret;
+
+	ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
+	if (ret < 0 || (size_t)ret >= sizeof(path_ns))
+		ret = -EIO;
+	else
+		ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
+
+	(void)kill(pid, SIGKILL);
+	(void)wait_for_pid(pid);
+	return ret;
+}
+
+int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+	struct list head, uid_mapl, gid_mapl;
+	struct id_map uid_map = {
+		.map_type	= ID_TYPE_UID,
+		.nsid		= nsid,
+		.hostid		= hostid,
+		.range		= range,
+	};
+	struct id_map gid_map = {
+		.map_type	= ID_TYPE_GID,
+		.nsid		= nsid,
+		.hostid		= hostid,
+		.range		= range,
+	};
+
+	list_init(&head);
+	uid_mapl.elem = &uid_map;
+	gid_mapl.elem = &gid_map;
+	list_add_tail(&head, &uid_mapl);
+	list_add_tail(&head, &gid_mapl);
+
+	return get_userns_fd_from_idmap(&head);
+}
+
+bool switch_ids(uid_t uid, gid_t gid)
+{
+	if (setgroups(0, NULL))
+		return syserror("failure: setgroups");
+
+	if (setresgid(gid, gid, gid))
+		return syserror("failure: setresgid");
+
+	if (setresuid(uid, uid, uid))
+		return syserror("failure: setresuid");
+
+	/* Ensure we can access proc files from processes we can ptrace. */
+	if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0))
+		return syserror("failure: make dumpable");
+
+	return true;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h);
+
+static int userns_fd_cb(void *data)
+{
+	struct userns_hierarchy *h = data;
+	char c;
+	int ret;
+
+	ret = read_nointr(h->fd_event, &c, 1);
+	if (ret < 0)
+		return syserror("failure: read from socketpair");
+
+	/* Only switch ids if someone actually wrote a mapping for us. */
+	if (c == '1') {
+		if (!switch_ids(0, 0))
+			return syserror("failure: switch ids to 0");
+	}
+
+	ret = write_nointr(h->fd_event, "1", 1);
+	if (ret < 0)
+		return syserror("failure: write to socketpair");
+
+	ret = create_userns_hierarchy(++h);
+	if (ret < 0)
+		return syserror("failure: userns level %d", h->level);
+
+	return 0;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h)
+{
+	int fret = -1;
+	char c;
+	int fd_socket[2];
+	int fd_userns = -EBADF, ret = -1;
+	ssize_t bytes;
+	pid_t pid;
+	char path[256];
+
+	if (h->level == MAX_USERNS_LEVEL)
+		return 0;
+
+	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fd_socket);
+	if (ret < 0)
+		return syserror("failure: create socketpair");
+
+	/* Note the CLONE_FILES | CLONE_VM when mucking with fds and memory. */
+	h->fd_event = fd_socket[1];
+	pid = do_clone(userns_fd_cb, h, CLONE_NEWUSER | CLONE_FILES | CLONE_VM);
+	if (pid < 0) {
+		syserror("failure: userns level %d", h->level);
+		goto out_close;
+	}
+
+	ret = map_ids_from_idmap(&h->id_map, pid);
+	if (ret < 0) {
+		kill(pid, SIGKILL);
+		syserror("failure: writing id mapping for userns level %d for %d", h->level, pid);
+		goto out_wait;
+	}
+
+	if (!list_empty(&h->id_map))
+		bytes = write_nointr(fd_socket[0], "1", 1); /* Inform the child we wrote a mapping. */
+	else
+		bytes = write_nointr(fd_socket[0], "0", 1); /* Inform the child we didn't write a mapping. */
+	if (bytes < 0) {
+		kill(pid, SIGKILL);
+		syserror("failure: write to socketpair");
+		goto out_wait;
+	}
+
+	/* Wait for child to set*id() and become dumpable. */
+	bytes = read_nointr(fd_socket[0], &c, 1);
+	if (bytes < 0) {
+		kill(pid, SIGKILL);
+		syserror("failure: read from socketpair");
+		goto out_wait;
+	}
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+	fd_userns = open(path, O_RDONLY | O_CLOEXEC);
+	if (fd_userns < 0) {
+		kill(pid, SIGKILL);
+		syserror("failure: open userns level %d for %d", h->level, pid);
+		goto out_wait;
+	}
+
+	fret = 0;
+
+out_wait:
+	if (!wait_for_pid(pid) && !fret) {
+		h->fd_userns = fd_userns;
+		fd_userns = -EBADF;
+	}
+
+out_close:
+	if (fd_userns >= 0)
+		close(fd_userns);
+	close(fd_socket[0]);
+	close(fd_socket[1]);
+	return fret;
+}
+
+/* caps_down - lower all effective caps */
+int caps_down(void)
+{
+	bool fret = false;
+	cap_t caps = NULL;
+	int ret = -1;
+
+	caps = cap_get_proc();
+	if (!caps)
+		goto out;
+
+	ret = cap_clear_flag(caps, CAP_EFFECTIVE);
+	if (ret)
+		goto out;
+
+	ret = cap_set_proc(caps);
+	if (ret)
+		goto out;
+
+	fret = true;
+
+out:
+	cap_free(caps);
+	return fret;
+}
+
+/* cap_down - lower an effective cap */
+int cap_down(cap_value_t down)
+{
+	bool fret = false;
+	cap_t caps = NULL;
+	cap_value_t cap = down;
+	int ret = -1;
+
+	caps = cap_get_proc();
+	if (!caps)
+		goto out;
+
+	ret = cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, 0);
+	if (ret)
+		goto out;
+
+	ret = cap_set_proc(caps);
+	if (ret)
+		goto out;
+
+	fret = true;
+
+out:
+	cap_free(caps);
+	return fret;
+}
diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h
new file mode 100644
index 000000000000..7f1df2a3e94c
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __IDMAP_UTILS_H
+#define __IDMAP_UTILS_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/capability.h>
+#include <sys/fsuid.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+extern int get_userns_fd(unsigned long nsid, unsigned long hostid,
+			 unsigned long range);
+
+extern int caps_down(void);
+extern int cap_down(cap_value_t down);
+
+extern bool switch_ids(uid_t uid, gid_t gid);
+
+static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
+{
+	if (setns(fd, CLONE_NEWUSER))
+		return false;
+
+	if (!switch_ids(uid, gid))
+		return false;
+
+	if (drop_caps && !caps_down())
+		return false;
+
+	return true;
+}
+
+#endif /* __IDMAP_UTILS_H */
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index a1e955d2de4c..49d96bb16355 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -6,4 +6,6 @@ TEST_PROGS := ftracetest-ktap
 TEST_FILES := test.d settings
 EXTRA_CLEAN := $(OUTPUT)/logs/*
 
+TEST_GEN_PROGS = poll
+
 include ../lib.mk
diff --git a/tools/testing/selftests/ftrace/poll.c b/tools/testing/selftests/ftrace/poll.c
new file mode 100644
index 000000000000..53258f7515e7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/poll.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple poll on a file.
+ *
+ * Copyright (c) 2024 Google LLC.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define BUFSIZE 4096
+
+/*
+ * Usage:
+ *  poll [-I|-P] [-t timeout] FILE
+ */
+int main(int argc, char *argv[])
+{
+	struct pollfd pfd = {.events = POLLIN};
+	char buf[BUFSIZE];
+	int timeout = -1;
+	int ret, opt;
+
+	while ((opt = getopt(argc, argv, "IPt:")) != -1) {
+		switch (opt) {
+		case 'I':
+			pfd.events = POLLIN;
+			break;
+		case 'P':
+			pfd.events = POLLPRI;
+			break;
+		case 't':
+			timeout = atoi(optarg);
+			break;
+		default:
+			fprintf(stderr, "Usage: %s [-I|-P] [-t timeout] FILE\n",
+				argv[0]);
+			return -1;
+		}
+	}
+	if (optind >= argc) {
+		fprintf(stderr, "Error: Polling file is not specified\n");
+		return -1;
+	}
+
+	pfd.fd = open(argv[optind], O_RDONLY);
+	if (pfd.fd < 0) {
+		fprintf(stderr, "failed to open %s", argv[optind]);
+		perror("open");
+		return -1;
+	}
+
+	/* Reset poll by read if POLLIN is specified. */
+	if (pfd.events & POLLIN)
+		do {} while (read(pfd.fd, buf, BUFSIZE) == BUFSIZE);
+
+	ret = poll(&pfd, 1, timeout);
+	if (ret < 0 && errno != EINTR) {
+		perror("poll");
+		return -1;
+	}
+	close(pfd.fd);
+
+	/* If timeout happned (ret == 0), exit code is 1 */
+	if (ret == 0)
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
index 35e8d47d6072..8a7ce647a60d 100644
--- a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
+++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc
@@ -15,11 +15,11 @@ find_alternate_gid() {
 	tac /etc/group | grep -v ":$original_gid:" | head -1 | cut -d: -f3
 }
 
-mount_tracefs_with_options() {
+remount_tracefs_with_options() {
 	local mount_point="$1"
 	local options="$2"
 
-	mount -t tracefs -o "$options" nodev "$mount_point"
+	mount -t tracefs -o "remount,$options" nodev "$mount_point"
 
 	setup
 }
@@ -81,7 +81,7 @@ test_gid_mount_option() {
 
 	# Unmount existing tracefs instance and mount with new GID
 	unmount_tracefs "$mount_point"
-	mount_tracefs_with_options "$mount_point" "$new_options"
+	remount_tracefs_with_options "$mount_point" "$new_options"
 
 	check_gid "$mount_point" "$other_group"
 
@@ -92,7 +92,7 @@ test_gid_mount_option() {
 
 	# Unmount and remount with the original GID
 	unmount_tracefs "$mount_point"
-	mount_tracefs_with_options "$mount_point" "$mount_options"
+	remount_tracefs_with_options "$mount_point" "$mount_options"
 	check_gid "$mount_point" "$original_group"
 }
 
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
index dc25bcf4f9e2..73f6c6fcecab 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -7,12 +7,42 @@ echo 0 > events/enable
 echo > dynamic_events
 
 PLACE=$FUNCTION_FORK
+PLACE2="kmem_cache_free"
+PLACE3="schedule_timeout"
+
+# Some functions may have BPF programs attached, therefore
+# count already enabled_functions before tests start
+ocnt=`cat enabled_functions | wc -l`
 
 echo "f:myevent1 $PLACE" >> dynamic_events
+
+# Make sure the event is attached and is the only one
+grep -q $PLACE enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+	exit_fail
+fi
+
 echo "f:myevent2 $PLACE%return" >> dynamic_events
 
+# It should till be the only attached function
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+	exit_fail
+fi
+
+# add another event
+echo "f:myevent3 $PLACE2" >> dynamic_events
+
+grep -q $PLACE2 enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 2)) ]; then
+	exit_fail
+fi
+
 grep -q myevent1 dynamic_events
 grep -q myevent2 dynamic_events
+grep -q myevent3 dynamic_events
 test -d events/fprobes/myevent1
 test -d events/fprobes/myevent2
 
@@ -21,6 +51,34 @@ echo "-:myevent2" >> dynamic_events
 grep -q myevent1 dynamic_events
 ! grep -q myevent2 dynamic_events
 
+# should still have 2 left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 2)) ]; then
+	exit_fail
+fi
+
 echo > dynamic_events
 
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+	exit_fail
+fi
+
+echo "f:myevent4 $PLACE" >> dynamic_events
+
+# Should only have one enabled
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+	exit_fail
+fi
+
+echo > dynamic_events
+
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+	exit_fail
+fi
+
 clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
new file mode 100644
index 000000000000..b4ad09237e2a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - Repeating add/remove fprobe events
+# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+REPEAT_TIMES=64
+
+for i in `seq 1 $REPEAT_TIMES`; do
+  echo "f:myevent $PLACE" >> dynamic_events
+  grep -q myevent dynamic_events
+  test -d events/fprobes/myevent
+  echo > dynamic_events
+done
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
index a275decdc880..86c76679c56e 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc
@@ -6,8 +6,10 @@
 echo 0 > events/enable
 echo > dynamic_events
 
+REALBIN=`readlink -f /bin/sh`
+
 echo 'cat /proc/$$/maps' | /bin/sh | \
-	grep "r-xp .*/bin/.*sh$" | \
+	grep "r-xp .*${REALBIN}$" | \
 	awk '{printf "p:myevent %s:0x%s\n", $6,$3 }' >> uprobe_events
 
 grep -q myevent uprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index 61877d166451..c9425a34fae3 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -16,9 +16,7 @@ aarch64)
   REG=%r0 ;;
 esac
 
-check_error 'f^100 vfs_read'		# MAXACT_NO_KPROBE
-check_error 'f^1a111 vfs_read'		# BAD_MAXACT
-check_error 'f^100000 vfs_read'		# MAXACT_TOO_BIG
+check_error 'f^100 vfs_read'		# BAD_MAXACT
 
 check_error 'f ^non_exist_func'		# BAD_PROBE_ADDR (enoent)
 check_error 'f ^vfs_read+10'		# BAD_PROBE_ADDR
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-mod.tc b/tools/testing/selftests/ftrace/test.d/event/event-mod.tc
new file mode 100644
index 000000000000..175243cd9ab7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/event-mod.tc
@@ -0,0 +1,191 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with module event
+# requires: set_event "Can enable module events via: :mod:":README
+# flags: instance
+
+rmmod trace-events-sample ||:
+if ! modprobe trace-events-sample ; then
+  echo "No trace-events sample module - please make CONFIG_SAMPLE_TRACE_EVENTS=m"
+  exit_unresolved;
+fi
+trap "rmmod trace-events-sample" EXIT
+
+# Set events for the module
+echo ":mod:trace-events-sample" > set_event
+
+test_all_enabled() {
+
+	# Check if more than one is enabled
+	grep -q sample-trace:foo_bar set_event
+	grep -q sample-trace:foo_bar_with_cond set_event
+	grep -q sample-trace:foo_bar_with_fn set_event
+
+	# All of them should be enabled. Check via the enable file
+	val=`cat events/sample-trace/enable`
+	if [ $val -ne 1 ]; then
+		exit_fail
+	fi
+}
+
+clear_events() {
+	echo > set_event
+	val=`cat events/enable`
+	if [ "$val" != "0" ]; then
+		exit_fail
+	fi
+	count=`cat set_event | wc -l`
+	if [ $count -ne 0 ]; then
+		exit_fail
+	fi
+}
+
+test_all_enabled
+
+echo clear all events
+echo 0 > events/enable
+
+echo Confirm the events are disabled
+val=`cat events/sample-trace/enable`
+if [ $val -ne 0 ]; then
+	exit_fail
+fi
+
+echo And the set_event file is empty
+
+cnt=`wc -l set_event`
+if [ $cnt -ne 0 ]; then
+	exit_fail
+fi
+
+echo now enable all events
+echo 1 > events/enable
+
+echo Confirm the events are enabled again
+val=`cat events/sample-trace/enable`
+if [ $val -ne 1 ]; then
+	exit_fail
+fi
+
+echo disable just the module events
+echo '!:mod:trace-events-sample' >> set_event
+
+echo Should have mix of events enabled
+val=`cat events/enable`
+if [ "$val" != "X" ]; then
+	exit_fail
+fi
+
+echo Confirm the module events are disabled
+val=`cat events/sample-trace/enable`
+if [ $val -ne 0 ]; then
+	exit_fail
+fi
+
+echo 0 > events/enable
+
+echo now enable the system events
+echo 'sample-trace:mod:trace-events-sample' > set_event
+
+test_all_enabled
+
+echo clear all events
+echo 0 > events/enable
+
+echo Confirm the events are disabled
+val=`cat events/sample-trace/enable`
+if [ $val -ne 0 ]; then
+	exit_fail
+fi
+
+echo Test enabling foo_bar only
+echo 'foo_bar:mod:trace-events-sample' > set_event
+
+grep -q sample-trace:foo_bar set_event
+
+echo make sure nothing is found besides foo_bar
+if grep -q -v sample-trace:foo_bar set_event ; then
+	exit_fail
+fi
+
+echo Append another using the system and event name
+echo 'sample-trace:foo_bar_with_cond:mod:trace-events-sample' >> set_event
+
+grep -q sample-trace:foo_bar set_event
+grep -q sample-trace:foo_bar_with_cond set_event
+
+count=`cat set_event | wc -l`
+
+if [ $count -ne 2 ]; then
+	exit_fail
+fi
+
+clear_events
+
+rmmod trace-events-sample
+
+echo ':mod:trace-events-sample' > set_event
+
+echo make sure that the module shows up, and '-' is converted to '_'
+grep -q '\*:\*:mod:trace_events_sample' set_event
+
+modprobe trace-events-sample
+
+test_all_enabled
+
+clear_events
+
+rmmod trace-events-sample
+
+echo Enable just the system events
+echo 'sample-trace:mod:trace-events-sample' > set_event
+grep -q 'sample-trace:mod:trace_events_sample' set_event
+
+modprobe trace-events-sample
+
+test_all_enabled
+
+clear_events
+
+rmmod trace-events-sample
+
+echo Enable event with just event name
+echo 'foo_bar:mod:trace-events-sample' > set_event
+grep -q 'foo_bar:mod:trace_events_sample' set_event
+
+echo Enable another event with both system and event name
+echo 'sample-trace:foo_bar_with_cond:mod:trace-events-sample' >> set_event
+grep -q 'sample-trace:foo_bar_with_cond:mod:trace_events_sample' set_event
+echo Make sure the other event was still there
+grep -q 'foo_bar:mod:trace_events_sample' set_event
+
+modprobe trace-events-sample
+
+echo There should be no :mod: cached events
+if grep -q ':mod:' set_event; then
+	exit_fail
+fi
+
+echo two events should be enabled
+count=`cat set_event | wc -l`
+if [ $count -ne 2 ]; then
+	exit_fail
+fi
+
+echo only two events should be enabled
+val=`cat events/sample-trace/enable`
+if [ "$val" != "X" ]; then
+	exit_fail
+fi
+
+val=`cat events/sample-trace/foo_bar/enable`
+if [ "$val" != "1" ]; then
+	exit_fail
+fi
+
+val=`cat events/sample-trace/foo_bar_with_cond/enable`
+if [ "$val" != "1" ]; then
+	exit_fail
+fi
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc
new file mode 100644
index 000000000000..8d275e3238d9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-poll.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test poll wait on histogram
+# requires: set_event events/sched/sched_process_free/trigger events/sched/sched_process_free/hist
+# flags: instance
+
+POLL=${FTRACETEST_ROOT}/poll
+
+if [ ! -x ${POLL} ]; then
+  echo "poll program is not compiled!"
+  exit_unresolved
+fi
+
+EVENT=events/sched/sched_process_free/
+
+# Check poll ops is supported. Before implementing poll on hist file, it
+# returns soon with POLLIN | POLLOUT, but not POLLPRI.
+
+# This must wait >1 sec and return 1 (timeout).
+set +e
+${POLL} -I -t 1000 ${EVENT}/hist
+ret=$?
+set -e
+if [ ${ret} != 1 ]; then
+  echo "poll on hist file is not supported"
+  exit_unsupported
+fi
+
+# Test POLLIN
+echo > trace
+echo 'hist:key=comm if comm =="sleep"' > ${EVENT}/trigger
+echo 1 > ${EVENT}/enable
+
+# This sleep command will exit after 2 seconds.
+sleep 2 &
+BGPID=$!
+# if timeout happens, poll returns 1.
+${POLL} -I -t 4000 ${EVENT}/hist
+echo 0 > tracing_on
+
+if [ -d /proc/${BGPID} ]; then
+  echo "poll exits too soon"
+  kill -KILL ${BGPID} ||:
+  exit_fail
+fi
+
+if ! grep -qw "sleep" trace; then
+  echo "poll exits before event happens"
+  exit_fail
+fi
+
+# Test POLLPRI
+echo > trace
+echo 1 > tracing_on
+
+# This sleep command will exit after 2 seconds.
+sleep 2 &
+BGPID=$!
+# if timeout happens, poll returns 1.
+${POLL} -P -t 4000 ${EVENT}/hist
+echo 0 > tracing_on
+
+if [ -d /proc/${BGPID} ]; then
+  echo "poll exits too soon"
+  kill -KILL ${BGPID} ||:
+  exit_fail
+fi
+
+if ! grep -qw "sleep" trace; then
+  echo "poll exits before event happens"
+  exit_fail
+fi
+
+exit_pass
diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh
index 6fb66a687f17..bbc29ed9c60a 100755
--- a/tools/testing/selftests/gpio/gpio-sim.sh
+++ b/tools/testing/selftests/gpio/gpio-sim.sh
@@ -46,12 +46,6 @@ remove_chip() {
 	rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip"
 }
 
-configfs_cleanup() {
-	for CHIP in `ls $CONFIGFS_DIR/`; do
-		remove_chip $CHIP
-	done
-}
-
 create_chip() {
 	local CHIP=$1
 
@@ -105,6 +99,13 @@ disable_chip() {
 	echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip"
 }
 
+configfs_cleanup() {
+	for CHIP in `ls $CONFIGFS_DIR/`; do
+		disable_chip $CHIP
+		remove_chip $CHIP
+	done
+}
+
 configfs_chip_name() {
 	local CHIP=$1
 	local BANK=$2
@@ -181,6 +182,7 @@ create_chip chip
 create_bank chip bank
 enable_chip chip
 test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work"
+disable_chip chip
 remove_chip chip
 
 echo "1.2. chip_name returns 'none' if the chip is still pending"
@@ -195,6 +197,7 @@ create_chip chip
 create_bank chip bank
 enable_chip chip
 test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work"
+disable_chip chip
 remove_chip chip
 
 echo "2. Creating and configuring simulated chips"
@@ -204,6 +207,7 @@ create_chip chip
 create_bank chip bank
 enable_chip chip
 test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1"
+disable_chip chip
 remove_chip chip
 
 echo "2.2. Number of lines can be specified"
@@ -212,6 +216,7 @@ create_bank chip bank
 set_num_lines chip bank 16
 enable_chip chip
 test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16"
+disable_chip chip
 remove_chip chip
 
 echo "2.3. Label can be set"
@@ -220,6 +225,7 @@ create_bank chip bank
 set_label chip bank foobar
 enable_chip chip
 test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect"
+disable_chip chip
 remove_chip chip
 
 echo "2.4. Label can be left empty"
@@ -227,6 +233,7 @@ create_chip chip
 create_bank chip bank
 enable_chip chip
 test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty"
+disable_chip chip
 remove_chip chip
 
 echo "2.5. Line names can be configured"
@@ -238,6 +245,7 @@ set_line_name chip bank 2 bar
 enable_chip chip
 test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect"
 test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect"
+disable_chip chip
 remove_chip chip
 
 echo "2.6. Line config can remain unused if offset is greater than number of lines"
@@ -248,6 +256,7 @@ set_line_name chip bank 5 foobar
 enable_chip chip
 test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect"
 test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect"
+disable_chip chip
 remove_chip chip
 
 echo "2.7. Line configfs directory names are sanitized"
@@ -267,6 +276,7 @@ for CHIP in $CHIPS; do
 	enable_chip $CHIP
 done
 for CHIP in $CHIPS; do
+  disable_chip $CHIP
 	remove_chip $CHIP
 done
 
@@ -278,6 +288,7 @@ echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \
 	fail "Setting label of a live chip should fail"
 echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \
 	fail "Setting number of lines of a live chip should fail"
+disable_chip chip
 remove_chip chip
 
 echo "2.10. Can't create line items when chip is live"
@@ -285,6 +296,7 @@ create_chip chip
 create_bank chip bank
 enable_chip chip
 mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail"
+disable_chip chip
 remove_chip chip
 
 echo "2.11. Probe errors are propagated to user-space"
@@ -316,6 +328,7 @@ mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog
 enable_chip chip
 $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \
 	fail "Setting the value of a hogged line shouldn't succeed"
+disable_chip chip
 remove_chip chip
 
 echo "3. Controlling simulated chips"
@@ -331,6 +344,7 @@ test "$?" = "1" || fail "pull set incorrectly"
 sysfs_set_pull chip bank 0 pull-down
 $BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1
 test "$?" = "0" || fail "pull set incorrectly"
+disable_chip chip
 remove_chip chip
 
 echo "3.2. Pull can be read from sysfs"
@@ -344,6 +358,7 @@ SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull
 test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed"
 sysfs_set_pull chip bank 0 pull-up
 test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed"
+disable_chip chip
 remove_chip chip
 
 echo "3.3. Incorrect input in sysfs is rejected"
@@ -355,6 +370,7 @@ DEVNAME=`configfs_dev_name chip`
 CHIPNAME=`configfs_chip_name chip bank`
 SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull"
 echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected"
+disable_chip chip
 remove_chip chip
 
 echo "3.4. Can't write to value"
@@ -365,6 +381,7 @@ DEVNAME=`configfs_dev_name chip`
 CHIPNAME=`configfs_chip_name chip bank`
 SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
 echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly"
+disable_chip chip
 remove_chip chip
 
 echo "4. Simulated GPIO chips are functional"
@@ -382,6 +399,7 @@ $BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 &
 sleep 0.1 # FIXME Any better way?
 test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs"
 kill $!
+disable_chip chip
 remove_chip chip
 
 echo "4.2. Bias settings work correctly"
@@ -394,6 +412,7 @@ CHIPNAME=`configfs_chip_name chip bank`
 SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
 $BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0
 test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work"
+disable_chip chip
 remove_chip chip
 
 echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/hid/.gitignore b/tools/testing/selftests/hid/.gitignore
index 746c62361f77..933f483815b2 100644
--- a/tools/testing/selftests/hid/.gitignore
+++ b/tools/testing/selftests/hid/.gitignore
@@ -1,5 +1,6 @@
 bpftool
 *.skel.h
+/host-tools
 /tools
 hid_bpf
 hidraw
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
index 0336353bd15f..2839d2612ce3 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -43,10 +43,8 @@ TEST_GEN_PROGS = hid_bpf hidraw
 # $3 - target (assumed to be file); only file name will be emitted;
 # $4 - optional extra arg, emitted as-is, if provided.
 ifeq ($(V),1)
-Q =
 msg =
 else
-Q = @
 msg = @printf '  %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
 MAKEFLAGS += --no-print-directory
 submake_extras := feature_display=0
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index c75ea4094870..e9dbb84c100a 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -194,7 +194,7 @@ int fill_msgque(struct msgque_data *msgque)
 
 int main(int argc, char **argv)
 {
-	int msg, pid, err;
+	int err;
 	struct msgque_data msgque;
 
 	if (getuid() != 0)
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 29fedf609611..c3b6d2604b1e 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -18,7 +18,8 @@
  *     ksft_print_msg(fmt, ...);
  *     ksft_perror(msg);
  *
- * and finally report the pass/fail/skip/xfail state of the test with one of:
+ * and finally report the pass/fail/skip/xfail/xpass state of the test
+ * with one of:
  *
  *     ksft_test_result(condition, fmt, ...);
  *     ksft_test_result_report(result, fmt, ...);
@@ -26,6 +27,7 @@
  *     ksft_test_result_fail(fmt, ...);
  *     ksft_test_result_skip(fmt, ...);
  *     ksft_test_result_xfail(fmt, ...);
+ *     ksft_test_result_xpass(fmt, ...);
  *     ksft_test_result_error(fmt, ...);
  *     ksft_test_result_code(exit_code, test_name, fmt, ...);
  *
@@ -147,6 +149,11 @@ static inline void ksft_set_plan(unsigned int plan)
 
 static inline void ksft_print_cnts(void)
 {
+	if (ksft_cnt.ksft_xskip > 0)
+		printf(
+			"# %u skipped test(s) detected. Consider enabling relevant config options to improve coverage.\n",
+			ksft_cnt.ksft_xskip
+		);
 	if (ksft_plan != ksft_test_num())
 		printf("# Planned tests != run tests (%u != %u)\n",
 			ksft_plan, ksft_test_num());
@@ -227,6 +234,20 @@ static inline __printf(1, 2) void ksft_test_result_xfail(const char *msg, ...)
 	va_end(args);
 }
 
+static inline __printf(1, 2) void ksft_test_result_xpass(const char *msg, ...)
+{
+	int saved_errno = errno;
+	va_list args;
+
+	ksft_cnt.ksft_xpass++;
+
+	va_start(args, msg);
+	printf("ok %u # XPASS ", ksft_test_num());
+	errno = saved_errno;
+	vprintf(msg, args);
+	va_end(args);
+}
+
 static inline __printf(1, 2) void ksft_test_result_skip(const char *msg, ...)
 {
 	int saved_errno = errno;
@@ -318,6 +339,9 @@ void ksft_test_result_code(int exit_code, const char *test_name,
 	case KSFT_XFAIL:					\
 		ksft_test_result_xfail(fmt, ##__VA_ARGS__);	\
 		break;						\
+	case KSFT_XPASS:					\
+		ksft_test_result_xpass(fmt, ##__VA_ARGS__);	\
+		break;						\
 	case KSFT_SKIP:						\
 		ksft_test_result_skip(fmt, ##__VA_ARGS__);	\
 		break;						\
@@ -403,7 +427,7 @@ static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ...
 	 */
 	if (ksft_plan || ksft_test_num()) {
 		ksft_cnt.ksft_xskip++;
-		printf("ok %d # SKIP ", 1 + ksft_test_num());
+		printf("ok %u # SKIP ", 1 + ksft_test_num());
 	} else {
 		printf("1..0 # SKIP ");
 	}
@@ -420,10 +444,6 @@ static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ...
 static inline int ksft_min_kernel_version(unsigned int min_major,
 					  unsigned int min_minor)
 {
-#ifdef NOLIBC
-	ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
-	return 0;
-#else
 	unsigned int major, minor;
 	struct utsname info;
 
@@ -431,7 +451,6 @@ static inline int ksft_min_kernel_version(unsigned int min_major,
 		ksft_exit_fail_msg("Can't parse kernel version\n");
 
 	return major > min_major || (major == min_major && minor >= min_minor);
-#endif
 }
 
 #endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest/ksft.py b/tools/testing/selftests/kselftest/ksft.py
index bf215790a89d..0e030837fc17 100644
--- a/tools/testing/selftests/kselftest/ksft.py
+++ b/tools/testing/selftests/kselftest/ksft.py
@@ -27,6 +27,9 @@ def set_plan(num_tests):
 
 
 def print_cnts():
+    if ksft_cnt['skip'] > 0:
+        print(f"# {ksft_cnt['skip']} skipped test(s) detected. Consider enabling relevant config options to improve coverage.")
+
     print(
         f"# Totals: pass:{ksft_cnt['pass']} fail:{ksft_cnt['fail']} xfail:0 xpass:0 skip:{ksft_cnt['skip']} error:0"
     )
diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh
index 79a125eb24c2..32dbfe9da2c4 100644
--- a/tools/testing/selftests/kselftest/ktap_helpers.sh
+++ b/tools/testing/selftests/kselftest/ktap_helpers.sh
@@ -7,6 +7,7 @@
 KTAP_TESTNO=1
 KTAP_CNT_PASS=0
 KTAP_CNT_FAIL=0
+KTAP_CNT_XFAIL=0
 KTAP_CNT_SKIP=0
 
 KSFT_PASS=0
@@ -40,7 +41,7 @@ ktap_skip_all() {
 __ktap_test() {
 	result="$1"
 	description="$2"
-	directive="$3" # optional
+	directive="${3:-}" # optional
 
 	local directive_str=
 	[ ! -z "$directive" ] && directive_str="# $directive"
@@ -69,6 +70,16 @@ ktap_test_skip() {
 	KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1))
 }
 
+ktap_test_xfail() {
+	description="$1"
+
+	result="ok"
+	directive="XFAIL"
+	__ktap_test "$result" "$description" "$directive"
+
+	KTAP_CNT_XFAIL=$((KTAP_CNT_XFAIL+1))
+}
+
 ktap_test_fail() {
 	description="$1"
 
@@ -99,7 +110,7 @@ ktap_exit_fail_msg() {
 ktap_finished() {
 	ktap_print_totals
 
-	if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP)) -eq "$KSFT_NUM_TESTS" ]; then
+	if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP + KTAP_CNT_XFAIL)) -eq "$KSFT_NUM_TESTS" ]; then
 		exit "$KSFT_PASS"
 	else
 		exit "$KSFT_FAIL"
@@ -107,5 +118,9 @@ ktap_finished() {
 }
 
 ktap_print_totals() {
-	echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0"
+	if [ "$KTAP_CNT_SKIP" -gt 0 ]; then
+		echo "# $KTAP_CNT_SKIP skipped test(s) detected. " \
+			"Consider enabling relevant config options to improve coverage."
+	fi
+	echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:$KTAP_CNT_XFAIL xpass:0 skip:$KTAP_CNT_SKIP error:0"
 }
diff --git a/tools/testing/selftests/kselftest/module.sh b/tools/testing/selftests/kselftest/module.sh
index fb4733faff12..51fb65159932 100755
--- a/tools/testing/selftests/kselftest/module.sh
+++ b/tools/testing/selftests/kselftest/module.sh
@@ -11,7 +11,7 @@
 #   SPDX-License-Identifier: GPL-2.0+
 #   $(dirname $0)/../kselftest/module.sh "description" module_name
 #
-# Example: tools/testing/selftests/lib/printf.sh
+# Example: tools/testing/selftests/lib/bitmap.sh
 
 desc=""				# Output prefix.
 module=""			# Filename (without the .ko).
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index a5a72415e37b..666c9fde76da 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -760,33 +760,33 @@
 		/* Report with actual signedness to avoid weird output. */ \
 		switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \
 		case 0: { \
-			unsigned long long __exp_print = (uintptr_t)__exp; \
-			unsigned long long __seen_print = (uintptr_t)__seen; \
-			__TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+			uintmax_t __exp_print = (uintmax_t)__exp; \
+			uintmax_t __seen_print = (uintmax_t)__seen; \
+			__TH_LOG("Expected %s (%ju) %s %s (%ju)", \
 				 _expected_str, __exp_print, #_t, \
 				 _seen_str, __seen_print); \
 			break; \
 			} \
 		case 1: { \
-			unsigned long long __exp_print = (uintptr_t)__exp; \
-			long long __seen_print = (intptr_t)__seen; \
-			__TH_LOG("Expected %s (%llu) %s %s (%lld)", \
+			uintmax_t __exp_print = (uintmax_t)__exp; \
+			intmax_t  __seen_print = (intmax_t)__seen; \
+			__TH_LOG("Expected %s (%ju) %s %s (%jd)", \
 				 _expected_str, __exp_print, #_t, \
 				 _seen_str, __seen_print); \
 			break; \
 			} \
 		case 2: { \
-			long long __exp_print = (intptr_t)__exp; \
-			unsigned long long __seen_print = (uintptr_t)__seen; \
-			__TH_LOG("Expected %s (%lld) %s %s (%llu)", \
+			intmax_t  __exp_print = (intmax_t)__exp; \
+			uintmax_t __seen_print = (uintmax_t)__seen; \
+			__TH_LOG("Expected %s (%jd) %s %s (%ju)", \
 				 _expected_str, __exp_print, #_t, \
 				 _seen_str, __seen_print); \
 			break; \
 			} \
 		case 3: { \
-			long long __exp_print = (intptr_t)__exp; \
-			long long __seen_print = (intptr_t)__seen; \
-			__TH_LOG("Expected %s (%lld) %s %s (%lld)", \
+			intmax_t  __exp_print = (intmax_t)__exp; \
+			intmax_t  __seen_print = (intmax_t)__seen; \
+			__TH_LOG("Expected %s (%jd) %s %s (%jd)", \
 				 _expected_str, __exp_print, #_t, \
 				 _seen_str, __seen_print); \
 			break; \
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 7f57abf936e7..1d41a046a7bf 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -9,3 +9,4 @@
 !config
 !settings
 !Makefile
+!Makefile.kvm
+\ No newline at end of file
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 41593d2e7de9..20af35a91d6f 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,347 +1,16 @@
 # SPDX-License-Identifier: GPL-2.0-only
-include ../../../build/Build.include
-
-all:
-
 top_srcdir = ../../../..
 include $(top_srcdir)/scripts/subarch.include
 ARCH            ?= $(SUBARCH)
 
-ifeq ($(ARCH),x86)
-	ARCH_DIR := x86_64
-else ifeq ($(ARCH),arm64)
-	ARCH_DIR := aarch64
-else ifeq ($(ARCH),s390)
-	ARCH_DIR := s390x
-else
-	ARCH_DIR := $(ARCH)
-endif
-
-LIBKVM += lib/assert.c
-LIBKVM += lib/elf.c
-LIBKVM += lib/guest_modes.c
-LIBKVM += lib/io.c
-LIBKVM += lib/kvm_util.c
-LIBKVM += lib/memstress.c
-LIBKVM += lib/guest_sprintf.c
-LIBKVM += lib/rbtree.c
-LIBKVM += lib/sparsebit.c
-LIBKVM += lib/test_util.c
-LIBKVM += lib/ucall_common.c
-LIBKVM += lib/userfaultfd_util.c
-
-LIBKVM_STRING += lib/string_override.c
-
-LIBKVM_x86_64 += lib/x86_64/apic.c
-LIBKVM_x86_64 += lib/x86_64/handlers.S
-LIBKVM_x86_64 += lib/x86_64/hyperv.c
-LIBKVM_x86_64 += lib/x86_64/memstress.c
-LIBKVM_x86_64 += lib/x86_64/pmu.c
-LIBKVM_x86_64 += lib/x86_64/processor.c
-LIBKVM_x86_64 += lib/x86_64/sev.c
-LIBKVM_x86_64 += lib/x86_64/svm.c
-LIBKVM_x86_64 += lib/x86_64/ucall.c
-LIBKVM_x86_64 += lib/x86_64/vmx.c
-
-LIBKVM_aarch64 += lib/aarch64/gic.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3.c
-LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c
-LIBKVM_aarch64 += lib/aarch64/handlers.S
-LIBKVM_aarch64 += lib/aarch64/processor.c
-LIBKVM_aarch64 += lib/aarch64/spinlock.c
-LIBKVM_aarch64 += lib/aarch64/ucall.c
-LIBKVM_aarch64 += lib/aarch64/vgic.c
-
-LIBKVM_s390x += lib/s390x/diag318_test_handler.c
-LIBKVM_s390x += lib/s390x/processor.c
-LIBKVM_s390x += lib/s390x/ucall.c
-LIBKVM_s390x += lib/s390x/facility.c
-
-LIBKVM_riscv += lib/riscv/handlers.S
-LIBKVM_riscv += lib/riscv/processor.c
-LIBKVM_riscv += lib/riscv/ucall.c
-
-# Non-compiled test targets
-TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
-
-# Compiled test targets
-TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
-TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
-TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
-TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
-TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
-TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
-TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
-TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
-TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
-TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
-TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
-TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
-TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
-TEST_GEN_PROGS_x86_64 += x86_64/smm_test
-TEST_GEN_PROGS_x86_64 += x86_64/state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
-TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
-TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
-TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
-TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
-TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
-TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
-TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
-TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
-TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
-TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
-TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
-TEST_GEN_PROGS_x86_64 += x86_64/amx_test
-TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
-TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
-TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
-TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
-TEST_GEN_PROGS_x86_64 += coalesced_io_test
-TEST_GEN_PROGS_x86_64 += demand_paging_test
-TEST_GEN_PROGS_x86_64 += dirty_log_test
-TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
-TEST_GEN_PROGS_x86_64 += guest_memfd_test
-TEST_GEN_PROGS_x86_64 += guest_print_test
-TEST_GEN_PROGS_x86_64 += hardware_disable_test
-TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86_64 += kvm_page_table_test
-TEST_GEN_PROGS_x86_64 += max_guest_memory_test
-TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
-TEST_GEN_PROGS_x86_64 += memslot_perf_test
-TEST_GEN_PROGS_x86_64 += rseq_test
-TEST_GEN_PROGS_x86_64 += set_memory_region_test
-TEST_GEN_PROGS_x86_64 += steal_time
-TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
-TEST_GEN_PROGS_x86_64 += system_counter_offset_test
-TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
-
-# Compiled outputs used by test targets
-TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
-
-TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases
-TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
-TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
-TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort
-TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
-TEST_GEN_PROGS_aarch64 += aarch64/psci_test
-TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs
-TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
-TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
-TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
-TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
-TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
-TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
-TEST_GEN_PROGS_aarch64 += arch_timer
-TEST_GEN_PROGS_aarch64 += coalesced_io_test
-TEST_GEN_PROGS_aarch64 += demand_paging_test
-TEST_GEN_PROGS_aarch64 += dirty_log_test
-TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
-TEST_GEN_PROGS_aarch64 += guest_print_test
-TEST_GEN_PROGS_aarch64 += get-reg-list
-TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_aarch64 += kvm_page_table_test
-TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
-TEST_GEN_PROGS_aarch64 += memslot_perf_test
-TEST_GEN_PROGS_aarch64 += rseq_test
-TEST_GEN_PROGS_aarch64 += set_memory_region_test
-TEST_GEN_PROGS_aarch64 += steal_time
-TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
-
-TEST_GEN_PROGS_s390x = s390x/memop
-TEST_GEN_PROGS_s390x += s390x/resets
-TEST_GEN_PROGS_s390x += s390x/sync_regs_test
-TEST_GEN_PROGS_s390x += s390x/tprot
-TEST_GEN_PROGS_s390x += s390x/cmma_test
-TEST_GEN_PROGS_s390x += s390x/debug_test
-TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test
-TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test
-TEST_GEN_PROGS_s390x += s390x/ucontrol_test
-TEST_GEN_PROGS_s390x += demand_paging_test
-TEST_GEN_PROGS_s390x += dirty_log_test
-TEST_GEN_PROGS_s390x += guest_print_test
-TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390x += kvm_page_table_test
-TEST_GEN_PROGS_s390x += rseq_test
-TEST_GEN_PROGS_s390x += set_memory_region_test
-TEST_GEN_PROGS_s390x += kvm_binary_stats_test
-
-TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
-TEST_GEN_PROGS_riscv += riscv/ebreak_test
-TEST_GEN_PROGS_riscv += arch_timer
-TEST_GEN_PROGS_riscv += coalesced_io_test
-TEST_GEN_PROGS_riscv += demand_paging_test
-TEST_GEN_PROGS_riscv += dirty_log_test
-TEST_GEN_PROGS_riscv += get-reg-list
-TEST_GEN_PROGS_riscv += guest_print_test
-TEST_GEN_PROGS_riscv += kvm_binary_stats_test
-TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
-TEST_GEN_PROGS_riscv += kvm_page_table_test
-TEST_GEN_PROGS_riscv += set_memory_region_test
-TEST_GEN_PROGS_riscv += steal_time
-
-SPLIT_TESTS += arch_timer
-SPLIT_TESTS += get-reg-list
-
-TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
-TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
-TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
-LIBKVM += $(LIBKVM_$(ARCH_DIR))
-
-OVERRIDE_TARGETS = 1
-
-# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
-# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
-# which causes the environment variable to override the makefile).
-include ../lib.mk
-
-INSTALL_HDR_PATH = $(top_srcdir)/usr
-LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
+# Top-level selftests allows ARCH=x86_64 :-(
 ifeq ($(ARCH),x86_64)
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
-else
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+	ARCH := x86
 endif
-CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-	-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
-	-fno-builtin-memcmp -fno-builtin-memcpy \
-	-fno-builtin-memset -fno-builtin-strnlen \
-	-fno-stack-protector -fno-PIE -fno-strict-aliasing \
-	-I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \
-	-I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \
-	-I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
-ifeq ($(ARCH),s390)
-	CFLAGS += -march=z10
-endif
-ifeq ($(ARCH),x86)
-ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0)
-	CFLAGS += -march=x86-64-v2
-endif
-endif
-ifeq ($(ARCH),arm64)
-tools_dir := $(top_srcdir)/tools
-arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
-
-ifneq ($(abs_objdir),)
-arm64_hdr_outdir := $(abs_objdir)/tools/
+include Makefile.kvm
 else
-arm64_hdr_outdir := $(tools_dir)/
-endif
-
-GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/
-CFLAGS += -I$(GEN_HDRS)
-
-$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
-	$(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir)
+# Empty targets for unsupported architectures
+all:
+clean:
 endif
-
-no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \
-        $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
-
-# On s390, build the testcases KVM-enabled
-pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \
-	$(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
-
-LDLIBS += -ldl
-LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
-
-LIBKVM_C := $(filter %.c,$(LIBKVM))
-LIBKVM_S := $(filter %.S,$(LIBKVM))
-LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
-LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
-SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
-SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS))
-
-TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
-TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
-TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
-TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
-TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ))
--include $(TEST_DEP_FILES)
-
-$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
-
-$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \
-$(TEST_GEN_PROGS_EXTENDED): %: %.o
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
-$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
-$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-EXTRA_CLEAN += $(GEN_HDRS) \
-	       $(LIBKVM_OBJS) \
-	       $(SPLIT_TEST_GEN_OBJ) \
-	       $(TEST_DEP_FILES) \
-	       $(TEST_GEN_OBJ) \
-	       cscope.*
-
-$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS)
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-
-# Compile the string overrides as freestanding to prevent the compiler from
-# generating self-referential code, e.g. without "freestanding" the compiler may
-# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion.
-$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
-	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
-
-$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS)
-$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
-$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
-$(TEST_GEN_OBJ): $(GEN_HDRS)
-
-cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
-cscope:
-	$(RM) cscope.*
-	(find $(include_paths) -name '*.h' \
-		-exec realpath --relative-base=$(PWD) {} \;; \
-	find . -name '*.c' \
-		-exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
-	cscope -b
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
new file mode 100644
index 000000000000..f773f8f99249
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -0,0 +1,331 @@
+# SPDX-License-Identifier: GPL-2.0-only
+include ../../../build/Build.include
+
+all:
+
+LIBKVM += lib/assert.c
+LIBKVM += lib/elf.c
+LIBKVM += lib/guest_modes.c
+LIBKVM += lib/io.c
+LIBKVM += lib/kvm_util.c
+LIBKVM += lib/memstress.c
+LIBKVM += lib/guest_sprintf.c
+LIBKVM += lib/rbtree.c
+LIBKVM += lib/sparsebit.c
+LIBKVM += lib/test_util.c
+LIBKVM += lib/ucall_common.c
+LIBKVM += lib/userfaultfd_util.c
+
+LIBKVM_STRING += lib/string_override.c
+
+LIBKVM_x86 += lib/x86/apic.c
+LIBKVM_x86 += lib/x86/handlers.S
+LIBKVM_x86 += lib/x86/hyperv.c
+LIBKVM_x86 += lib/x86/memstress.c
+LIBKVM_x86 += lib/x86/pmu.c
+LIBKVM_x86 += lib/x86/processor.c
+LIBKVM_x86 += lib/x86/sev.c
+LIBKVM_x86 += lib/x86/svm.c
+LIBKVM_x86 += lib/x86/ucall.c
+LIBKVM_x86 += lib/x86/vmx.c
+
+LIBKVM_arm64 += lib/arm64/gic.c
+LIBKVM_arm64 += lib/arm64/gic_v3.c
+LIBKVM_arm64 += lib/arm64/gic_v3_its.c
+LIBKVM_arm64 += lib/arm64/handlers.S
+LIBKVM_arm64 += lib/arm64/processor.c
+LIBKVM_arm64 += lib/arm64/spinlock.c
+LIBKVM_arm64 += lib/arm64/ucall.c
+LIBKVM_arm64 += lib/arm64/vgic.c
+
+LIBKVM_s390 += lib/s390/diag318_test_handler.c
+LIBKVM_s390 += lib/s390/processor.c
+LIBKVM_s390 += lib/s390/ucall.c
+LIBKVM_s390 += lib/s390/facility.c
+
+LIBKVM_riscv += lib/riscv/handlers.S
+LIBKVM_riscv += lib/riscv/processor.c
+LIBKVM_riscv += lib/riscv/ucall.c
+
+# Non-compiled test targets
+TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+
+# Compiled test targets
+TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
+TEST_GEN_PROGS_x86 += x86/feature_msrs_test
+TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
+TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
+TEST_GEN_PROGS_x86 += x86/hyperv_clock
+TEST_GEN_PROGS_x86 += x86/hyperv_cpuid
+TEST_GEN_PROGS_x86 += x86/hyperv_evmcs
+TEST_GEN_PROGS_x86 += x86/hyperv_extended_hypercalls
+TEST_GEN_PROGS_x86 += x86/hyperv_features
+TEST_GEN_PROGS_x86 += x86/hyperv_ipi
+TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
+TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
+TEST_GEN_PROGS_x86 += x86/kvm_clock_test
+TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
+TEST_GEN_PROGS_x86 += x86/nested_emulation_test
+TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
+TEST_GEN_PROGS_x86 += x86/platform_info_test
+TEST_GEN_PROGS_x86 += x86/pmu_counters_test
+TEST_GEN_PROGS_x86 += x86/pmu_event_filter_test
+TEST_GEN_PROGS_x86 += x86/private_mem_conversions_test
+TEST_GEN_PROGS_x86 += x86/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86 += x86/set_boot_cpu_id
+TEST_GEN_PROGS_x86 += x86/set_sregs_test
+TEST_GEN_PROGS_x86 += x86/smaller_maxphyaddr_emulation_test
+TEST_GEN_PROGS_x86 += x86/smm_test
+TEST_GEN_PROGS_x86 += x86/state_test
+TEST_GEN_PROGS_x86 += x86/vmx_preemption_timer_test
+TEST_GEN_PROGS_x86 += x86/svm_vmcall_test
+TEST_GEN_PROGS_x86 += x86/svm_int_ctl_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_shutdown_test
+TEST_GEN_PROGS_x86 += x86/svm_nested_soft_inject_test
+TEST_GEN_PROGS_x86 += x86/tsc_scaling_sync
+TEST_GEN_PROGS_x86 += x86/sync_regs_test
+TEST_GEN_PROGS_x86 += x86/ucna_injection_test
+TEST_GEN_PROGS_x86 += x86/userspace_io_test
+TEST_GEN_PROGS_x86 += x86/userspace_msr_exit_test
+TEST_GEN_PROGS_x86 += x86/vmx_apic_access_test
+TEST_GEN_PROGS_x86 += x86/vmx_close_while_nested_test
+TEST_GEN_PROGS_x86 += x86/vmx_dirty_log_test
+TEST_GEN_PROGS_x86 += x86/vmx_exception_with_invalid_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_invalid_nested_guest_state
+TEST_GEN_PROGS_x86 += x86/vmx_set_nested_state_test
+TEST_GEN_PROGS_x86 += x86/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86 += x86/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86 += x86/apic_bus_clock_test
+TEST_GEN_PROGS_x86 += x86/xapic_ipi_test
+TEST_GEN_PROGS_x86 += x86/xapic_state_test
+TEST_GEN_PROGS_x86 += x86/xcr0_cpuid_test
+TEST_GEN_PROGS_x86 += x86/xss_msr_test
+TEST_GEN_PROGS_x86 += x86/debug_regs
+TEST_GEN_PROGS_x86 += x86/tsc_msrs_test
+TEST_GEN_PROGS_x86 += x86/vmx_pmu_caps_test
+TEST_GEN_PROGS_x86 += x86/xen_shinfo_test
+TEST_GEN_PROGS_x86 += x86/xen_vmcall_test
+TEST_GEN_PROGS_x86 += x86/sev_init2_tests
+TEST_GEN_PROGS_x86 += x86/sev_migrate_tests
+TEST_GEN_PROGS_x86 += x86/sev_smoke_test
+TEST_GEN_PROGS_x86 += x86/amx_test
+TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test
+TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
+TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
+TEST_GEN_PROGS_x86 += access_tracking_perf_test
+TEST_GEN_PROGS_x86 += coalesced_io_test
+TEST_GEN_PROGS_x86 += demand_paging_test
+TEST_GEN_PROGS_x86 += dirty_log_test
+TEST_GEN_PROGS_x86 += dirty_log_perf_test
+TEST_GEN_PROGS_x86 += guest_memfd_test
+TEST_GEN_PROGS_x86 += guest_print_test
+TEST_GEN_PROGS_x86 += hardware_disable_test
+TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86 += kvm_page_table_test
+TEST_GEN_PROGS_x86 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86 += memslot_perf_test
+TEST_GEN_PROGS_x86 += mmu_stress_test
+TEST_GEN_PROGS_x86 += rseq_test
+TEST_GEN_PROGS_x86 += set_memory_region_test
+TEST_GEN_PROGS_x86 += steal_time
+TEST_GEN_PROGS_x86 += kvm_binary_stats_test
+TEST_GEN_PROGS_x86 += system_counter_offset_test
+TEST_GEN_PROGS_x86 += pre_fault_memory_test
+
+# Compiled outputs used by test targets
+TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+
+TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
+TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
+TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/hypercalls
+TEST_GEN_PROGS_arm64 += arm64/mmio_abort
+TEST_GEN_PROGS_arm64 += arm64/page_fault_test
+TEST_GEN_PROGS_arm64 += arm64/psci_test
+TEST_GEN_PROGS_arm64 += arm64/set_id_regs
+TEST_GEN_PROGS_arm64 += arm64/smccc_filter
+TEST_GEN_PROGS_arm64 += arm64/vcpu_width_config
+TEST_GEN_PROGS_arm64 += arm64/vgic_init
+TEST_GEN_PROGS_arm64 += arm64/vgic_irq
+TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress
+TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access
+TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
+TEST_GEN_PROGS_arm64 += access_tracking_perf_test
+TEST_GEN_PROGS_arm64 += arch_timer
+TEST_GEN_PROGS_arm64 += coalesced_io_test
+TEST_GEN_PROGS_arm64 += demand_paging_test
+TEST_GEN_PROGS_arm64 += dirty_log_test
+TEST_GEN_PROGS_arm64 += dirty_log_perf_test
+TEST_GEN_PROGS_arm64 += guest_print_test
+TEST_GEN_PROGS_arm64 += get-reg-list
+TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_arm64 += kvm_page_table_test
+TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
+TEST_GEN_PROGS_arm64 += memslot_perf_test
+TEST_GEN_PROGS_arm64 += mmu_stress_test
+TEST_GEN_PROGS_arm64 += rseq_test
+TEST_GEN_PROGS_arm64 += set_memory_region_test
+TEST_GEN_PROGS_arm64 += steal_time
+TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
+
+TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 += s390/resets
+TEST_GEN_PROGS_s390 += s390/sync_regs_test
+TEST_GEN_PROGS_s390 += s390/tprot
+TEST_GEN_PROGS_s390 += s390/cmma_test
+TEST_GEN_PROGS_s390 += s390/debug_test
+TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
+TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
+TEST_GEN_PROGS_s390 += s390/ucontrol_test
+TEST_GEN_PROGS_s390 += demand_paging_test
+TEST_GEN_PROGS_s390 += dirty_log_test
+TEST_GEN_PROGS_s390 += guest_print_test
+TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390 += kvm_page_table_test
+TEST_GEN_PROGS_s390 += rseq_test
+TEST_GEN_PROGS_s390 += set_memory_region_test
+TEST_GEN_PROGS_s390 += kvm_binary_stats_test
+
+TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
+TEST_GEN_PROGS_riscv += riscv/ebreak_test
+TEST_GEN_PROGS_riscv += arch_timer
+TEST_GEN_PROGS_riscv += coalesced_io_test
+TEST_GEN_PROGS_riscv += demand_paging_test
+TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += get-reg-list
+TEST_GEN_PROGS_riscv += guest_print_test
+TEST_GEN_PROGS_riscv += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
+TEST_GEN_PROGS_riscv += kvm_page_table_test
+TEST_GEN_PROGS_riscv += set_memory_region_test
+TEST_GEN_PROGS_riscv += steal_time
+
+SPLIT_TESTS += arch_timer
+SPLIT_TESTS += get-reg-list
+
+TEST_PROGS += $(TEST_PROGS_$(ARCH))
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH))
+TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH))
+LIBKVM += $(LIBKVM_$(ARCH))
+
+OVERRIDE_TARGETS = 1
+
+# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
+# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
+# which causes the environment variable to override the makefile).
+include ../lib.mk
+
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
+	-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
+	-fno-builtin-memcmp -fno-builtin-memcpy \
+	-fno-builtin-memset -fno-builtin-strnlen \
+	-fno-stack-protector -fno-PIE -fno-strict-aliasing \
+	-I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \
+	-I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH) \
+	-I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
+ifeq ($(ARCH),s390)
+	CFLAGS += -march=z10
+endif
+ifeq ($(ARCH),x86)
+ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0)
+	CFLAGS += -march=x86-64-v2
+endif
+endif
+ifeq ($(ARCH),arm64)
+tools_dir := $(top_srcdir)/tools
+arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
+
+ifneq ($(abs_objdir),)
+arm64_hdr_outdir := $(abs_objdir)/tools/
+else
+arm64_hdr_outdir := $(tools_dir)/
+endif
+
+GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/
+CFLAGS += -I$(GEN_HDRS)
+
+$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
+	$(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir)
+endif
+
+no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \
+        $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+# On s390, build the testcases KVM-enabled
+pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \
+	$(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
+
+LDLIBS += -ldl
+LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
+
+LIBKVM_C := $(filter %.c,$(LIBKVM))
+LIBKVM_S := $(filter %.S,$(LIBKVM))
+LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
+LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
+LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
+SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS))
+
+TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
+TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ))
+-include $(TEST_DEP_FILES)
+
+$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+
+$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \
+$(TEST_GEN_PROGS_EXTENDED): %: %.o
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
+$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH)/%.o
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
+$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH)/%.o: $(ARCH)/%.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(GEN_HDRS) \
+	       $(LIBKVM_OBJS) \
+	       $(SPLIT_TEST_GEN_OBJ) \
+	       $(TEST_DEP_FILES) \
+	       $(TEST_GEN_OBJ) \
+	       cscope.*
+
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS)
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+# Compile the string overrides as freestanding to prevent the compiler from
+# generating self-referential code, e.g. without "freestanding" the compiler may
+# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion.
+$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
+
+$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
+$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS)
+$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
+$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
+$(TEST_GEN_OBJ): $(GEN_HDRS)
+
+cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
+cscope:
+	$(RM) cscope.*
+	(find $(include_paths) -name '*.h' \
+		-exec realpath --relative-base=$(PWD) {} \;; \
+	find . -name '*.c' \
+		-exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files
+	cscope -b
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 3c7defd34f56..447e619cf856 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -239,7 +239,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
 		case ITERATION_MARK_IDLE:
 			mark_vcpu_memory_idle(vm, vcpu_args);
 			break;
-		};
+		}
 
 		vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
 	}
diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
index 8e5bd07a3727..cef8f7323ceb 100644
--- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/aarch32_id_regs.c
@@ -97,7 +97,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
 		uint64_t reg_id = raz_wi_reg_ids[i];
 		uint64_t val;
 
-		vcpu_get_reg(vcpu, reg_id, &val);
+		val = vcpu_get_reg(vcpu, reg_id);
 		TEST_ASSERT_EQ(val, 0);
 
 		/*
@@ -106,7 +106,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu)
 		 */
 		vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
 
-		vcpu_get_reg(vcpu, reg_id, &val);
+		val = vcpu_get_reg(vcpu, reg_id);
 		TEST_ASSERT_EQ(val, 0);
 	}
 }
@@ -126,14 +126,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
 		uint64_t reg_id = raz_invariant_reg_ids[i];
 		uint64_t val;
 
-		vcpu_get_reg(vcpu, reg_id, &val);
+		val = vcpu_get_reg(vcpu, reg_id);
 		TEST_ASSERT_EQ(val, 0);
 
 		r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
 		TEST_ASSERT(r < 0 && errno == EINVAL,
 			    "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
 
-		vcpu_get_reg(vcpu, reg_id, &val);
+		val = vcpu_get_reg(vcpu, reg_id);
 		TEST_ASSERT_EQ(val, 0);
 	}
 }
@@ -144,10 +144,10 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
 {
 	uint64_t val, el0;
 
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+	val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
 
 	el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
-	return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
+	return el0 == ID_AA64PFR0_EL1_EL0_IMP;
 }
 
 int main(void)
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/arm64/arch_timer.c
index eeba1cc87ff8..eeba1cc87ff8 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer.c
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
index a36a7e2db434..a36a7e2db434 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
+++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
index ff7a949fc96a..c7fb55c9135b 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c
@@ -501,7 +501,7 @@ void test_single_step_from_userspace(int test_cnt)
 		TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
 
 		/* Check if the current pc is expected. */
-		vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc);
+		pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
 		TEST_ASSERT(!test_pc || pc == test_pc,
 			    "Unexpected pc 0x%lx (expected 0x%lx)",
 			    pc, test_pc);
@@ -583,7 +583,7 @@ int main(int argc, char *argv[])
 	uint64_t aa64dfr0;
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0);
+	aa64dfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
 	__TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
 		       "Armv8 debug architecture not supported.");
 	kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c
index d43fb3f49050..d01798b6b3b4 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c
@@ -332,6 +332,7 @@ static __u64 base_regs[] = {
 	KVM_REG_ARM_FW_FEAT_BMAP_REG(0),	/* KVM_REG_ARM_STD_BMAP */
 	KVM_REG_ARM_FW_FEAT_BMAP_REG(1),	/* KVM_REG_ARM_STD_HYP_BMAP */
 	KVM_REG_ARM_FW_FEAT_BMAP_REG(2),	/* KVM_REG_ARM_VENDOR_HYP_BMAP */
+	KVM_REG_ARM_FW_FEAT_BMAP_REG(3),	/* KVM_REG_ARM_VENDOR_HYP_BMAP_2 */
 	ARM64_SYS_REG(3, 3, 14, 3, 1),	/* CNTV_CTL_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 3, 2),	/* CNTV_CVAL_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 0, 2),
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/arm64/hypercalls.c
index 9d192ce0078d..44cfcf8a7f46 100644
--- a/tools/testing/selftests/kvm/aarch64/hypercalls.c
+++ b/tools/testing/selftests/kvm/arm64/hypercalls.c
@@ -21,22 +21,31 @@
 #define KVM_REG_ARM_STD_BMAP_BIT_MAX		0
 #define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX	0
 #define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX	1
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_BIT_MAX   1
+
+#define KVM_REG_ARM_STD_BMAP_RESET_VAL		FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_BMAP_BIT_MAX)
+#define KVM_REG_ARM_STD_HYP_BMAP_RESET_VAL	FW_REG_ULIMIT_VAL(KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_RESET_VAL	FW_REG_ULIMIT_VAL(KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX)
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_2_RESET_VAL 0
 
 struct kvm_fw_reg_info {
 	uint64_t reg;		/* Register definition */
 	uint64_t max_feat_bit;	/* Bit that represents the upper limit of the feature-map */
+	uint64_t reset_val;	/* Reset value for the register */
 };
 
 #define FW_REG_INFO(r)			\
 	{					\
 		.reg = r,			\
 		.max_feat_bit = r##_BIT_MAX,	\
+		.reset_val = r##_RESET_VAL	\
 	}
 
 static const struct kvm_fw_reg_info fw_reg_info[] = {
 	FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
 	FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
 	FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+	FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP_2),
 };
 
 enum test_stage {
@@ -171,22 +180,39 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
 		const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+		uint64_t set_val;
+
+		/* First 'read' should be the reset value for the reg  */
+		val = vcpu_get_reg(vcpu, reg_info->reg);
+		TEST_ASSERT(val == reg_info->reset_val,
+			"Unexpected reset value for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+			reg_info->reg, reg_info->reset_val, val);
 
-		/* First 'read' should be an upper limit of the features supported */
-		vcpu_get_reg(vcpu, reg_info->reg, &val);
-		TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
-			"Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
-			reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+		if (reg_info->reset_val)
+			set_val = 0;
+		else
+			set_val = FW_REG_ULIMIT_VAL(reg_info->max_feat_bit);
 
-		/* Test a 'write' by disabling all the features of the register map */
-		ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+		ret = __vcpu_set_reg(vcpu, reg_info->reg, set_val);
 		TEST_ASSERT(ret == 0,
+			"Failed to %s all the features of reg: 0x%lx; ret: %d",
+			(set_val ? "set" : "clear"), reg_info->reg, errno);
+
+		val = vcpu_get_reg(vcpu, reg_info->reg);
+		TEST_ASSERT(val == set_val,
+			"Expected all the features to be %s for reg: 0x%lx",
+			(set_val ? "set" : "cleared"), reg_info->reg);
+
+		/*
+		 * If the reg has been set, clear it as test_fw_regs_after_vm_start()
+		 * expects it to be cleared.
+		 */
+		if (set_val) {
+			ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+			TEST_ASSERT(ret == 0,
 			"Failed to clear all the features of reg: 0x%lx; ret: %d",
 			reg_info->reg, errno);
-
-		vcpu_get_reg(vcpu, reg_info->reg, &val);
-		TEST_ASSERT(val == 0,
-			"Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
+		}
 
 		/*
 		 * Test enabling a feature that's not supported.
@@ -214,7 +240,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
 		 * Before starting the VM, the test clears all the bits.
 		 * Check if that's still the case.
 		 */
-		vcpu_get_reg(vcpu, reg_info->reg, &val);
+		val = vcpu_get_reg(vcpu, reg_info->reg);
 		TEST_ASSERT(val == 0,
 			"Expected all the features to be cleared for reg: 0x%lx",
 			reg_info->reg);
diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c
index 8b7a80a51b1c..8b7a80a51b1c 100644
--- a/tools/testing/selftests/kvm/aarch64/mmio_abort.c
+++ b/tools/testing/selftests/kvm/arm64/mmio_abort.c
diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
index 58304bbc2036..ebd70430c89d 100644
--- a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
+++ b/tools/testing/selftests/kvm/arm64/no-vgic-v3.c
@@ -164,7 +164,7 @@ int main(int argc, char *argv[])
 	uint64_t pfr0;
 
 	vm = vm_create_with_one_vcpu(&vcpu, NULL);
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0);
+	pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
 	__TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
 		       "GICv3 not supported.");
 	kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
index ec33a8f9c908..ec33a8f9c908 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/arm64/psci_test.c
index eaa7655fefc1..ab491ee9e5f7 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/arm64/psci_test.c
@@ -111,8 +111,8 @@ static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	uint64_t obs_pc, obs_x0;
 
-	vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc);
-	vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
+	obs_pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
+	obs_x0 = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]));
 
 	TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
 		    "unexpected target cpu pc: %lx (expected: %lx)",
@@ -152,7 +152,7 @@ static void host_test_cpu_on(void)
 	 */
 	vcpu_power_off(target);
 
-	vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
+	target_mpidr = vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1));
 	vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
 	enter_guest(source);
 
@@ -244,7 +244,7 @@ static void host_test_system_off2(void)
 
 	setup_vm(guest_test_system_off2, &source, &target);
 
-	vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION, &psci_version);
+	psci_version = vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION);
 
 	TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3),
 		    "Unexpected PSCI version %lu.%lu",
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 3a97c160b5fe..322b9d3b0125 100644
--- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -146,6 +146,9 @@ static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = {
 static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+	REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN4_2, 1),
+	REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN64_2, 1),
+	REG_FTR_BITS(FTR_EXACT, ID_AA64MMFR0_EL1, TGRAN16_2, 1),
 	S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
 	S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
 	REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
@@ -230,6 +233,9 @@ static void guest_code(void)
 	GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
 	GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
 	GUEST_REG_SYNC(SYS_CTR_EL0);
+	GUEST_REG_SYNC(SYS_MIDR_EL1);
+	GUEST_REG_SYNC(SYS_REVIDR_EL1);
+	GUEST_REG_SYNC(SYS_AIDR_EL1);
 
 	GUEST_DONE();
 }
@@ -345,7 +351,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
 	uint64_t mask = ftr_bits->mask;
 	uint64_t val, new_val, ftr;
 
-	vcpu_get_reg(vcpu, reg, &val);
+	val = vcpu_get_reg(vcpu, reg);
 	ftr = (val & mask) >> shift;
 
 	ftr = get_safe_value(ftr_bits, ftr);
@@ -355,7 +361,7 @@ static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
 	val |= ftr;
 
 	vcpu_set_reg(vcpu, reg, val);
-	vcpu_get_reg(vcpu, reg, &new_val);
+	new_val = vcpu_get_reg(vcpu, reg);
 	TEST_ASSERT_EQ(new_val, val);
 
 	return new_val;
@@ -369,7 +375,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
 	uint64_t val, old_val, ftr;
 	int r;
 
-	vcpu_get_reg(vcpu, reg, &val);
+	val = vcpu_get_reg(vcpu, reg);
 	ftr = (val & mask) >> shift;
 
 	ftr = get_invalid_value(ftr_bits, ftr);
@@ -383,7 +389,7 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
 	TEST_ASSERT(r < 0 && errno == EINVAL,
 		    "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
 
-	vcpu_get_reg(vcpu, reg, &val);
+	val = vcpu_get_reg(vcpu, reg);
 	TEST_ASSERT_EQ(val, old_val);
 }
 
@@ -470,7 +476,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
 	}
 
 	/* Get the id register value */
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+	val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
 
 	/* Try to set MPAM=0. This should always be possible. */
 	val &= ~ID_AA64PFR0_EL1_MPAM_MASK;
@@ -507,7 +513,7 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
 	}
 
 	/* Get the id register value */
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), &val);
+	val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
 
 	/* Try to set MPAM_frac=0. This should always be possible. */
 	val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK;
@@ -575,7 +581,7 @@ static void test_clidr(struct kvm_vcpu *vcpu)
 	uint64_t clidr;
 	int level;
 
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr);
+	clidr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1));
 
 	/* find the first empty level in the cache hierarchy */
 	for (level = 1; level < 7; level++) {
@@ -600,7 +606,7 @@ static void test_ctr(struct kvm_vcpu *vcpu)
 {
 	u64 ctr;
 
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), &ctr);
+	ctr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0));
 	ctr &= ~CTR_EL0_DIC_MASK;
 	if (ctr & CTR_EL0_IminLine_MASK)
 		ctr--;
@@ -609,18 +615,31 @@ static void test_ctr(struct kvm_vcpu *vcpu)
 	test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
 }
 
-static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+static void test_id_reg(struct kvm_vcpu *vcpu, u32 id)
 {
 	u64 val;
 
+	val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(id));
+	val++;
+	vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(id), val);
+	test_reg_vals[encoding_to_range_idx(id)] = val;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
 	test_clidr(vcpu);
 	test_ctr(vcpu);
 
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val);
-	val++;
-	vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+	test_id_reg(vcpu, SYS_MPIDR_EL1);
+	ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_vcpu_non_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+	test_id_reg(vcpu, SYS_MIDR_EL1);
+	test_id_reg(vcpu, SYS_REVIDR_EL1);
+	test_id_reg(vcpu, SYS_AIDR_EL1);
 
-	test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
 	ksft_test_result_pass("%s\n", __func__);
 }
 
@@ -629,7 +648,7 @@ static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encodin
 	size_t idx = encoding_to_range_idx(encoding);
 	uint64_t observed;
 
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed);
+	observed = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding));
 	TEST_ASSERT_EQ(test_reg_vals[idx], observed);
 }
 
@@ -647,6 +666,9 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
 	test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
 	test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
 	test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+	test_assert_id_reg_unchanged(vcpu, SYS_MIDR_EL1);
+	test_assert_id_reg_unchanged(vcpu, SYS_REVIDR_EL1);
+	test_assert_id_reg_unchanged(vcpu, SYS_AIDR_EL1);
 
 	ksft_test_result_pass("%s\n", __func__);
 }
@@ -660,13 +682,16 @@ int main(void)
 	int test_cnt;
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_WRITABLE_IMP_ID_REGS));
 
-	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	vm = vm_create(1);
+	vm_enable_cap(vm, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS, 0);
+	vcpu = vm_vcpu_add(vm, 0, guest_code);
 
 	/* Check for AARCH64 only system */
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+	val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1));
 	el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
-	aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+	aarch64_only = (el0 == ID_AA64PFR0_EL1_EL0_IMP);
 
 	ksft_print_header();
 
@@ -675,13 +700,14 @@ int main(void)
 		   ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
 		   ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
 		   ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
-		   ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 +
+		   ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 +
 		   MPAM_IDREG_TEST;
 
 	ksft_set_plan(test_cnt);
 
 	test_vm_ftr_id_regs(vcpu, aarch64_only);
 	test_vcpu_ftr_id_regs(vcpu);
+	test_vcpu_non_ftr_id_regs(vcpu);
 	test_user_set_mpam_reg(vcpu);
 
 	test_guest_reg_read(vcpu);
diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/arm64/smccc_filter.c
index 2d189f3da228..2d189f3da228 100644
--- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c
+++ b/tools/testing/selftests/kvm/arm64/smccc_filter.c
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c
index 80b74c6f152b..80b74c6f152b 100644
--- a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
+++ b/tools/testing/selftests/kvm/arm64/vcpu_width_config.c
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c
index b3b5fb0ff0a9..b3b5fb0ff0a9 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_init.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_init.c
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
index f4ac28d53747..f4ac28d53747 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
index fc4fe52fb6f8..fc4fe52fb6f8 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c
+++ b/tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
index f9c0c86d7e85..f16b3b27e32e 100644
--- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/arm64/vpmu_counter_access.c
@@ -440,8 +440,7 @@ static void create_vpmu_vm(void *guest_code)
 		       "Failed to create vgic-v3, skipping");
 
 	/* Make sure that PMUv3 support is indicated in the ID register */
-	vcpu_get_reg(vpmu_vm.vcpu,
-		     KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0);
+	dfr0 = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1));
 	pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
 	TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
 		    pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
@@ -484,7 +483,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
 	create_vpmu_vm(guest_code);
 	vcpu = vpmu_vm.vcpu;
 
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig);
+	pmcr_orig = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
 	pmcr = pmcr_orig;
 
 	/*
@@ -493,7 +492,7 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
 	 */
 	set_pmcr_n(&pmcr, pmcr_n);
 	vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+	pmcr = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
 
 	if (expect_fail)
 		TEST_ASSERT(pmcr_orig == pmcr,
@@ -521,7 +520,7 @@ static void run_access_test(uint64_t pmcr_n)
 	vcpu = vpmu_vm.vcpu;
 
 	/* Save the initial sp to restore them later to run the guest again */
-	vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp);
+	sp = vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1));
 
 	run_vcpu(vcpu, pmcr_n);
 
@@ -572,12 +571,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
 		 * Test if the 'set' and 'clr' variants of the registers
 		 * are initialized based on the number of valid counters.
 		 */
-		vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+		reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
 		TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
 			    "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
 			    KVM_ARM64_SYS_REG(set_reg_id), reg_val);
 
-		vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+		reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
 		TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
 			    "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
 			    KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
@@ -589,12 +588,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n)
 		 */
 		vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
 
-		vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+		reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id));
 		TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
 			    "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
 			    KVM_ARM64_SYS_REG(set_reg_id), reg_val);
 
-		vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+		reg_val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id));
 		TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
 			    "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
 			    KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
@@ -625,7 +624,7 @@ static uint64_t get_pmcr_n_limit(void)
 	uint64_t pmcr;
 
 	create_vpmu_vm(guest_code);
-	vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+	pmcr = vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0));
 	destroy_vpmu_vm();
 	return get_pmcr_n(pmcr);
 }
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 9f24303acb8c..e79817bd0e29 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -21,7 +21,7 @@
 #include "ucall_common.h"
 
 #ifdef __aarch64__
-#include "aarch64/vgic.h"
+#include "arm64/vgic.h"
 
 static int gic_fd;
 
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index aacf80f57439..23593d9eeba9 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -31,15 +31,18 @@
 /* Default guest test virtual memory offset */
 #define DEFAULT_GUEST_TEST_MEM		0xc0000000
 
-/* How many pages to dirty for each guest loop */
-#define TEST_PAGES_PER_LOOP		1024
-
 /* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
 #define TEST_HOST_LOOP_N		32UL
 
 /* Interval for each host loop (ms) */
 #define TEST_HOST_LOOP_INTERVAL		10UL
 
+/*
+ * Ensure the vCPU is able to perform a reasonable number of writes in each
+ * iteration to provide a lower bound on coverage.
+ */
+#define TEST_MIN_WRITES_PER_ITERATION	0x100
+
 /* Dirty bitmaps are always little endian, so we need to swap on big endian */
 #if defined(__s390x__)
 # define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
@@ -75,6 +78,8 @@ static uint64_t host_page_size;
 static uint64_t guest_page_size;
 static uint64_t guest_num_pages;
 static uint64_t iteration;
+static uint64_t nr_writes;
+static bool vcpu_stop;
 
 /*
  * Guest physical memory offset of the testing memory slot.
@@ -96,7 +101,9 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
 static void guest_code(void)
 {
 	uint64_t addr;
-	int i;
+
+#ifdef __s390x__
+	uint64_t i;
 
 	/*
 	 * On s390x, all pages of a 1M segment are initially marked as dirty
@@ -107,16 +114,19 @@ static void guest_code(void)
 	for (i = 0; i < guest_num_pages; i++) {
 		addr = guest_test_virt_mem + i * guest_page_size;
 		vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+		nr_writes++;
 	}
+#endif
 
 	while (true) {
-		for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+		while (!READ_ONCE(vcpu_stop)) {
 			addr = guest_test_virt_mem;
 			addr += (guest_random_u64(&guest_rng) % guest_num_pages)
 				* guest_page_size;
 			addr = align_down(addr, host_page_size);
 
 			vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
+			nr_writes++;
 		}
 
 		GUEST_SYNC(1);
@@ -133,25 +143,18 @@ static uint64_t host_num_pages;
 /* For statistics only */
 static uint64_t host_dirty_count;
 static uint64_t host_clear_count;
-static uint64_t host_track_next_count;
 
 /* Whether dirty ring reset is requested, or finished */
 static sem_t sem_vcpu_stop;
 static sem_t sem_vcpu_cont;
-/*
- * This is only set by main thread, and only cleared by vcpu thread.  It is
- * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC
- * is the only place that we'll guarantee both "dirty bit" and "dirty data"
- * will match.  E.g., SIG_IPI won't guarantee that if the vcpu is interrupted
- * after setting dirty bit but before the data is written.
- */
-static atomic_t vcpu_sync_stop_requested;
+
 /*
  * This is updated by the vcpu thread to tell the host whether it's a
  * ring-full event.  It should only be read until a sem_wait() of
  * sem_vcpu_stop and before vcpu continues to run.
  */
 static bool dirty_ring_vcpu_ring_full;
+
 /*
  * This is only used for verifying the dirty pages.  Dirty ring has a very
  * tricky case when the ring just got full, kvm will do userspace exit due to
@@ -166,7 +169,51 @@ static bool dirty_ring_vcpu_ring_full;
  * dirty gfn we've collected, so that if a mismatch of data found later in the
  * verifying process, we let it pass.
  */
-static uint64_t dirty_ring_last_page;
+static uint64_t dirty_ring_last_page = -1ULL;
+
+/*
+ * In addition to the above, it is possible (especially if this
+ * test is run nested) for the above scenario to repeat multiple times:
+ *
+ * The following can happen:
+ *
+ * - L1 vCPU:        Memory write is logged to PML but not committed.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry
+ *                   Resets the dirty ring which:
+ *                     - Resets the A/D bits in EPT
+ *                     - Issues tlb flush (invept), which is intercepted by L0
+ *
+ * - L0: frees the whole nested ept mmu root as the response to invept,
+ *       and thus ensures that when memory write is retried, it will fault again
+ *
+ * - L1 vCPU:        Same memory write is logged to the PML but not committed again.
+ *
+ * - L1 test thread: Ignores the write because its last dirty ring entry (again)
+ *                   Resets the dirty ring which:
+ *                     - Resets the A/D bits in EPT (again)
+ *                     - Issues tlb flush (again) which is intercepted by L0
+ *
+ * ...
+ *
+ * N times
+ *
+ * - L1 vCPU:        Memory write is logged in the PML and then committed.
+ *                   Lots of other memory writes are logged and committed.
+ * ...
+ *
+ * - L1 test thread: Sees the memory write along with other memory writes
+ *                   in the dirty ring, and since the write is usually not
+ *                   the last entry in the dirty-ring and has a very outdated
+ *                   iteration, the test fails.
+ *
+ *
+ * Note that this is only possible when the write was the last log entry
+ * write during iteration N-1, thus remember last iteration last log entry
+ * and also don't fail when it is reported in the next iteration, together with
+ * an outdated iteration count.
+ */
+static uint64_t dirty_ring_prev_iteration_last_page;
 
 enum log_mode_t {
 	/* Only use KVM_GET_DIRTY_LOG for logging */
@@ -191,24 +238,6 @@ static enum log_mode_t host_log_mode;
 static pthread_t vcpu_thread;
 static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
 
-static void vcpu_kick(void)
-{
-	pthread_kill(vcpu_thread, SIG_IPI);
-}
-
-/*
- * In our test we do signal tricks, let's use a better version of
- * sem_wait to avoid signal interrupts
- */
-static void sem_wait_until(sem_t *sem)
-{
-	int ret;
-
-	do
-		ret = sem_wait(sem);
-	while (ret == -1 && errno == EINTR);
-}
-
 static bool clear_log_supported(void)
 {
 	return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
@@ -243,21 +272,16 @@ static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
 /* Should only be called after a GUEST_SYNC */
 static void vcpu_handle_sync_stop(void)
 {
-	if (atomic_read(&vcpu_sync_stop_requested)) {
-		/* It means main thread is sleeping waiting */
-		atomic_set(&vcpu_sync_stop_requested, false);
+	if (READ_ONCE(vcpu_stop)) {
 		sem_post(&sem_vcpu_stop);
-		sem_wait_until(&sem_vcpu_cont);
+		sem_wait(&sem_vcpu_cont);
 	}
 }
 
-static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void default_after_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 
-	TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
-		    "vcpu run failed: errno=%d", err);
-
 	TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
 		    "Invalid guest sync status: exit_reason=%s",
 		    exit_reason_str(run->exit_reason));
@@ -324,7 +348,6 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
 			    "%u != %u", cur->slot, slot);
 		TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
 			    "0x%llx >= 0x%x", cur->offset, num_pages);
-		//pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
 		__set_bit_le(cur->offset, bitmap);
 		dirty_ring_last_page = cur->offset;
 		dirty_gfn_set_collected(cur);
@@ -335,36 +358,11 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
 	return count;
 }
 
-static void dirty_ring_wait_vcpu(void)
-{
-	/* This makes sure that hardware PML cache flushed */
-	vcpu_kick();
-	sem_wait_until(&sem_vcpu_stop);
-}
-
-static void dirty_ring_continue_vcpu(void)
-{
-	pr_info("Notifying vcpu to continue\n");
-	sem_post(&sem_vcpu_cont);
-}
-
 static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
 					   void *bitmap, uint32_t num_pages,
 					   uint32_t *ring_buf_idx)
 {
-	uint32_t count = 0, cleared;
-	bool continued_vcpu = false;
-
-	dirty_ring_wait_vcpu();
-
-	if (!dirty_ring_vcpu_ring_full) {
-		/*
-		 * This is not a ring-full event, it's safe to allow
-		 * vcpu to continue
-		 */
-		dirty_ring_continue_vcpu();
-		continued_vcpu = true;
-	}
+	uint32_t count, cleared;
 
 	/* Only have one vcpu */
 	count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
@@ -379,35 +377,18 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
 	 */
 	TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
 		    "with collected (%u)", cleared, count);
-
-	if (!continued_vcpu) {
-		TEST_ASSERT(dirty_ring_vcpu_ring_full,
-			    "Didn't continue vcpu even without ring full");
-		dirty_ring_continue_vcpu();
-	}
-
-	pr_info("Iteration %ld collected %u pages\n", iteration, count);
 }
 
-static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 
 	/* A ucall-sync or ring-full event is allowed */
 	if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
-		/* We should allow this to continue */
-		;
-	} else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
-		   (ret == -1 && err == EINTR)) {
-		/* Update the flag first before pause */
-		WRITE_ONCE(dirty_ring_vcpu_ring_full,
-			   run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
-		sem_post(&sem_vcpu_stop);
-		pr_info("vcpu stops because %s...\n",
-			dirty_ring_vcpu_ring_full ?
-			"dirty ring is full" : "vcpu is kicked out");
-		sem_wait_until(&sem_vcpu_cont);
-		pr_info("vcpu continues now.\n");
+		vcpu_handle_sync_stop();
+	} else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL) {
+		WRITE_ONCE(dirty_ring_vcpu_ring_full, true);
+		vcpu_handle_sync_stop();
 	} else {
 		TEST_ASSERT(false, "Invalid guest sync status: "
 			    "exit_reason=%s",
@@ -426,7 +407,7 @@ struct log_mode {
 				     void *bitmap, uint32_t num_pages,
 				     uint32_t *ring_buf_idx);
 	/* Hook to call when after each vcpu run */
-	void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
+	void (*after_vcpu_run)(struct kvm_vcpu *vcpu);
 } log_modes[LOG_MODE_NUM] = {
 	{
 		.name = "dirty-log",
@@ -449,15 +430,6 @@ struct log_mode {
 	},
 };
 
-/*
- * We use this bitmap to track some pages that should have its dirty
- * bit set in the _next_ iteration.  For example, if we detected the
- * page value changed to current iteration but at the same time the
- * page bit is cleared in the latest bitmap, then the system must
- * report that write in the next get dirty log call.
- */
-static unsigned long *host_bmap_track;
-
 static void log_modes_dump(void)
 {
 	int i;
@@ -497,170 +469,109 @@ static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
 	mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
 }
 
-static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct log_mode *mode = &log_modes[host_log_mode];
 
 	if (mode->after_vcpu_run)
-		mode->after_vcpu_run(vcpu, ret, err);
+		mode->after_vcpu_run(vcpu);
 }
 
 static void *vcpu_worker(void *data)
 {
-	int ret;
 	struct kvm_vcpu *vcpu = data;
-	uint64_t pages_count = 0;
-	struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
-						 + sizeof(sigset_t));
-	sigset_t *sigset = (sigset_t *) &sigmask->sigset;
 
-	/*
-	 * SIG_IPI is unblocked atomically while in KVM_RUN.  It causes the
-	 * ioctl to return with -EINTR, but it is still pending and we need
-	 * to accept it with the sigwait.
-	 */
-	sigmask->len = 8;
-	pthread_sigmask(0, NULL, sigset);
-	sigdelset(sigset, SIG_IPI);
-	vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask);
-
-	sigemptyset(sigset);
-	sigaddset(sigset, SIG_IPI);
+	sem_wait(&sem_vcpu_cont);
 
 	while (!READ_ONCE(host_quit)) {
-		/* Clear any existing kick signals */
-		pages_count += TEST_PAGES_PER_LOOP;
 		/* Let the guest dirty the random pages */
-		ret = __vcpu_run(vcpu);
-		if (ret == -1 && errno == EINTR) {
-			int sig = -1;
-			sigwait(sigset, &sig);
-			assert(sig == SIG_IPI);
-		}
-		log_mode_after_vcpu_run(vcpu, ret, errno);
+		vcpu_run(vcpu);
+		log_mode_after_vcpu_run(vcpu);
 	}
 
-	pr_info("Dirtied %"PRIu64" pages\n", pages_count);
-
 	return NULL;
 }
 
-static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long **bmap)
 {
+	uint64_t page, nr_dirty_pages = 0, nr_clean_pages = 0;
 	uint64_t step = vm_num_host_pages(mode, 1);
-	uint64_t page;
-	uint64_t *value_ptr;
-	uint64_t min_iter = 0;
 
 	for (page = 0; page < host_num_pages; page += step) {
-		value_ptr = host_test_mem + page * host_page_size;
-
-		/* If this is a special page that we were tracking... */
-		if (__test_and_clear_bit_le(page, host_bmap_track)) {
-			host_track_next_count++;
-			TEST_ASSERT(test_bit_le(page, bmap),
-				    "Page %"PRIu64" should have its dirty bit "
-				    "set in this iteration but it is missing",
-				    page);
-		}
+		uint64_t val = *(uint64_t *)(host_test_mem + page * host_page_size);
+		bool bmap0_dirty = __test_and_clear_bit_le(page, bmap[0]);
 
-		if (__test_and_clear_bit_le(page, bmap)) {
-			bool matched;
-
-			host_dirty_count++;
+		/*
+		 * Ensure both bitmaps are cleared, as a page can be written
+		 * multiple times per iteration, i.e. can show up in both
+		 * bitmaps, and the dirty ring is additive, i.e. doesn't purge
+		 * bitmap entries from previous collections.
+		 */
+		if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) {
+			nr_dirty_pages++;
 
 			/*
-			 * If the bit is set, the value written onto
-			 * the corresponding page should be either the
-			 * previous iteration number or the current one.
+			 * If the page is dirty, the value written to memory
+			 * should be the current iteration number.
 			 */
-			matched = (*value_ptr == iteration ||
-				   *value_ptr == iteration - 1);
-
-			if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
-				if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
-					/*
-					 * Short answer: this case is special
-					 * only for dirty ring test where the
-					 * page is the last page before a kvm
-					 * dirty ring full in iteration N-2.
-					 *
-					 * Long answer: Assuming ring size R,
-					 * one possible condition is:
-					 *
-					 *      main thr       vcpu thr
-					 *      --------       --------
-					 *    iter=1
-					 *                   write 1 to page 0~(R-1)
-					 *                   full, vmexit
-					 *    collect 0~(R-1)
-					 *    kick vcpu
-					 *                   write 1 to (R-1)~(2R-2)
-					 *                   full, vmexit
-					 *    iter=2
-					 *    collect (R-1)~(2R-2)
-					 *    kick vcpu
-					 *                   write 1 to (2R-2)
-					 *                   (NOTE!!! "1" cached in cpu reg)
-					 *                   write 2 to (2R-1)~(3R-3)
-					 *                   full, vmexit
-					 *    iter=3
-					 *    collect (2R-2)~(3R-3)
-					 *    (here if we read value on page
-					 *     "2R-2" is 1, while iter=3!!!)
-					 *
-					 * This however can only happen once per iteration.
-					 */
-					min_iter = iteration - 1;
+			if (val == iteration)
+				continue;
+
+			if (host_log_mode == LOG_MODE_DIRTY_RING) {
+				/*
+				 * The last page in the ring from previous
+				 * iteration can be written with the value
+				 * from the previous iteration, as the value to
+				 * be written may be cached in a CPU register.
+				 */
+				if (page == dirty_ring_prev_iteration_last_page &&
+				    val == iteration - 1)
 					continue;
-				} else if (page == dirty_ring_last_page) {
-					/*
-					 * Please refer to comments in
-					 * dirty_ring_last_page.
-					 */
+
+				/*
+				 * Any value from a previous iteration is legal
+				 * for the last entry, as the write may not yet
+				 * have retired, i.e. the page may hold whatever
+				 * it had before this iteration started.
+				 */
+				if (page == dirty_ring_last_page &&
+				    val < iteration)
 					continue;
-				}
+			} else if (!val && iteration == 1 && bmap0_dirty) {
+				/*
+				 * When testing get+clear, the dirty bitmap
+				 * starts with all bits set, and so the first
+				 * iteration can observe a "dirty" page that
+				 * was never written, but only in the first
+				 * bitmap (collecting the bitmap also clears
+				 * all dirty pages).
+				 */
+				continue;
 			}
 
-			TEST_ASSERT(matched,
-				    "Set page %"PRIu64" value %"PRIu64
-				    " incorrect (iteration=%"PRIu64")",
-				    page, *value_ptr, iteration);
+			TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) "
+				  "(last = %lu, prev_last = %lu)",
+				  page, val, iteration, dirty_ring_last_page,
+				  dirty_ring_prev_iteration_last_page);
 		} else {
-			host_clear_count++;
+			nr_clean_pages++;
 			/*
 			 * If cleared, the value written can be any
-			 * value smaller or equals to the iteration
-			 * number.  Note that the value can be exactly
-			 * (iteration-1) if that write can happen
-			 * like this:
-			 *
-			 * (1) increase loop count to "iteration-1"
-			 * (2) write to page P happens (with value
-			 *     "iteration-1")
-			 * (3) get dirty log for "iteration-1"; we'll
-			 *     see that page P bit is set (dirtied),
-			 *     and not set the bit in host_bmap_track
-			 * (4) increase loop count to "iteration"
-			 *     (which is current iteration)
-			 * (5) get dirty log for current iteration,
-			 *     we'll see that page P is cleared, with
-			 *     value "iteration-1".
+			 * value smaller than the iteration number.
 			 */
-			TEST_ASSERT(*value_ptr <= iteration,
-				    "Clear page %"PRIu64" value %"PRIu64
-				    " incorrect (iteration=%"PRIu64")",
-				    page, *value_ptr, iteration);
-			if (*value_ptr == iteration) {
-				/*
-				 * This page is _just_ modified; it
-				 * should report its dirtyness in the
-				 * next run
-				 */
-				__set_bit_le(page, host_bmap_track);
-			}
+			TEST_ASSERT(val < iteration,
+				    "Clear page %lu value (%lu) >= iteration (%lu) "
+				    "(last = %lu, prev_last = %lu)",
+				    page, val, iteration, dirty_ring_last_page,
+				    dirty_ring_prev_iteration_last_page);
 		}
 	}
+
+	pr_info("Iteration %2ld: dirty: %-6lu clean: %-6lu writes: %-6lu\n",
+		iteration, nr_dirty_pages, nr_clean_pages, nr_writes);
+
+	host_dirty_count += nr_dirty_pages;
+	host_clear_count += nr_clean_pages;
 }
 
 static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
@@ -688,7 +599,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct test_params *p = arg;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	unsigned long *bmap;
+	unsigned long *bmap[2];
 	uint32_t ring_buf_idx = 0;
 	int sem_val;
 
@@ -731,12 +642,21 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 #ifdef __s390x__
 	/* Align to 1M (segment size) */
 	guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
+
+	/*
+	 * The workaround in guest_code() to write all pages prior to the first
+	 * iteration isn't compatible with the dirty ring, as the dirty ring
+	 * support relies on the vCPU to actually stop when vcpu_stop is set so
+	 * that the vCPU doesn't hang waiting for the dirty ring to be emptied.
+	 */
+	TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING,
+		    "Test needs to be updated to support s390 dirty ring");
 #endif
 
 	pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
 
-	bmap = bitmap_zalloc(host_num_pages);
-	host_bmap_track = bitmap_zalloc(host_num_pages);
+	bmap[0] = bitmap_zalloc(host_num_pages);
+	bmap[1] = bitmap_zalloc(host_num_pages);
 
 	/* Add an extra memory slot for testing dirty logging */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -757,14 +677,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	sync_global_to_guest(vm, guest_test_virt_mem);
 	sync_global_to_guest(vm, guest_num_pages);
 
-	/* Start the iterations */
-	iteration = 1;
-	sync_global_to_guest(vm, iteration);
-	WRITE_ONCE(host_quit, false);
 	host_dirty_count = 0;
 	host_clear_count = 0;
-	host_track_next_count = 0;
-	WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+	WRITE_ONCE(host_quit, false);
 
 	/*
 	 * Ensure the previous iteration didn't leave a dangling semaphore, i.e.
@@ -776,21 +691,95 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	sem_getvalue(&sem_vcpu_cont, &sem_val);
 	TEST_ASSERT_EQ(sem_val, 0);
 
+	TEST_ASSERT_EQ(vcpu_stop, false);
+
 	pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
 
-	while (iteration < p->iterations) {
-		/* Give the vcpu thread some time to dirty some pages */
-		usleep(p->interval * 1000);
-		log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
-					     bmap, host_num_pages,
-					     &ring_buf_idx);
+	for (iteration = 1; iteration <= p->iterations; iteration++) {
+		unsigned long i;
+
+		sync_global_to_guest(vm, iteration);
+
+		WRITE_ONCE(nr_writes, 0);
+		sync_global_to_guest(vm, nr_writes);
+
+		dirty_ring_prev_iteration_last_page = dirty_ring_last_page;
+		WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
+
+		sem_post(&sem_vcpu_cont);
+
+		/*
+		 * Let the vCPU run beyond the configured interval until it has
+		 * performed the minimum number of writes.  This verifies the
+		 * guest is making forward progress, e.g. isn't stuck because
+		 * of a KVM bug, and puts a firm floor on test coverage.
+		 */
+		for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) {
+			/*
+			 * Sleep in 1ms chunks to keep the interval math simple
+			 * and so that the test doesn't run too far beyond the
+			 * specified interval.
+			 */
+			usleep(1000);
+
+			sync_global_from_guest(vm, nr_writes);
+
+			/*
+			 * Reap dirty pages while the guest is running so that
+			 * dirty ring full events are resolved, i.e. so that a
+			 * larger interval doesn't always end up with a vCPU
+			 * that's effectively blocked.  Collecting while the
+			 * guest is running also verifies KVM doesn't lose any
+			 * state.
+			 *
+			 * For bitmap modes, KVM overwrites the entire bitmap,
+			 * i.e. collecting the bitmaps is destructive.  Collect
+			 * the bitmap only on the first pass, otherwise this
+			 * test would lose track of dirty pages.
+			 */
+			if (i && host_log_mode != LOG_MODE_DIRTY_RING)
+				continue;
+
+			/*
+			 * For the dirty ring, empty the ring on subsequent
+			 * passes only if the ring was filled at least once,
+			 * to verify KVM's handling of a full ring (emptying
+			 * the ring on every pass would make it unlikely the
+			 * vCPU would ever fill the fing).
+			 */
+			if (i && !READ_ONCE(dirty_ring_vcpu_ring_full))
+				continue;
+
+			log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+						     bmap[0], host_num_pages,
+						     &ring_buf_idx);
+		}
+
+		/*
+		 * Stop the vCPU prior to collecting and verifying the dirty
+		 * log.  If the vCPU is allowed to run during collection, then
+		 * pages that are written during this iteration may be missed,
+		 * i.e. collected in the next iteration.  And if the vCPU is
+		 * writing memory during verification, pages that this thread
+		 * sees as clean may be written with this iteration's value.
+		 */
+		WRITE_ONCE(vcpu_stop, true);
+		sync_global_to_guest(vm, vcpu_stop);
+		sem_wait(&sem_vcpu_stop);
 
 		/*
-		 * See vcpu_sync_stop_requested definition for details on why
-		 * we need to stop vcpu when verify data.
+		 * Clear vcpu_stop after the vCPU thread has acknowledge the
+		 * stop request and is waiting, i.e. is definitely not running!
 		 */
-		atomic_set(&vcpu_sync_stop_requested, true);
-		sem_wait_until(&sem_vcpu_stop);
+		WRITE_ONCE(vcpu_stop, false);
+		sync_global_to_guest(vm, vcpu_stop);
+
+		/*
+		 * Sync the number of writes performed before verification, the
+		 * info will be printed along with the dirty/clean page counts.
+		 */
+		sync_global_from_guest(vm, nr_writes);
+
 		/*
 		 * NOTE: for dirty ring, it's possible that we didn't stop at
 		 * GUEST_SYNC but instead we stopped because ring is full;
@@ -798,32 +787,22 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		 * the flush of the last page, and since we handle the last
 		 * page specially verification will succeed anyway.
 		 */
-		assert(host_log_mode == LOG_MODE_DIRTY_RING ||
-		       atomic_read(&vcpu_sync_stop_requested) == false);
+		log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+					     bmap[1], host_num_pages,
+					     &ring_buf_idx);
 		vm_dirty_log_verify(mode, bmap);
-
-		/*
-		 * Set host_quit before sem_vcpu_cont in the final iteration to
-		 * ensure that the vCPU worker doesn't resume the guest.  As
-		 * above, the dirty ring test may stop and wait even when not
-		 * explicitly request to do so, i.e. would hang waiting for a
-		 * "continue" if it's allowed to resume the guest.
-		 */
-		if (++iteration == p->iterations)
-			WRITE_ONCE(host_quit, true);
-
-		sem_post(&sem_vcpu_cont);
-		sync_global_to_guest(vm, iteration);
 	}
 
+	WRITE_ONCE(host_quit, true);
+	sem_post(&sem_vcpu_cont);
+
 	pthread_join(vcpu_thread, NULL);
 
-	pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
-		"track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
-		host_track_next_count);
+	pr_info("Total bits checked: dirty (%lu), clear (%lu)\n",
+		host_dirty_count, host_clear_count);
 
-	free(bmap);
-	free(host_bmap_track);
+	free(bmap[0]);
+	free(bmap[1]);
 	kvm_vm_free(vm);
 }
 
@@ -857,7 +836,6 @@ int main(int argc, char *argv[])
 		.interval = TEST_HOST_LOOP_INTERVAL,
 	};
 	int opt, i;
-	sigset_t sigset;
 
 	sem_init(&sem_vcpu_stop, 0, 0);
 	sem_init(&sem_vcpu_cont, 0, 0);
@@ -908,19 +886,12 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+	TEST_ASSERT(p.iterations > 0, "Iterations must be greater than zero");
 	TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
 
 	pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
 		p.iterations, p.interval);
 
-	srandom(time(0));
-
-	/* Ensure that vCPU threads start with SIG_IPI blocked.  */
-	sigemptyset(&sigset);
-	sigaddset(&sigset, SIG_IPI);
-	pthread_sigmask(SIG_BLOCK, &sigset, NULL);
-
 	if (host_log_mode_option == LOG_MODE_ALL) {
 		/* Run each log mode */
 		for (i = 0; i < LOG_MODE_NUM; i++) {
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
index bf461de34785..bf461de34785 100644
--- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
+++ b/tools/testing/selftests/kvm/include/arm64/arch_timer.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/arm64/delay.h
index 329e4f5079ea..329e4f5079ea 100644
--- a/tools/testing/selftests/kvm/include/aarch64/delay.h
+++ b/tools/testing/selftests/kvm/include/arm64/delay.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/arm64/gic.h
index baeb3c859389..baeb3c859389 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/arm64/gic_v3.h
index a76615fa39a1..a76615fa39a1 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
index 3722ed9c8f96..3722ed9c8f96 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h
+++ b/tools/testing/selftests/kvm/include/arm64/gic_v3_its.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
index e43a57d99b56..e43a57d99b56 100644
--- a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 1e8d0d531fbd..1e8d0d531fbd 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/arm64/spinlock.h
index cf0984106d14..cf0984106d14 100644
--- a/tools/testing/selftests/kvm/include/aarch64/spinlock.h
+++ b/tools/testing/selftests/kvm/include/arm64/spinlock.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/arm64/ucall.h
index 4ec801f37f00..4ec801f37f00 100644
--- a/tools/testing/selftests/kvm/include/aarch64/ucall.h
+++ b/tools/testing/selftests/kvm/include/arm64/ucall.h
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/arm64/vgic.h
index c481d0c00a5d..c481d0c00a5d 100644
--- a/tools/testing/selftests/kvm/include/aarch64/vgic.h
+++ b/tools/testing/selftests/kvm/include/arm64/vgic.h
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index bc7c242480d6..373912464fb4 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -46,6 +46,12 @@ struct userspace_mem_region {
 	struct hlist_node slot_node;
 };
 
+struct kvm_binary_stats {
+	int fd;
+	struct kvm_stats_header header;
+	struct kvm_stats_desc *desc;
+};
+
 struct kvm_vcpu {
 	struct list_head list;
 	uint32_t id;
@@ -55,6 +61,7 @@ struct kvm_vcpu {
 #ifdef __x86_64__
 	struct kvm_cpuid2 *cpuid;
 #endif
+	struct kvm_binary_stats stats;
 	struct kvm_dirty_gfn *dirty_gfns;
 	uint32_t fetch_index;
 	uint32_t dirty_gfns_count;
@@ -99,10 +106,7 @@ struct kvm_vm {
 
 	struct kvm_vm_arch arch;
 
-	/* Cache of information for binary stats interface */
-	int stats_fd;
-	struct kvm_stats_header stats_header;
-	struct kvm_stats_desc *stats_desc;
+	struct kvm_binary_stats stats;
 
 	/*
 	 * KVM region slots. These are the default memslots used by page
@@ -531,16 +535,19 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
 		    struct kvm_stats_desc *desc, uint64_t *data,
 		    size_t max_elements);
 
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
-		   size_t max_elements);
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+		  uint64_t *data, size_t max_elements);
 
-static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
-{
-	uint64_t data;
+#define __get_stat(stats, stat)							\
+({										\
+	uint64_t data;								\
+										\
+	kvm_get_stat(stats, #stat, &data, 1);					\
+	data;									\
+})
 
-	__vm_get_stat(vm, stat_name, &data, 1);
-	return data;
-}
+#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
+#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
 
 void vm_create_irqchip(struct kvm_vm *vm);
 
@@ -702,16 +709,22 @@ static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t va
 
 	return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
 }
-static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+static inline uint64_t vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id)
 {
-	struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+	uint64_t val;
+	struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+	TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id);
 
 	vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+	return val;
 }
 static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
 {
 	struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
 
+	TEST_ASSERT(KVM_REG_SIZE(id) <= sizeof(val), "Reg %lx too big", id);
+
 	vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
 }
 
@@ -957,6 +970,8 @@ static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape
 
 struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
 
+void kvm_set_files_rlimit(uint32_t nr_vcpus);
+
 void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
 void kvm_print_vcpu_pinning_help(void);
 void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390/debug_print.h
index 1bf275631cc6..1bf275631cc6 100644
--- a/tools/testing/selftests/kvm/include/s390x/debug_print.h
+++ b/tools/testing/selftests/kvm/include/s390/debug_print.h
diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
index b0ed71302722..b0ed71302722 100644
--- a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
+++ b/tools/testing/selftests/kvm/include/s390/diag318_test_handler.h
diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390/facility.h
index 00a1ced6538b..00a1ced6538b 100644
--- a/tools/testing/selftests/kvm/include/s390x/facility.h
+++ b/tools/testing/selftests/kvm/include/s390/facility.h
diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
index e43a57d99b56..e43a57d99b56 100644
--- a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/s390/kvm_util_arch.h
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390/processor.h
index 33fef6fd9617..33fef6fd9617 100644
--- a/tools/testing/selftests/kvm/include/s390x/processor.h
+++ b/tools/testing/selftests/kvm/include/s390/processor.h
diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390/sie.h
index 160acd4a1db9..160acd4a1db9 100644
--- a/tools/testing/selftests/kvm/include/s390x/sie.h
+++ b/tools/testing/selftests/kvm/include/s390/sie.h
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390/ucall.h
index 8035a872a351..8035a872a351 100644
--- a/tools/testing/selftests/kvm/include/s390x/ucall.h
+++ b/tools/testing/selftests/kvm/include/s390/ucall.h
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 3e473058849f..77d13d7920cb 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -22,7 +22,7 @@
 
 #define msecs_to_usecs(msec)    ((msec) * 1000ULL)
 
-static inline int _no_printf(const char *format, ...) { return 0; }
+static inline __printf(1, 2) int _no_printf(const char *format, ...) { return 0; }
 
 #ifdef DEBUG
 #define pr_debug(...) printf(__VA_ARGS__)
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86/apic.h
index 51990094effd..80fe9f69b38d 100644
--- a/tools/testing/selftests/kvm/include/x86_64/apic.h
+++ b/tools/testing/selftests/kvm/include/x86/apic.h
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * tools/testing/selftests/kvm/include/x86_64/apic.h
- *
  * Copyright (C) 2021, Google LLC.
  */
 
diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86/evmcs.h
index 901caf0e0939..5a74bb30e2f8 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86/evmcs.h
@@ -1,9 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * tools/testing/selftests/kvm/include/x86_64/evmcs.h
- *
  * Copyright (C) 2018, Red Hat, Inc.
- *
  */
 
 #ifndef SELFTEST_KVM_EVMCS_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86/hyperv.h
index 6849e2552f1b..f13e532be240 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86/hyperv.h
@@ -1,9 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * tools/testing/selftests/kvm/include/x86_64/hyperv.h
- *
  * Copyright (C) 2021, Red Hat, Inc.
- *
  */
 
 #ifndef SELFTEST_KVM_HYPERV_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
index 972bb1c4ab4c..972bb1c4ab4c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/x86/kvm_util_arch.h
diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86/mce.h
index 6119321f3f5d..295f2d554754 100644
--- a/tools/testing/selftests/kvm/include/x86_64/mce.h
+++ b/tools/testing/selftests/kvm/include/x86/mce.h
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * tools/testing/selftests/kvm/include/x86_64/mce.h
- *
  * Copyright (C) 2022, Google LLC.
  */
 
diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86/pmu.h
index 3c10c4dc0ae8..3c10c4dc0ae8 100644
--- a/tools/testing/selftests/kvm/include/x86_64/pmu.h
+++ b/tools/testing/selftests/kvm/include/x86/pmu.h
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 645200e95f89..32ab6ca7ec32 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * tools/testing/selftests/kvm/include/x86_64/processor.h
- *
  * Copyright (C) 2018, Google LLC.
  */
 
@@ -29,6 +27,8 @@ extern uint64_t guest_tsc_khz;
 #define MAX_NR_CPUID_ENTRIES 100
 #endif
 
+#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull
+
 /* Forced emulation prefix, used to invoke the emulator unconditionally. */
 #define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
 
@@ -183,6 +183,9 @@ struct kvm_x86_cpu_feature {
  * Extended Leafs, a.k.a. AMD defined
  */
 #define	X86_FEATURE_SVM			KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define	X86_FEATURE_PERFCTR_CORE	KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23)
+#define	X86_FEATURE_PERFCTR_NB		KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24)
+#define	X86_FEATURE_PERFCTR_LLC		KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28)
 #define	X86_FEATURE_NX			KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
 #define	X86_FEATURE_GBPAGES		KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
 #define	X86_FEATURE_RDTSCP		KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
@@ -197,8 +200,11 @@ struct kvm_x86_cpu_feature {
 #define X86_FEATURE_PAUSEFILTER         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
 #define X86_FEATURE_PFTHRESHOLD         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
 #define	X86_FEATURE_VGIF		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_IDLE_HLT		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
 #define X86_FEATURE_SEV			KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
 #define X86_FEATURE_SEV_ES		KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define	X86_FEATURE_PERFMON_V2		KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
+#define	X86_FEATURE_LBR_PMC_FREEZE	KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
 
 /*
  * KVM defined paravirt features.
@@ -285,6 +291,8 @@ struct kvm_x86_cpu_property {
 #define X86_PROPERTY_GUEST_MAX_PHY_ADDR		KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
 #define X86_PROPERTY_SEV_C_BIT			KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
 #define X86_PROPERTY_PHYS_ADDR_REDUCTION	KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+#define X86_PROPERTY_NR_PERFCTR_CORE		KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3)
+#define X86_PROPERTY_NR_PERFCTR_NB		KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15)
 
 #define X86_PROPERTY_MAX_CENTAUR_LEAF		KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
 
@@ -571,6 +579,11 @@ static inline void set_cr4(uint64_t val)
 	__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
 }
 
+static inline void set_idt(const struct desc_ptr *idt_desc)
+{
+	__asm__ __volatile__("lidt %0"::"m"(*idt_desc));
+}
+
 static inline u64 xgetbv(u32 index)
 {
 	u32 eax, edx;
@@ -1012,10 +1025,19 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
 
 void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
 
+static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
+{
+	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
+
 static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
 							      uint32_t function,
 							      uint32_t index)
 {
+	TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");
+
+	vcpu_get_cpuid(vcpu);
+
 	return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
 							  function, index);
 }
@@ -1036,7 +1058,7 @@ static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
 		return r;
 
 	/* On success, refresh the cache to pick up adjustments made by KVM. */
-	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+	vcpu_get_cpuid(vcpu);
 	return 0;
 }
 
@@ -1046,12 +1068,7 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
 	vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
 
 	/* Refresh the cache to pick up adjustments made by KVM. */
-	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
-}
-
-static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
-{
-	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+	vcpu_get_cpuid(vcpu);
 }
 
 void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
@@ -1235,7 +1252,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
 	uint64_t ign_error_code;					\
 	uint8_t vector;							\
 									\
-	asm volatile(KVM_ASM_SAFE(insn)					\
+	asm volatile(KVM_ASM_SAFE_FEP(insn)				\
 		     : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)	\
 		     : inputs						\
 		     : KVM_ASM_SAFE_CLOBBERS);				\
@@ -1330,6 +1347,46 @@ static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
 	GUEST_ASSERT(!ret);
 }
 
+/*
+ * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's
+ * intended to be a wake event arrives *after* HLT is executed.  Modern CPUs,
+ * except for a few oddballs that KVM is unlikely to run on, block IRQs for one
+ * instruction after STI, *if* RFLAGS.IF=0 before STI.  Note, Intel CPUs may
+ * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too.
+ */
+static inline void safe_halt(void)
+{
+	asm volatile("sti; hlt");
+}
+
+/*
+ * Enable interrupts and ensure that interrupts are evaluated upon return from
+ * this function, i.e. execute a nop to consume the STi interrupt shadow.
+ */
+static inline void sti_nop(void)
+{
+	asm volatile ("sti; nop");
+}
+
+/*
+ * Enable interrupts for one instruction (nop), to allow the CPU to process all
+ * interrupts that are already pending.
+ */
+static inline void sti_nop_cli(void)
+{
+	asm volatile ("sti; nop; cli");
+}
+
+static inline void sti(void)
+{
+	asm volatile("sti");
+}
+
+static inline void cli(void)
+{
+	asm volatile ("cli");
+}
+
 void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
 
 #define vm_xsave_require_permission(xfeature)	\
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
index 82c11c81a956..82c11c81a956 100644
--- a/tools/testing/selftests/kvm/include/x86_64/sev.h
+++ b/tools/testing/selftests/kvm/include/x86/sev.h
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86/svm.h
index 4803e1056055..29cffd0a9181 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86/svm.h
@@ -1,10 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- * tools/testing/selftests/kvm/include/x86_64/svm.h
- * This is a copy of arch/x86/include/asm/svm.h
- *
- */
-
 #ifndef SELFTEST_KVM_SVM_H
 #define SELFTEST_KVM_SVM_H
 
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86/svm_util.h
index 044f0f872ba9..b74c6dcddcbd 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86/svm_util.h
@@ -1,8 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * tools/testing/selftests/kvm/include/x86_64/svm_utils.h
- * Header for nested SVM testing
- *
  * Copyright (C) 2020, Red Hat, Inc.
  */
 
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86/ucall.h
index d3825dcc3cd9..d3825dcc3cd9 100644
--- a/tools/testing/selftests/kvm/include/x86_64/ucall.h
+++ b/tools/testing/selftests/kvm/include/x86/ucall.h
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86/vmx.h
index 5f0c0a29c556..edb3c391b982 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86/vmx.h
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * tools/testing/selftests/kvm/include/x86_64/vmx.h
- *
  * Copyright (C) 2018, Google LLC.
  */
 
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index c78f34699f73..c5310736ed06 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -10,7 +10,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <sys/resource.h>
 
 #include "test_util.h"
 
@@ -39,36 +38,11 @@ int main(int argc, char *argv[])
 {
 	int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
 	int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
-	/*
-	 * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds +
-	 * an arbitrary number for everything else.
-	 */
-	int nr_fds_wanted = kvm_max_vcpus + 100;
-	struct rlimit rl;
 
 	pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
 	pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
 
-	/*
-	 * Check that we're allowed to open nr_fds_wanted file descriptors and
-	 * try raising the limits if needed.
-	 */
-	TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
-
-	if (rl.rlim_cur < nr_fds_wanted) {
-		rl.rlim_cur = nr_fds_wanted;
-		if (rl.rlim_max < nr_fds_wanted) {
-			int old_rlim_max = rl.rlim_max;
-			rl.rlim_max = nr_fds_wanted;
-
-			int r = setrlimit(RLIMIT_NOFILE, &rl);
-			__TEST_REQUIRE(r >= 0,
-				       "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
-				       old_rlim_max, nr_fds_wanted);
-		} else {
-			TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
-		}
-	}
+	kvm_set_files_rlimit(kvm_max_vcpus);
 
 	/*
 	 * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/arm64/gic.c
index 7abbf8866512..7abbf8866512 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
index d24e9ecc96c6..d24e9ecc96c6 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_private.h
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
index 66d05506f78b..66d05506f78b 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
index 09f270545646..09f270545646 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c
+++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/arm64/handlers.S
index 0e443eadfac6..0e443eadfac6 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/handlers.S
+++ b/tools/testing/selftests/kvm/lib/arm64/handlers.S
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 698e34f39241..7ba3aa3755f3 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -281,8 +281,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
 	 */
 	vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
 
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
+	sctlr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1));
+	tcr_el1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1));
 
 	/* Configure base granule size */
 	switch (vm->mode) {
@@ -360,8 +360,8 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
 {
 	uint64_t pstate, pc;
 
-	vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate);
-	vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc);
+	pstate = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate));
+	pc = vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc));
 
 	fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
 		indent, "", pstate, pc);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/arm64/spinlock.c
index a076e780be5d..a076e780be5d 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
+++ b/tools/testing/selftests/kvm/lib/arm64/spinlock.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/arm64/ucall.c
index ddab0ce89d4d..ddab0ce89d4d 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/arm64/ucall.c
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/arm64/vgic.c
index 4427f43f73ea..4427f43f73ea 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/arm64/vgic.c
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 480e3a40d197..279ad8946040 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -12,6 +12,7 @@
 #include <assert.h>
 #include <sched.h>
 #include <sys/mman.h>
+#include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
@@ -196,6 +197,11 @@ static void vm_open(struct kvm_vm *vm)
 
 	vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
 	TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
+
+	if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+		vm->stats.fd = vm_get_stats_fd(vm);
+	else
+		vm->stats.fd = -1;
 }
 
 const char *vm_guest_mode_string(uint32_t i)
@@ -406,6 +412,38 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
 	return vm_adjust_num_guest_pages(mode, nr_pages);
 }
 
+void kvm_set_files_rlimit(uint32_t nr_vcpus)
+{
+	/*
+	 * Each vCPU will open two file descriptors: the vCPU itself and the
+	 * vCPU's binary stats file descriptor.  Add an arbitrary amount of
+	 * buffer for all other files a test may open.
+	 */
+	int nr_fds_wanted = nr_vcpus * 2 + 100;
+	struct rlimit rl;
+
+	/*
+	 * Check that we're allowed to open nr_fds_wanted file descriptors and
+	 * try raising the limits if needed.
+	 */
+	TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+	if (rl.rlim_cur < nr_fds_wanted) {
+		rl.rlim_cur = nr_fds_wanted;
+		if (rl.rlim_max < nr_fds_wanted) {
+			int old_rlim_max = rl.rlim_max;
+
+			rl.rlim_max = nr_fds_wanted;
+			__TEST_REQUIRE(setrlimit(RLIMIT_NOFILE, &rl) >= 0,
+				       "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
+				       old_rlim_max, nr_fds_wanted);
+		} else {
+			TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+		}
+	}
+
+}
+
 struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
 			   uint64_t nr_extra_pages)
 {
@@ -415,6 +453,8 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
 	struct kvm_vm *vm;
 	int i;
 
+	kvm_set_files_rlimit(nr_runnable_vcpus);
+
 	pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
 		 vm_guest_mode_string(shape.mode), shape.type, nr_pages);
 
@@ -657,6 +697,23 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
 	return NULL;
 }
 
+static void kvm_stats_release(struct kvm_binary_stats *stats)
+{
+	int ret;
+
+	if (stats->fd < 0)
+		return;
+
+	if (stats->desc) {
+		free(stats->desc);
+		stats->desc = NULL;
+	}
+
+	ret = close(stats->fd);
+	TEST_ASSERT(!ret,  __KVM_SYSCALL_ERROR("close()", ret));
+	stats->fd = -1;
+}
+
 __weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
 {
 
@@ -690,6 +747,8 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
 	ret = close(vcpu->fd);
 	TEST_ASSERT(!ret,  __KVM_SYSCALL_ERROR("close()", ret));
 
+	kvm_stats_release(&vcpu->stats);
+
 	list_del(&vcpu->list);
 
 	vcpu_arch_free(vcpu);
@@ -709,6 +768,9 @@ void kvm_vm_release(struct kvm_vm *vmp)
 
 	ret = close(vmp->kvm_fd);
 	TEST_ASSERT(!ret,  __KVM_SYSCALL_ERROR("close()", ret));
+
+	/* Free cached stats metadata and close FD */
+	kvm_stats_release(&vmp->stats);
 }
 
 static void __vm_mem_region_delete(struct kvm_vm *vm,
@@ -748,12 +810,6 @@ void kvm_vm_free(struct kvm_vm *vmp)
 	if (vmp == NULL)
 		return;
 
-	/* Free cached stats metadata and close FD */
-	if (vmp->stats_fd) {
-		free(vmp->stats_desc);
-		close(vmp->stats_fd);
-	}
-
 	/* Free userspace_mem_regions. */
 	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
 		__vm_mem_region_delete(vmp, region);
@@ -1286,6 +1342,11 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
 	TEST_ASSERT(vcpu->run != MAP_FAILED,
 		    __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
 
+	if (kvm_has_cap(KVM_CAP_BINARY_STATS_FD))
+		vcpu->stats.fd = vcpu_get_stats_fd(vcpu);
+	else
+		vcpu->stats.fd = -1;
+
 	/* Add to linked-list of VCPUs. */
 	list_add(&vcpu->list, &vm->vcpus);
 
@@ -1648,7 +1709,8 @@ int _vcpu_run(struct kvm_vcpu *vcpu)
 		rc = __vcpu_run(vcpu);
 	} while (rc == -1 && errno == EINTR);
 
-	assert_on_unhandled_exception(vcpu);
+	if (!rc)
+		assert_on_unhandled_exception(vcpu);
 
 	return rc;
 }
@@ -2197,46 +2259,31 @@ void read_stat_data(int stats_fd, struct kvm_stats_header *header,
 		    desc->name, size, ret);
 }
 
-/*
- * Read the data of the named stat
- *
- * Input Args:
- *   vm - the VM for which the stat should be read
- *   stat_name - the name of the stat to read
- *   max_elements - the maximum number of 8-byte values to read into data
- *
- * Output Args:
- *   data - the buffer into which stat data should be read
- *
- * Read the data values of a specified stat from the binary stats interface.
- */
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
-		   size_t max_elements)
+void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
+		  uint64_t *data, size_t max_elements)
 {
 	struct kvm_stats_desc *desc;
 	size_t size_desc;
 	int i;
 
-	if (!vm->stats_fd) {
-		vm->stats_fd = vm_get_stats_fd(vm);
-		read_stats_header(vm->stats_fd, &vm->stats_header);
-		vm->stats_desc = read_stats_descriptors(vm->stats_fd,
-							&vm->stats_header);
+	if (!stats->desc) {
+		read_stats_header(stats->fd, &stats->header);
+		stats->desc = read_stats_descriptors(stats->fd, &stats->header);
 	}
 
-	size_desc = get_stats_descriptor_size(&vm->stats_header);
+	size_desc = get_stats_descriptor_size(&stats->header);
 
-	for (i = 0; i < vm->stats_header.num_desc; ++i) {
-		desc = (void *)vm->stats_desc + (i * size_desc);
+	for (i = 0; i < stats->header.num_desc; ++i) {
+		desc = (void *)stats->desc + (i * size_desc);
 
-		if (strcmp(desc->name, stat_name))
+		if (strcmp(desc->name, name))
 			continue;
 
-		read_stat_data(vm->stats_fd, &vm->stats_header, desc,
-			       data, max_elements);
-
-		break;
+		read_stat_data(stats->fd, &stats->header, desc, data, max_elements);
+		return;
 	}
+
+	TEST_FAIL("Unable to find stat '%s'", name);
 }
 
 __weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index 6ae47b3d6b25..dd663bcf0cc0 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -221,39 +221,39 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
 {
 	struct kvm_riscv_core core;
 
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5);
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6);
+	core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode));
+	core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc));
+	core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra));
+	core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp));
+	core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp));
+	core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp));
+	core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0));
+	core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1));
+	core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2));
+	core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0));
+	core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1));
+	core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0));
+	core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1));
+	core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2));
+	core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3));
+	core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4));
+	core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5));
+	core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6));
+	core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7));
+	core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2));
+	core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3));
+	core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4));
+	core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5));
+	core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6));
+	core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7));
+	core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8));
+	core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9));
+	core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10));
+	core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11));
+	core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3));
+	core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4));
+	core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5));
+	core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6));
 
 	fprintf(stream,
 		" MODE:  0x%lx\n", core.mode);
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
index 2c432fa164f1..2c432fa164f1 100644
--- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
+++ b/tools/testing/selftests/kvm/lib/s390/diag318_test_handler.c
diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390/facility.c
index d540812d911a..d540812d911a 100644
--- a/tools/testing/selftests/kvm/lib/s390x/facility.c
+++ b/tools/testing/selftests/kvm/lib/s390/facility.c
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390/processor.c
index 20cfe970e3e3..20cfe970e3e3 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390/processor.c
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390/ucall.c
index cca98734653d..cca98734653d 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390/ucall.c
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index 7c9de8414462..5bde176cedd5 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -114,7 +114,7 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
 
 	PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
 		       is_minor ? "MINOR" : "MISSING",
-		       is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+		       is_minor ? "UFFDIO_CONTINUE" : "UFFDIO_COPY");
 
 	uffd_desc = malloc(sizeof(struct uffd_desc));
 	TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86/apic.c
index 89153a333e83..89153a333e83 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/apic.c
+++ b/tools/testing/selftests/kvm/lib/x86/apic.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86/handlers.S
index 7629819734af..7629819734af 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S
+++ b/tools/testing/selftests/kvm/lib/x86/handlers.S
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86/hyperv.c
index 15bc8cd583aa..15bc8cd583aa 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
+++ b/tools/testing/selftests/kvm/lib/x86/hyperv.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86/memstress.c
index d61e623afc8c..7f5d62a65c68 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86/memstress.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * x86_64-specific extensions to memstress.c.
+ * x86-specific extensions to memstress.c.
  *
  * Copyright (C) 2022, Google, Inc.
  */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86/pmu.c
index f31f0427c17c..f31f0427c17c 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/pmu.c
+++ b/tools/testing/selftests/kvm/lib/x86/pmu.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index 636b29ba8985..bd5a802fa7a5 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * tools/testing/selftests/kvm/lib/x86_64/processor.c
- *
  * Copyright (C) 2018, Google LLC.
  */
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
index e9535ee20b7f..e9535ee20b7f 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/sev.c
+++ b/tools/testing/selftests/kvm/lib/x86/sev.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86/svm.c
index 5495a92dfd5a..d239c2097391 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86/svm.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * tools/testing/selftests/kvm/lib/x86_64/svm.c
  * Helpers used for nested SVM testing
  * Largely inspired from KVM unit test svm.c
  *
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86/ucall.c
index 1265cecc7dd1..1265cecc7dd1 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86/ucall.c
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86/vmx.c
index d7ac122820bf..d4d1208dd023 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86/vmx.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * tools/testing/selftests/kvm/lib/x86_64/vmx.c
- *
  * Copyright (C) 2018, Google LLC.
  */
 
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index 0b9678858b6d..6a437d2be9fa 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -15,16 +15,63 @@
 #include "test_util.h"
 #include "guest_modes.h"
 #include "processor.h"
+#include "ucall_common.h"
+
+static bool mprotect_ro_done;
+static bool all_vcpus_hit_ro_fault;
 
 static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
 {
 	uint64_t gpa;
+	int i;
 
-	for (;;) {
+	for (i = 0; i < 2; i++) {
 		for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
-			*((volatile uint64_t *)gpa) = gpa;
-		GUEST_SYNC(0);
+			vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+		GUEST_SYNC(i);
 	}
+
+	for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+		*((volatile uint64_t *)gpa);
+	GUEST_SYNC(2);
+
+	/*
+	 * Write to the region while mprotect(PROT_READ) is underway.  Keep
+	 * looping until the memory is guaranteed to be read-only and a fault
+	 * has occurred, otherwise vCPUs may complete their writes and advance
+	 * to the next stage prematurely.
+	 *
+	 * For architectures that support skipping the faulting instruction,
+	 * generate the store via inline assembly to ensure the exact length
+	 * of the instruction is known and stable (vcpu_arch_put_guest() on
+	 * fixed-length architectures should work, but the cost of paranoia
+	 * is low in this case).  For x86, hand-code the exact opcode so that
+	 * there is no room for variability in the generated instruction.
+	 */
+	do {
+		for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+#ifdef __x86_64__
+			asm volatile(".byte 0x48,0x89,0x00" :: "a"(gpa) : "memory"); /* mov %rax, (%rax) */
+#elif defined(__aarch64__)
+			asm volatile("str %0, [%0]" :: "r" (gpa) : "memory");
+#else
+			vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+#endif
+	} while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault));
+
+	/*
+	 * Only architectures that write the entire range can explicitly sync,
+	 * as other architectures will be stuck on the write fault.
+	 */
+#if defined(__x86_64__) || defined(__aarch64__)
+	GUEST_SYNC(3);
+#endif
+
+	for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+		vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
+	GUEST_SYNC(4);
+
+	GUEST_ASSERT(0);
 }
 
 struct vcpu_info {
@@ -35,6 +82,7 @@ struct vcpu_info {
 
 static int nr_vcpus;
 static atomic_t rendezvous;
+static atomic_t nr_ro_faults;
 
 static void rendezvous_with_boss(void)
 {
@@ -51,34 +99,104 @@ static void rendezvous_with_boss(void)
 	}
 }
 
-static void run_vcpu(struct kvm_vcpu *vcpu)
+static void assert_sync_stage(struct kvm_vcpu *vcpu, int stage)
+{
+	struct ucall uc;
+
+	TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+	TEST_ASSERT_EQ(uc.args[1], stage);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
 {
 	vcpu_run(vcpu);
-	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+	assert_sync_stage(vcpu, stage);
 }
 
 static void *vcpu_worker(void *data)
 {
+	struct kvm_sregs __maybe_unused sregs;
 	struct vcpu_info *info = data;
 	struct kvm_vcpu *vcpu = info->vcpu;
 	struct kvm_vm *vm = vcpu->vm;
-	struct kvm_sregs sregs;
+	int r;
 
 	vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
 
 	rendezvous_with_boss();
 
-	run_vcpu(vcpu);
+	/* Stage 0, write all of guest memory. */
+	run_vcpu(vcpu, 0);
 	rendezvous_with_boss();
-	vcpu_sregs_get(vcpu, &sregs);
 #ifdef __x86_64__
+	vcpu_sregs_get(vcpu, &sregs);
 	/* Toggle CR0.WP to trigger a MMU context reset. */
 	sregs.cr0 ^= X86_CR0_WP;
-#endif
 	vcpu_sregs_set(vcpu, &sregs);
+#endif
 	rendezvous_with_boss();
 
-	run_vcpu(vcpu);
+	/* Stage 1, re-write all of guest memory. */
+	run_vcpu(vcpu, 1);
+	rendezvous_with_boss();
+
+	/* Stage 2, read all of guest memory, which is now read-only. */
+	run_vcpu(vcpu, 2);
+
+	/*
+	 * Stage 3, write guest memory and verify KVM returns -EFAULT for once
+	 * the mprotect(PROT_READ) lands.  Only architectures that support
+	 * validating *all* of guest memory sync for this stage, as vCPUs will
+	 * be stuck on the faulting instruction for other architectures.  Go to
+	 * stage 3 without a rendezvous
+	 */
+	r = _vcpu_run(vcpu);
+	TEST_ASSERT(r == -1 && errno == EFAULT,
+		    "Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno);
+
+	atomic_inc(&nr_ro_faults);
+	if (atomic_read(&nr_ro_faults) == nr_vcpus) {
+		WRITE_ONCE(all_vcpus_hit_ro_fault, true);
+		sync_global_to_guest(vm, all_vcpus_hit_ro_fault);
+	}
+
+#if defined(__x86_64__) || defined(__aarch64__)
+	/*
+	 * Verify *all* writes from the guest hit EFAULT due to the VMA now
+	 * being read-only.  x86 and arm64 only at this time as skipping the
+	 * instruction that hits the EFAULT requires advancing the program
+	 * counter, which is arch specific and relies on inline assembly.
+	 */
+#ifdef __x86_64__
+	vcpu->run->kvm_valid_regs = KVM_SYNC_X86_REGS;
+#endif
+	for (;;) {
+		r = _vcpu_run(vcpu);
+		if (!r)
+			break;
+		TEST_ASSERT_EQ(errno, EFAULT);
+#if defined(__x86_64__)
+		WRITE_ONCE(vcpu->run->kvm_dirty_regs, KVM_SYNC_X86_REGS);
+		vcpu->run->s.regs.regs.rip += 3;
+#elif defined(__aarch64__)
+		vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc),
+			     vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc)) + 4);
+#endif
+
+	}
+	assert_sync_stage(vcpu, 3);
+#endif /* __x86_64__ || __aarch64__ */
+	rendezvous_with_boss();
+
+	/*
+	 * Stage 4.  Run to completion, waiting for mprotect(PROT_WRITE) to
+	 * make the memory writable again.
+	 */
+	do {
+		r = _vcpu_run(vcpu);
+	} while (r && errno == EFAULT);
+	TEST_ASSERT_EQ(r, 0);
+	assert_sync_stage(vcpu, 4);
 	rendezvous_with_boss();
 
 	return NULL;
@@ -161,7 +279,7 @@ int main(int argc, char *argv[])
 	const uint64_t start_gpa = SZ_4G;
 	const int first_slot = 1;
 
-	struct timespec time_start, time_run1, time_reset, time_run2;
+	struct timespec time_start, time_run1, time_reset, time_run2, time_ro, time_rw;
 	uint64_t max_gpa, gpa, slot_size, max_mem, i;
 	int max_slots, slot, opt, fd;
 	bool hugepages = false;
@@ -209,7 +327,13 @@ int main(int argc, char *argv[])
 	vcpus = malloc(nr_vcpus * sizeof(*vcpus));
 	TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
 
-	vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+	vm = __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus,
+#ifdef __x86_64__
+				    max_mem / SZ_1G,
+#else
+				    max_mem / vm_guest_mode_params[VM_MODE_DEFAULT].page_size,
+#endif
+				    guest_code, vcpus);
 
 	max_gpa = vm->max_gfn << vm->page_shift;
 	TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
@@ -259,14 +383,27 @@ int main(int argc, char *argv[])
 	rendezvous_with_vcpus(&time_reset, "reset");
 	rendezvous_with_vcpus(&time_run2, "run 2");
 
+	mprotect(mem, slot_size, PROT_READ);
+	mprotect_ro_done = true;
+	sync_global_to_guest(vm, mprotect_ro_done);
+
+	rendezvous_with_vcpus(&time_ro, "mprotect RO");
+	mprotect(mem, slot_size, PROT_READ | PROT_WRITE);
+	rendezvous_with_vcpus(&time_rw, "mprotect RW");
+
+	time_rw    = timespec_sub(time_rw,     time_ro);
+	time_ro    = timespec_sub(time_ro,     time_run2);
 	time_run2  = timespec_sub(time_run2,   time_reset);
-	time_reset = timespec_sub(time_reset, time_run1);
+	time_reset = timespec_sub(time_reset,  time_run1);
 	time_run1  = timespec_sub(time_run1,   time_start);
 
-	pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 =  %ld.%.9lds\n",
+	pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds, "
+		"ro = %ld.%.9lds, rw = %ld.%.9lds\n",
 		time_run1.tv_sec, time_run1.tv_nsec,
 		time_reset.tv_sec, time_reset.tv_nsec,
-		time_run2.tv_sec, time_run2.tv_nsec);
+		time_run2.tv_sec, time_run2.tv_nsec,
+		time_ro.tv_sec, time_ro.tv_nsec,
+		time_rw.tv_sec, time_rw.tv_nsec);
 
 	/*
 	 * Delete even numbered slots (arbitrary) and unmap the first half of
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 2c792228ac0b..9e370800a6a2 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -93,7 +93,7 @@ struct kvm_vm *test_vm_create(void)
 		vcpu_init_vector_tables(vcpus[i]);
 
 	/* Initialize guest timer frequency. */
-	vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq);
+	timer_freq = vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency));
 	sync_global_to_guest(vm, timer_freq);
 	pr_debug("timer_freq: %lu\n", timer_freq);
 
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
index 0e0712854953..cfed6c727bfc 100644
--- a/tools/testing/selftests/kvm/riscv/ebreak_test.c
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -60,7 +60,7 @@ int main(void)
 
 	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
 
-	vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc);
+	pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc));
 	TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1));
 
 	/* skip sw_bp_1 */
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 4bc1051848e5..8515921dfdbf 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -52,6 +52,8 @@ bool filter_reg(__u64 reg)
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
+	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC:
+	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
@@ -71,6 +73,7 @@ bool filter_reg(__u64 reg)
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ:
+	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICCRSE:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND:
 	case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR:
@@ -112,6 +115,7 @@ bool filter_reg(__u64 reg)
 	case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM:
 	case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU:
 	case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN:
+	case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SUSP:
 	case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
 	case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
 	case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
@@ -429,6 +433,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
 		KVM_ISA_EXT_ARR(SVINVAL),
 		KVM_ISA_EXT_ARR(SVNAPOT),
 		KVM_ISA_EXT_ARR(SVPBMT),
+		KVM_ISA_EXT_ARR(SVVPTC),
+		KVM_ISA_EXT_ARR(ZABHA),
 		KVM_ISA_EXT_ARR(ZACAS),
 		KVM_ISA_EXT_ARR(ZAWRS),
 		KVM_ISA_EXT_ARR(ZBA),
@@ -448,6 +454,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
 		KVM_ISA_EXT_ARR(ZFHMIN),
 		KVM_ISA_EXT_ARR(ZICBOM),
 		KVM_ISA_EXT_ARR(ZICBOZ),
+		KVM_ISA_EXT_ARR(ZICCRSE),
 		KVM_ISA_EXT_ARR(ZICNTR),
 		KVM_ISA_EXT_ARR(ZICOND),
 		KVM_ISA_EXT_ARR(ZICSR),
@@ -535,10 +542,11 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off)
 		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST),
 		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM),
 		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
+		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SUSP),
 		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
 		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
 		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
-		KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
 	};
 
 	if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name))
@@ -949,6 +957,7 @@ KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE);
 KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
 KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
 KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
+KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
 
 KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
 KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
@@ -964,6 +973,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU);
 KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
 KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
 KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
+KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA);
 KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
 KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
 KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
@@ -983,6 +994,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
 KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
 KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM);
 KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ);
+KVM_ISA_EXT_SIMPLE_CONFIG(ziccrse, ZICCRSE);
 KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR);
 KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND);
 KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR);
@@ -1017,6 +1029,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
 	&config_sbi_sta,
 	&config_sbi_pmu,
 	&config_sbi_dbcn,
+	&config_sbi_susp,
 	&config_aia,
 	&config_fp_f,
 	&config_fp_d,
@@ -1031,6 +1044,8 @@ struct vcpu_reg_list *vcpu_configs[] = {
 	&config_svinval,
 	&config_svnapot,
 	&config_svpbmt,
+	&config_svvptc,
+	&config_zabha,
 	&config_zacas,
 	&config_zawrs,
 	&config_zba,
@@ -1050,6 +1065,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
 	&config_zfhmin,
 	&config_zicbom,
 	&config_zicboz,
+	&config_ziccrse,
 	&config_zicntr,
 	&config_zicond,
 	&config_zicsr,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index f299cbfd23ca..03406de4989d 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -39,7 +39,13 @@ static bool illegal_handler_invoked;
 #define SBI_PMU_TEST_SNAPSHOT	BIT(2)
 #define SBI_PMU_TEST_OVERFLOW	BIT(3)
 
-static int disabled_tests;
+#define SBI_PMU_OVERFLOW_IRQNUM_DEFAULT 5
+struct test_args {
+	int disabled_tests;
+	int overflow_irqnum;
+};
+
+static struct test_args targs;
 
 unsigned long pmu_csr_read_num(int csr_num)
 {
@@ -118,8 +124,8 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags)
 
 	ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
 			0, 0, 0);
-	__GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
-			       counter, ret.error);
+	__GUEST_ASSERT(ret.error == 0 || ret.error == SBI_ERR_ALREADY_STOPPED,
+		       "Unable to stop counter %ld error %ld\n", counter, ret.error);
 }
 
 static void guest_illegal_exception_handler(struct ex_regs *regs)
@@ -137,7 +143,6 @@ static void guest_irq_handler(struct ex_regs *regs)
 	unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
 	struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
 	unsigned long overflown_mask;
-	unsigned long counter_val = 0;
 
 	/* Validate that we are in the correct irq handler */
 	GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
@@ -151,10 +156,6 @@ static void guest_irq_handler(struct ex_regs *regs)
 	GUEST_ASSERT(overflown_mask & 0x01);
 
 	WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
-
-	counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
-	/* Now start the counter to mimick the real driver behavior */
-	start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
 }
 
 static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
@@ -479,7 +480,7 @@ static void test_pmu_events_snaphost(void)
 
 static void test_pmu_events_overflow(void)
 {
-	int num_counters = 0;
+	int num_counters = 0, i = 0;
 
 	/* Verify presence of SBI PMU and minimum requrired SBI version */
 	verify_sbi_requirement_assert();
@@ -496,11 +497,15 @@ static void test_pmu_events_overflow(void)
 	 * Qemu supports overflow for cycle/instruction.
 	 * This test may fail on any platform that do not support overflow for these two events.
 	 */
-	test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
-	GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
+	for (i = 0; i < targs.overflow_irqnum; i++)
+		test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+	GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
+
+	vcpu_shared_irq_count = 0;
 
-	test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
-	GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
+	for (i = 0; i < targs.overflow_irqnum; i++)
+		test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+	GUEST_ASSERT_EQ(vcpu_shared_irq_count, targs.overflow_irqnum);
 
 	GUEST_DONE();
 }
@@ -608,8 +613,12 @@ static void test_vm_events_overflow(void *guest_code)
 
 	vcpu_init_vector_tables(vcpu);
 	/* Initialize guest timer frequency. */
-	vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq);
+	timer_freq = vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency));
+
+	/* Export the shared variables to the guest */
 	sync_global_to_guest(vm, timer_freq);
+	sync_global_to_guest(vm, vcpu_shared_irq_count);
+	sync_global_to_guest(vm, targs);
 
 	run_vcpu(vcpu);
 
@@ -618,28 +627,38 @@ static void test_vm_events_overflow(void *guest_code)
 
 static void test_print_help(char *name)
 {
-	pr_info("Usage: %s [-h] [-d <test name>]\n", name);
-	pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+	pr_info("Usage: %s [-h] [-t <test name>] [-n <number of LCOFI interrupt for overflow test>]\n",
+		name);
+	pr_info("\t-t: Test to run (default all). Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+	pr_info("\t-n: Number of LCOFI interrupt to trigger for each event in overflow test (default: %d)\n",
+		SBI_PMU_OVERFLOW_IRQNUM_DEFAULT);
 	pr_info("\t-h: print this help screen\n");
 }
 
 static bool parse_args(int argc, char *argv[])
 {
 	int opt;
+	int temp_disabled_tests = SBI_PMU_TEST_BASIC | SBI_PMU_TEST_EVENTS | SBI_PMU_TEST_SNAPSHOT |
+				  SBI_PMU_TEST_OVERFLOW;
+	int overflow_interrupts = 0;
 
-	while ((opt = getopt(argc, argv, "hd:")) != -1) {
+	while ((opt = getopt(argc, argv, "ht:n:")) != -1) {
 		switch (opt) {
-		case 'd':
+		case 't':
 			if (!strncmp("basic", optarg, 5))
-				disabled_tests |= SBI_PMU_TEST_BASIC;
+				temp_disabled_tests &= ~SBI_PMU_TEST_BASIC;
 			else if (!strncmp("events", optarg, 6))
-				disabled_tests |= SBI_PMU_TEST_EVENTS;
+				temp_disabled_tests &= ~SBI_PMU_TEST_EVENTS;
 			else if (!strncmp("snapshot", optarg, 8))
-				disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
+				temp_disabled_tests &= ~SBI_PMU_TEST_SNAPSHOT;
 			else if (!strncmp("overflow", optarg, 8))
-				disabled_tests |= SBI_PMU_TEST_OVERFLOW;
+				temp_disabled_tests &= ~SBI_PMU_TEST_OVERFLOW;
 			else
 				goto done;
+			targs.disabled_tests = temp_disabled_tests;
+			break;
+		case 'n':
+			overflow_interrupts = atoi_positive("Number of LCOFI", optarg);
 			break;
 		case 'h':
 		default:
@@ -647,6 +666,15 @@ static bool parse_args(int argc, char *argv[])
 		}
 	}
 
+	if (overflow_interrupts > 0) {
+		if (targs.disabled_tests & SBI_PMU_TEST_OVERFLOW) {
+			pr_info("-n option is only available for overflow test\n");
+			goto done;
+		} else {
+			targs.overflow_irqnum = overflow_interrupts;
+		}
+	}
+
 	return true;
 done:
 	test_print_help(argv[0]);
@@ -655,25 +683,28 @@ done:
 
 int main(int argc, char *argv[])
 {
+	targs.disabled_tests = 0;
+	targs.overflow_irqnum = SBI_PMU_OVERFLOW_IRQNUM_DEFAULT;
+
 	if (!parse_args(argc, argv))
 		exit(KSFT_SKIP);
 
-	if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
+	if (!(targs.disabled_tests & SBI_PMU_TEST_BASIC)) {
 		test_vm_basic_test(test_pmu_basic_sanity);
 		pr_info("SBI PMU basic test : PASS\n");
 	}
 
-	if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
+	if (!(targs.disabled_tests & SBI_PMU_TEST_EVENTS)) {
 		test_vm_events_test(test_pmu_events);
 		pr_info("SBI PMU event verification test : PASS\n");
 	}
 
-	if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+	if (!(targs.disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
 		test_vm_events_snapshot_test(test_pmu_events_snaphost);
 		pr_info("SBI PMU event verification with snapshot test : PASS\n");
 	}
 
-	if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+	if (!(targs.disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
 		test_vm_events_overflow(test_pmu_events_overflow);
 		pr_info("SBI PMU event verification with overflow test : PASS\n");
 	}
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c
index e32dd59703a0..85cc8c18d6e7 100644
--- a/tools/testing/selftests/kvm/s390x/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390/cmma_test.c
@@ -444,7 +444,7 @@ static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
 			 );
 }
 
-static void test_get_inital_dirty(void)
+static void test_get_initial_dirty(void)
 {
 	struct kvm_vm *vm = create_vm_two_memslots();
 	struct kvm_vcpu *vcpu;
@@ -651,7 +651,7 @@ struct testdef {
 } testlist[] = {
 	{ "migration mode and dirty tracking", test_migration_mode },
 	{ "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
-	{ "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+	{ "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty },
 	{ "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
 };
 
diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390/config
index 23270f2d679f..23270f2d679f 100644
--- a/tools/testing/selftests/kvm/s390x/config
+++ b/tools/testing/selftests/kvm/s390/config
diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
index 27255880dabd..27255880dabd 100644
--- a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c
+++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390/debug_test.c
index ad8095968601..ad8095968601 100644
--- a/tools/testing/selftests/kvm/s390x/debug_test.c
+++ b/tools/testing/selftests/kvm/s390/debug_test.c
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390/memop.c
index 4374b4cd2a80..4374b4cd2a80 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390/memop.c
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390/resets.c
index 357943f2bea8..b58f75b381e5 100644
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390/resets.c
@@ -61,7 +61,7 @@ static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
 {
 	uint64_t eval_reg;
 
-	vcpu_get_reg(vcpu, id, &eval_reg);
+	eval_reg = vcpu_get_reg(vcpu, id);
 	TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
 }
 
diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
index bba0d9a6dcc8..bba0d9a6dcc8 100644
--- a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
+++ b/tools/testing/selftests/kvm/s390/shared_zeropage_test.c
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390/sync_regs_test.c
index 53def355ccba..53def355ccba 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390/sync_regs_test.c
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390/tprot.c
index 12d5e1cb62e3..12d5e1cb62e3 100644
--- a/tools/testing/selftests/kvm/s390x/tprot.c
+++ b/tools/testing/selftests/kvm/s390/tprot.c
diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c
index 135ee22856cf..d265b34c54be 100644
--- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c
@@ -88,10 +88,6 @@ asm("test_skey_asm:\n"
 	"	ahi	%r0,1\n"
 	"	st	%r1,0(%r5,%r6)\n"
 
-	"	iske	%r1,%r6\n"
-	"	ahi	%r0,1\n"
-	"	diag	0,0,0x44\n"
-
 	"	sske	%r1,%r6\n"
 	"	xgr	%r1,%r1\n"
 	"	iske	%r1,%r6\n"
@@ -459,10 +455,14 @@ TEST_F(uc_kvm, uc_no_user_region)
 	};
 
 	ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, &region));
-	ASSERT_EQ(EINVAL, errno);
+	ASSERT_TRUE(errno == EEXIST || errno == EINVAL)
+		TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION",
+		       strerror(errno), errno);
 
 	ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, &region2));
-	ASSERT_EQ(EINVAL, errno);
+	ASSERT_TRUE(errno == EEXIST || errno == EINVAL)
+		TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION2",
+		       strerror(errno), errno);
 }
 
 TEST_F(uc_kvm, uc_map_unmap)
@@ -596,7 +596,9 @@ TEST_F(uc_kvm, uc_skey)
 	ASSERT_EQ(true, uc_handle_exit(self));
 	ASSERT_EQ(1, sync_regs->gprs[0]);
 
-	/* ISKE */
+	/* SSKE + ISKE */
+	sync_regs->gprs[1] = skeyvalue;
+	run->kvm_dirty_regs |= KVM_SYNC_GPRS;
 	ASSERT_EQ(0, uc_run_once(self));
 
 	/*
@@ -608,21 +610,11 @@ TEST_F(uc_kvm, uc_skey)
 	TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE));
 	TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason);
 	TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode);
-	TEST_REQUIRE(sie_block->ipa != 0xb229);
+	TEST_REQUIRE(sie_block->ipa != 0xb22b);
 
-	/* ISKE contd. */
+	/* SSKE + ISKE contd. */
 	ASSERT_EQ(false, uc_handle_exit(self));
 	ASSERT_EQ(2, sync_regs->gprs[0]);
-	/* assert initial skey (ACC = 0, R & C = 1) */
-	ASSERT_EQ(0x06, sync_regs->gprs[1]);
-	uc_assert_diag44(self);
-
-	/* SSKE + ISKE */
-	sync_regs->gprs[1] = skeyvalue;
-	run->kvm_dirty_regs |= KVM_SYNC_GPRS;
-	ASSERT_EQ(0, uc_run_once(self));
-	ASSERT_EQ(false, uc_handle_exit(self));
-	ASSERT_EQ(3, sync_regs->gprs[0]);
 	ASSERT_EQ(skeyvalue, sync_regs->gprs[1]);
 	uc_assert_diag44(self);
 
@@ -631,7 +623,7 @@ TEST_F(uc_kvm, uc_skey)
 	run->kvm_dirty_regs |= KVM_SYNC_GPRS;
 	ASSERT_EQ(0, uc_run_once(self));
 	ASSERT_EQ(false, uc_handle_exit(self));
-	ASSERT_EQ(4, sync_regs->gprs[0]);
+	ASSERT_EQ(3, sync_regs->gprs[0]);
 	/* assert R reset but rest of skey unchanged */
 	ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]);
 	ASSERT_EQ(0, sync_regs->gprs[1] & 0x04);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index a8267628e9ed..bc440d5aba57 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -17,9 +17,9 @@
 #include <processor.h>
 
 /*
- * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
- * 2MB sized and aligned region so that the initial region corresponds to
- * exactly one large page.
+ * s390 needs at least 1MB alignment, and the x86 MOVE/DELETE tests need a 2MB
+ * sized and aligned region so that the initial region corresponds to exactly
+ * one large page.
  */
 #define MEM_REGION_SIZE		0x200000
 
@@ -235,7 +235,7 @@ static void guest_code_delete_memory_region(void)
 	 * in the guest will never succeed, and so isn't an option.
 	 */
 	memset(&idt, 0, sizeof(idt));
-	__asm__ __volatile__("lidt %0" :: "m"(idt));
+	set_idt(&idt);
 
 	GUEST_SYNC(0);
 
@@ -553,6 +553,56 @@ static void test_add_overlapping_private_memory_regions(void)
 	close(memfd);
 	kvm_vm_free(vm);
 }
+
+static void guest_code_mmio_during_vectoring(void)
+{
+	const struct desc_ptr idt_desc = {
+		.address = MEM_REGION_GPA,
+		.size = 0xFFF,
+	};
+
+	set_idt(&idt_desc);
+
+	/* Generate a #GP by dereferencing a non-canonical address */
+	*((uint8_t *)NONCANONICAL) = 0x1;
+
+	GUEST_ASSERT(0);
+}
+
+/*
+ * This test points the IDT descriptor base to an MMIO address. It should cause
+ * a KVM internal error when an event occurs in the guest.
+ */
+static void test_mmio_during_vectoring(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	u64 expected_gpa;
+
+	pr_info("Testing MMIO during vectoring error handling\n");
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code_mmio_during_vectoring);
+	virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 1);
+
+	run = vcpu->run;
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+	TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_DELIVERY_EV,
+		    "Unexpected suberror = %d", vcpu->run->internal.suberror);
+	TEST_ASSERT(run->internal.ndata != 4, "Unexpected internal error data array size = %d",
+		    run->internal.ndata);
+
+	/* The reported GPA should be IDT base + offset of the GP vector */
+	expected_gpa = MEM_REGION_GPA + GP_VECTOR * sizeof(struct idt_entry);
+
+	TEST_ASSERT(run->internal.data[3] == expected_gpa,
+		    "Unexpected GPA = %llx (expected %lx)",
+		    vcpu->run->internal.data[3], expected_gpa);
+
+	kvm_vm_free(vm);
+}
 #endif
 
 int main(int argc, char *argv[])
@@ -568,6 +618,7 @@ int main(int argc, char *argv[])
 	 * KVM_RUN fails with ENOEXEC or EFAULT.
 	 */
 	test_zero_memory_regions();
+	test_mmio_during_vectoring();
 #endif
 
 	test_invalid_memory_region_flags();
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index a8d3afa0b86b..cce2520af720 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -269,9 +269,8 @@ static void guest_code(int cpu)
 static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
 {
 	uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA);
-	unsigned long enabled;
+	unsigned long enabled = vcpu_get_reg(vcpu, id);
 
-	vcpu_get_reg(vcpu, id, &enabled);
 	TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result");
 
 	return enabled;
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86/amx_test.c
index f4ce5a185a7d..f4ce5a185a7d 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86/amx_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
index f8916bb34405..f8916bb34405 100644
--- a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c
+++ b/tools/testing/selftests/kvm/x86/apic_bus_clock_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c
index 7b3fda6842bc..7b3fda6842bc 100644
--- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/cpuid_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c
index 28cc66454601..28cc66454601 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86/cr4_cpuid_sync_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86/debug_regs.c
index 2d814c1d1dc4..2d814c1d1dc4 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86/debug_regs.c
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
index 2929c067c207..b0d2b04a7ff2 100644
--- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86/dirty_log_page_splitting_test.c
@@ -41,9 +41,9 @@ struct kvm_page_stats {
 
 static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
 {
-	stats->pages_4k = vm_get_stat(vm, "pages_4k");
-	stats->pages_2m = vm_get_stat(vm, "pages_2m");
-	stats->pages_1g = vm_get_stat(vm, "pages_1g");
+	stats->pages_4k = vm_get_stat(vm, pages_4k);
+	stats->pages_2m = vm_get_stat(vm, pages_2m);
+	stats->pages_1g = vm_get_stat(vm, pages_1g);
 	stats->hugepages = stats->pages_2m + stats->pages_1g;
 
 	pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c
index 81055476d394..81055476d394 100644
--- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
+++ b/tools/testing/selftests/kvm/x86/exit_on_emulation_failure_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
index a72f13ae2edb..a72f13ae2edb 100644
--- a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/feature_msrs_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
index 762628f7d4ba..762628f7d4ba 100644
--- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86/fix_hypercall_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86/flds_emulation.h
index 37b1a9f52864..37b1a9f52864 100644
--- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h
+++ b/tools/testing/selftests/kvm/x86/flds_emulation.h
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
index 10b1b0ba374e..10b1b0ba374e 100644
--- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
+++ b/tools/testing/selftests/kvm/x86/hwcr_msr_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86/hyperv_clock.c
index e058bc676cd6..e058bc676cd6 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_clock.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
index 4f5881d4ef66..4e920705681a 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -41,13 +41,19 @@ static bool smt_possible(void)
 	return res;
 }
 
-static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
-			  bool evmcs_expected)
+static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
 {
+	const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
+	const struct kvm_cpuid2 *hv_cpuid_entries;
 	int i;
 	int nent_expected = 10;
 	u32 test_val;
 
+	if (vcpu)
+		hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+	else
+		hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+
 	TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
 		    "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
 		    " (returned %d)",
@@ -80,12 +86,19 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
 				    entry->eax, evmcs_expected
 				);
 			break;
+		case 0x40000003:
+			TEST_ASSERT(has_irqchip || !(entry->edx & BIT(19)),
+				    "\"Direct\" Synthetic Timers should require in-kernel APIC");
+			break;
 		case 0x40000004:
 			test_val = entry->eax & (1UL << 18);
 
 			TEST_ASSERT(!!test_val == !smt_possible(),
 				    "NoNonArchitecturalCoreSharing bit"
 				    " doesn't reflect SMT setting");
+
+			TEST_ASSERT(has_irqchip || !(entry->eax & BIT(10)),
+				    "Cluster IPI (i.e. SEND_IPI) should require in-kernel APIC");
 			break;
 		case 0x4000000A:
 			TEST_ASSERT(entry->eax & (1UL << 19),
@@ -109,9 +122,16 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
 		 *	entry->edx);
 		 */
 	}
+
+	/*
+	 * Note, the CPUID array returned by the system-scoped helper is a one-
+	 * time allocation, i.e. must not be freed.
+	 */
+	if (vcpu)
+		free((void *)hv_cpuid_entries);
 }
 
-void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
 {
 	static struct kvm_cpuid2 cpuid = {.nent = 0};
 	int ret;
@@ -129,19 +149,20 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
 int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
-	const struct kvm_cpuid2 *hv_cpuid_entries;
 	struct kvm_vcpu *vcpu;
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
 
-	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	/* Test the vCPU ioctl without an in-kernel local APIC. */
+	vm = vm_create_barebones();
+	vcpu = __vm_vcpu_add(vm, 0);
+	test_hv_cpuid(vcpu, false);
+	kvm_vm_free(vm);
 
 	/* Test vCPU ioctl version */
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 	test_hv_cpuid_e2big(vm, vcpu);
-
-	hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
-	test_hv_cpuid(hv_cpuid_entries, false);
-	free((void *)hv_cpuid_entries);
+	test_hv_cpuid(vcpu, false);
 
 	if (!kvm_cpu_has(X86_FEATURE_VMX) ||
 	    !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
@@ -149,9 +170,7 @@ int main(int argc, char *argv[])
 		goto do_sys;
 	}
 	vcpu_enable_evmcs(vcpu);
-	hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
-	test_hv_cpuid(hv_cpuid_entries, true);
-	free((void *)hv_cpuid_entries);
+	test_hv_cpuid(vcpu, true);
 
 do_sys:
 	/* Test system ioctl version */
@@ -161,9 +180,7 @@ do_sys:
 	}
 
 	test_hv_cpuid_e2big(vm, NULL);
-
-	hv_cpuid_entries = kvm_get_supported_hv_cpuid();
-	test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
+	test_hv_cpuid(NULL, kvm_cpu_has(X86_FEATURE_VMX));
 
 out:
 	kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
index 74cf19661309..74cf19661309 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_evmcs.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
index 949e08e98f31..949e08e98f31 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_extended_hypercalls.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86/hyperv_features.c
index 068e9c69710d..068e9c69710d 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_features.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
index 22c0c124582f..2b5b4bc6ef7e 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_ipi.c
@@ -63,8 +63,10 @@ static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
 	/* Signal sender vCPU we're ready */
 	ipis_rcvd[vcpu_id] = (u64)-1;
 
-	for (;;)
-		asm volatile("sti; hlt; cli");
+	for (;;) {
+		safe_halt();
+		cli();
+	}
 }
 
 static void guest_ipi_handler(struct ex_regs *regs)
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
index 0ddb63229bcb..0ddb63229bcb 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_svm_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
index 077cd0ec3040..077cd0ec3040 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
index 5bc12222d87a..5bc12222d87a 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_clock_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
index 78878b3a2725..1b805cbdb47b 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86/kvm_pv_test.c
@@ -139,10 +139,12 @@ static void test_pv_unhalt(void)
 	struct kvm_vm *vm;
 	struct kvm_cpuid_entry2 *ent;
 	u32 kvm_sig_old;
+	int r;
 
-	pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+	if (!(kvm_check_cap(KVM_CAP_X86_DISABLE_EXITS) & KVM_X86_DISABLE_EXITS_HLT))
+		return;
 
-	TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
+	pr_info("testing KVM_FEATURE_PV_UNHALT\n");
 
 	/* KVM_PV_UNHALT test */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
@@ -151,19 +153,45 @@ static void test_pv_unhalt(void)
 	TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
 		    "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
 
-	/* Make sure KVM clears vcpu->arch.kvm_cpuid */
+	/* Verify KVM disallows disabling exits after vCPU creation. */
+	r = __vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+	TEST_ASSERT(r && errno == EINVAL,
+		    "Disabling exits after vCPU creation didn't fail as expected");
+
+	kvm_vm_free(vm);
+
+	/* Verify that KVM clear PV_UNHALT from guest CPUID. */
+	vm = vm_create(1);
+	vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+
+	vcpu = vm_vcpu_add(vm, 0, NULL);
+	TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+		    "vCPU created with PV_UNHALT set by default");
+
+	vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+	TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+		    "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
+
+	/*
+	 * Clobber the KVM PV signature and verify KVM does NOT clear PV_UNHALT
+	 * when KVM PV is not present, and DOES clear PV_UNHALT when switching
+	 * back to the correct signature..
+	 */
 	ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
 	kvm_sig_old = ent->ebx;
 	ent->ebx = 0xdeadbeef;
 	vcpu_set_cpuid(vcpu);
 
-	vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+	vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+	TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+		    "PV_UNHALT cleared when using bogus KVM PV signature");
+
 	ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
 	ent->ebx = kvm_sig_old;
 	vcpu_set_cpuid(vcpu);
 
 	TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
-		    "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+		    "PV_UNHALT set in guest CPUID when HLT-exiting is disabled");
 
 	/* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
 
diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c
index 7e2bfb3c3f3b..7e2bfb3c3f3b 100644
--- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
+++ b/tools/testing/selftests/kvm/x86/max_vcpuid_cap_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..2b550eff35f1 100644
--- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
diff --git a/tools/testing/selftests/kvm/x86/nested_emulation_test.c b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
new file mode 100644
index 000000000000..abc824dba04f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/nested_emulation_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+enum {
+	SVM_F,
+	VMX_F,
+	NR_VIRTUALIZATION_FLAVORS,
+};
+
+struct emulated_instruction {
+	const char name[32];
+	uint8_t opcode[15];
+	uint32_t exit_reason[NR_VIRTUALIZATION_FLAVORS];
+};
+
+static struct emulated_instruction instructions[] = {
+	{
+		.name = "pause",
+		.opcode = { 0xf3, 0x90 },
+		.exit_reason = { SVM_EXIT_PAUSE,
+				 EXIT_REASON_PAUSE_INSTRUCTION, }
+	},
+	{
+		.name = "hlt",
+		.opcode = { 0xf4 },
+		.exit_reason = { SVM_EXIT_HLT,
+				 EXIT_REASON_HLT, }
+	},
+};
+
+static uint8_t kvm_fep[] = { 0x0f, 0x0b, 0x6b, 0x76, 0x6d };	/* ud2 ; .ascii "kvm" */
+static uint8_t l2_guest_code[sizeof(kvm_fep) + 15];
+static uint8_t *l2_instruction = &l2_guest_code[sizeof(kvm_fep)];
+
+static uint32_t get_instruction_length(struct emulated_instruction *insn)
+{
+	uint32_t i;
+
+	for (i = 0; i < ARRAY_SIZE(insn->opcode) && insn->opcode[i]; i++)
+		;
+
+	return i;
+}
+
+static void guest_code(void *test_data)
+{
+	int f = this_cpu_has(X86_FEATURE_SVM) ? SVM_F : VMX_F;
+	int i;
+
+	memcpy(l2_guest_code, kvm_fep, sizeof(kvm_fep));
+
+	if (f == SVM_F) {
+		struct svm_test_data *svm = test_data;
+		struct vmcb *vmcb = svm->vmcb;
+
+		generic_svm_setup(svm, NULL, NULL);
+		vmcb->save.idtr.limit = 0;
+		vmcb->save.rip = (u64)l2_guest_code;
+
+		vmcb->control.intercept |= BIT_ULL(INTERCEPT_SHUTDOWN) |
+					   BIT_ULL(INTERCEPT_PAUSE) |
+					   BIT_ULL(INTERCEPT_HLT);
+		vmcb->control.intercept_exceptions = 0;
+	} else {
+		GUEST_ASSERT(prepare_for_vmx_operation(test_data));
+		GUEST_ASSERT(load_vmcs(test_data));
+
+		prepare_vmcs(test_data, NULL, NULL);
+		GUEST_ASSERT(!vmwrite(GUEST_IDTR_LIMIT, 0));
+		GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+		GUEST_ASSERT(!vmwrite(EXCEPTION_BITMAP, 0));
+
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+						   CPU_BASED_PAUSE_EXITING |
+						   CPU_BASED_HLT_EXITING);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(instructions); i++) {
+		struct emulated_instruction *insn = &instructions[i];
+		uint32_t insn_len = get_instruction_length(insn);
+		uint32_t exit_insn_len;
+		u32 exit_reason;
+
+		/*
+		 * Copy the target instruction to the L2 code stream, and fill
+		 * the remaining bytes with INT3s so that a missed intercept
+		 * results in a consistent failure mode (SHUTDOWN).
+		 */
+		memcpy(l2_instruction, insn->opcode, insn_len);
+		memset(l2_instruction + insn_len, 0xcc, sizeof(insn->opcode) - insn_len);
+
+		if (f == SVM_F) {
+			struct svm_test_data *svm = test_data;
+			struct vmcb *vmcb = svm->vmcb;
+
+			run_guest(vmcb, svm->vmcb_gpa);
+			exit_reason = vmcb->control.exit_code;
+			exit_insn_len = vmcb->control.next_rip - vmcb->save.rip;
+			GUEST_ASSERT_EQ(vmcb->save.rip, (u64)l2_instruction);
+		} else {
+			GUEST_ASSERT_EQ(i ? vmresume() : vmlaunch(), 0);
+			exit_reason = vmreadz(VM_EXIT_REASON);
+			exit_insn_len = vmreadz(VM_EXIT_INSTRUCTION_LEN);
+			GUEST_ASSERT_EQ(vmreadz(GUEST_RIP), (u64)l2_instruction);
+		}
+
+		__GUEST_ASSERT(exit_reason == insn->exit_reason[f],
+			       "Wanted exit_reason '0x%x' for '%s', got '0x%x'",
+			       insn->exit_reason[f], insn->name, exit_reason);
+
+		__GUEST_ASSERT(exit_insn_len == insn_len,
+			       "Wanted insn_len '%u' for '%s', got '%u'",
+			       insn_len, insn->name, exit_insn_len);
+	}
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t nested_test_data_gva;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	TEST_REQUIRE(is_forced_emulation_enabled);
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+	if (kvm_cpu_has(X86_FEATURE_SVM))
+		vcpu_alloc_svm(vm, &nested_test_data_gva);
+	else
+		vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+	vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+	vcpu_run(vcpu);
+	TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
index 3eb0313ffa39..3641a42934ac 100644
--- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
@@ -85,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
 
 	GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
 	GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+	GUEST_ASSERT(!ctrl->int_state);
 }
 
 static void l1_svm_code(struct svm_test_data *svm)
@@ -122,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
 	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
 	GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
 	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+	GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO));
 }
 
 static void l1_vmx_code(struct vmx_pages *vmx)
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
index e7efb2b35f8b..c0d84827f736 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.c
@@ -73,7 +73,7 @@ static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
 {
 	int actual_pages_2m;
 
-	actual_pages_2m = vm_get_stat(vm, "pages_2m");
+	actual_pages_2m = vm_get_stat(vm, pages_2m);
 
 	TEST_ASSERT(actual_pages_2m == expected_pages_2m,
 		    "Unexpected 2m page count. Expected %d, got %d",
@@ -84,7 +84,7 @@ static void check_split_count(struct kvm_vm *vm, int expected_splits)
 {
 	int actual_splits;
 
-	actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+	actual_splits = vm_get_stat(vm, nx_lpage_splits);
 
 	TEST_ASSERT(actual_splits == expected_splits,
 		    "Unexpected NX huge page split count. Expected %d, got %d",
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh
index caad084b8bfd..caad084b8bfd 100755
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
+++ b/tools/testing/selftests/kvm/x86/nx_huge_pages_test.sh
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86/platform_info_test.c
index 9cbf283ebc55..9cbf283ebc55 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86/platform_info_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
index 698cb36989db..8aaaf25b6111 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_counters_test.c
@@ -17,7 +17,7 @@
  * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
  * 1 LOOP.
  */
-#define NUM_INSNS_PER_LOOP		3
+#define NUM_INSNS_PER_LOOP		4
 
 /*
  * Number of "extra" instructions that will be counted, i.e. the number of
@@ -29,10 +29,59 @@
 /* Total number of instructions retired within the measured section. */
 #define NUM_INSNS_RETIRED		(NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
 
+/* Track which architectural events are supported by hardware. */
+static uint32_t hardware_pmu_arch_events;
 
 static uint8_t kvm_pmu_version;
 static bool kvm_has_perf_caps;
 
+#define X86_PMU_FEATURE_NULL						\
+({									\
+	struct kvm_x86_pmu_feature feature = {};			\
+									\
+	feature;							\
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+	return !(*(u64 *)&event);
+}
+
+struct kvm_intel_pmu_event {
+	struct kvm_x86_pmu_feature gp_event;
+	struct kvm_x86_pmu_feature fixed_event;
+};
+
+/*
+ * Wrap the array to appease the compiler, as the macros used to construct each
+ * kvm_x86_pmu_feature use syntax that's only valid in function scope, and the
+ * compiler often thinks the feature definitions aren't compile-time constants.
+ */
+static struct kvm_intel_pmu_event intel_event_to_feature(uint8_t idx)
+{
+	const struct kvm_intel_pmu_event __intel_event_to_feature[] = {
+		[INTEL_ARCH_CPU_CYCLES_INDEX]		 = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]	 = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+		/*
+		 * Note, the fixed counter for reference cycles is NOT the same as the
+		 * general purpose architectural event.  The fixed counter explicitly
+		 * counts at the same frequency as the TSC, whereas the GP event counts
+		 * at a fixed, but uarch specific, frequency.  Bundle them here for
+		 * simplicity.
+		 */
+		[INTEL_ARCH_REFERENCE_CYCLES_INDEX]	 = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+		[INTEL_ARCH_LLC_REFERENCES_INDEX]	 = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_LLC_MISSES_INDEX]		 = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_BRANCHES_RETIRED_INDEX]	 = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+	};
+
+	kvm_static_assert(ARRAY_SIZE(__intel_event_to_feature) == NR_INTEL_ARCH_EVENTS);
+
+	return __intel_event_to_feature[idx];
+}
+
 static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
 						  void *guest_code,
 						  uint8_t pmu_version,
@@ -42,6 +91,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
 
 	vm = vm_create_with_one_vcpu(vcpu, guest_code);
 	sync_global_to_guest(vm, kvm_pmu_version);
+	sync_global_to_guest(vm, hardware_pmu_arch_events);
 
 	/*
 	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
@@ -98,14 +148,12 @@ static uint8_t guest_get_pmu_version(void)
  * Sanity check that in all cases, the event doesn't count when it's disabled,
  * and that KVM correctly emulates the write of an arbitrary value.
  */
-static void guest_assert_event_count(uint8_t idx,
-				     struct kvm_x86_pmu_feature event,
-				     uint32_t pmc, uint32_t pmc_msr)
+static void guest_assert_event_count(uint8_t idx, uint32_t pmc, uint32_t pmc_msr)
 {
 	uint64_t count;
 
 	count = _rdpmc(pmc);
-	if (!this_pmu_has(event))
+	if (!(hardware_pmu_arch_events & BIT(idx)))
 		goto sanity_checks;
 
 	switch (idx) {
@@ -126,7 +174,9 @@ static void guest_assert_event_count(uint8_t idx,
 		GUEST_ASSERT_NE(count, 0);
 		break;
 	case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
-		GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+		__GUEST_ASSERT(count >= NUM_INSNS_RETIRED,
+			       "Expected top-down slots >= %u, got count = %lu",
+			       NUM_INSNS_RETIRED, count);
 		break;
 	default:
 		break;
@@ -162,75 +212,42 @@ do {										\
 			     "1:\n\t"						\
 			     clflush "\n\t"					\
 			     "mfence\n\t"					\
+			     "mov %[m], %%eax\n\t"				\
 			     FEP "loop 1b\n\t"					\
 			     FEP "mov %%edi, %%ecx\n\t"				\
 			     FEP "xor %%eax, %%eax\n\t"				\
 			     FEP "xor %%edx, %%edx\n\t"				\
 			     "wrmsr\n\t"					\
 			     :: "a"((uint32_t)_value), "d"(_value >> 32),	\
-				"c"(_msr), "D"(_msr)				\
+				"c"(_msr), "D"(_msr), [m]"m"(kvm_pmu_version)	\
 	);									\
 } while (0)
 
-#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)	\
+#define GUEST_TEST_EVENT(_idx, _pmc, _pmc_msr, _ctrl_msr, _value, FEP)		\
 do {										\
-	wrmsr(pmc_msr, 0);							\
+	wrmsr(_pmc_msr, 0);							\
 										\
 	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
-		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);	\
+		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt %[m]", FEP);	\
 	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
-		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);	\
+		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush  %[m]", FEP);	\
 	else									\
 		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
 										\
-	guest_assert_event_count(_idx, _event, _pmc, _pmc_msr);			\
+	guest_assert_event_count(_idx, _pmc, _pmc_msr);				\
 } while (0)
 
-static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
-				    uint32_t pmc, uint32_t pmc_msr,
+static void __guest_test_arch_event(uint8_t idx, uint32_t pmc, uint32_t pmc_msr,
 				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
 {
-	GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+	GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
 
 	if (is_forced_emulation_enabled)
-		GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
-}
-
-#define X86_PMU_FEATURE_NULL						\
-({									\
-	struct kvm_x86_pmu_feature feature = {};			\
-									\
-	feature;							\
-})
-
-static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
-{
-	return !(*(u64 *)&event);
+		GUEST_TEST_EVENT(idx, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
 }
 
 static void guest_test_arch_event(uint8_t idx)
 {
-	const struct {
-		struct kvm_x86_pmu_feature gp_event;
-		struct kvm_x86_pmu_feature fixed_event;
-	} intel_event_to_feature[] = {
-		[INTEL_ARCH_CPU_CYCLES_INDEX]		 = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
-		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX]	 = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
-		/*
-		 * Note, the fixed counter for reference cycles is NOT the same
-		 * as the general purpose architectural event.  The fixed counter
-		 * explicitly counts at the same frequency as the TSC, whereas
-		 * the GP event counts at a fixed, but uarch specific, frequency.
-		 * Bundle them here for simplicity.
-		 */
-		[INTEL_ARCH_REFERENCE_CYCLES_INDEX]	 = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
-		[INTEL_ARCH_LLC_REFERENCES_INDEX]	 = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
-		[INTEL_ARCH_LLC_MISSES_INDEX]		 = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
-		[INTEL_ARCH_BRANCHES_RETIRED_INDEX]	 = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
-		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
-		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX]	 = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
-	};
-
 	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
 	uint32_t pmu_version = guest_get_pmu_version();
 	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
@@ -248,7 +265,7 @@ static void guest_test_arch_event(uint8_t idx)
 	else
 		base_pmc_msr = MSR_IA32_PERFCTR0;
 
-	gp_event = intel_event_to_feature[idx].gp_event;
+	gp_event = intel_event_to_feature(idx).gp_event;
 	GUEST_ASSERT_EQ(idx, gp_event.f.bit);
 
 	GUEST_ASSERT(nr_gp_counters);
@@ -262,14 +279,14 @@ static void guest_test_arch_event(uint8_t idx)
 		if (guest_has_perf_global_ctrl)
 			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
 
-		__guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+		__guest_test_arch_event(idx, i, base_pmc_msr + i,
 					MSR_P6_EVNTSEL0 + i, eventsel);
 	}
 
 	if (!guest_has_perf_global_ctrl)
 		return;
 
-	fixed_event = intel_event_to_feature[idx].fixed_event;
+	fixed_event = intel_event_to_feature(idx).fixed_event;
 	if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
 		return;
 
@@ -277,7 +294,7 @@ static void guest_test_arch_event(uint8_t idx)
 
 	wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
 
-	__guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+	__guest_test_arch_event(idx, i | INTEL_RDPMC_FIXED,
 				MSR_CORE_PERF_FIXED_CTR0 + i,
 				MSR_CORE_PERF_GLOBAL_CTRL,
 				FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
@@ -331,9 +348,9 @@ __GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector,			\
 	       expect_gp ? "#GP" : "no fault", msr, vector)			\
 
 #define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected)			\
-	__GUEST_ASSERT(val == expected_val,					\
+	__GUEST_ASSERT(val == expected,					\
 		       "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx",	\
-		       msr, expected_val, val);
+		       msr, expected, val);
 
 static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
 			     uint64_t expected_val)
@@ -545,7 +562,6 @@ static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
 
 static void test_intel_counters(void)
 {
-	uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
 	uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
 	uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
 	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
@@ -567,18 +583,26 @@ static void test_intel_counters(void)
 
 	/*
 	 * Detect the existence of events that aren't supported by selftests.
-	 * This will (obviously) fail any time the kernel adds support for a
-	 * new event, but it's worth paying that price to keep the test fresh.
+	 * This will (obviously) fail any time hardware adds support for a new
+	 * event, but it's worth paying that price to keep the test fresh.
 	 */
-	TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+	TEST_ASSERT(this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH) <= NR_INTEL_ARCH_EVENTS,
 		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
-		    nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+		    this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH),
+		    this_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
 
 	/*
-	 * Force iterating over known arch events regardless of whether or not
-	 * KVM/hardware supports a given event.
+	 * Iterate over known arch events irrespective of KVM/hardware support
+	 * to verify that KVM doesn't reject programming of events just because
+	 * the *architectural* encoding is unsupported.  Track which events are
+	 * supported in hardware; the guest side will validate supported events
+	 * count correctly, even if *enumeration* of the event is unsupported
+	 * by KVM and/or isn't exposed to the guest.
 	 */
-	nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++) {
+		if (this_pmu_has(intel_event_to_feature(i).gp_event))
+			hardware_pmu_arch_events |= BIT(i);
+	}
 
 	for (v = 0; v <= max_pmu_version; v++) {
 		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
@@ -594,8 +618,8 @@ static void test_intel_counters(void)
 			 * vector length.
 			 */
 			if (v == pmu_version) {
-				for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
-					test_arch_events(v, perf_caps[i], nr_arch_events, k);
+				for (k = 1; k < (BIT(NR_INTEL_ARCH_EVENTS) - 1); k++)
+					test_arch_events(v, perf_caps[i], NR_INTEL_ARCH_EVENTS, k);
 			}
 			/*
 			 * Test single bits for all PMU version and lengths up
@@ -604,11 +628,11 @@ static void test_intel_counters(void)
 			 * host length).  Explicitly test a mask of '0' and all
 			 * ones i.e. all events being available and unavailable.
 			 */
-			for (j = 0; j <= nr_arch_events + 1; j++) {
+			for (j = 0; j <= NR_INTEL_ARCH_EVENTS + 1; j++) {
 				test_arch_events(v, perf_caps[i], j, 0);
 				test_arch_events(v, perf_caps[i], j, 0xff);
 
-				for (k = 0; k < nr_arch_events; k++)
+				for (k = 0; k < NR_INTEL_ARCH_EVENTS; k++)
 					test_arch_events(v, perf_caps[i], j, BIT(k));
 			}
 
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
index c15513cd74d1..c15513cd74d1 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86/pmu_event_filter_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
index 82a8d88b5338..82a8d88b5338 100644
--- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_conversions_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
index 13e72fcec8dd..13e72fcec8dd 100644
--- a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c
+++ b/tools/testing/selftests/kvm/x86/private_mem_kvm_exits_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c
index cbc92a862ea9..cbc92a862ea9 100644
--- a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
+++ b/tools/testing/selftests/kvm/x86/recalc_apic_map_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
index 49913784bc82..49913784bc82 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86/set_boot_cpu_id.c
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86/set_sregs_test.c
index c021c0795a96..f4095a3d1278 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86/set_sregs_test.c
@@ -41,13 +41,15 @@ do {										\
 	TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs");	\
 } while (0)
 
+#define KVM_ALWAYS_ALLOWED_CR4 (X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD |	\
+				X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE |	\
+				X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE |	\
+				X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
+
 static uint64_t calc_supported_cr4_feature_bits(void)
 {
-	uint64_t cr4;
+	uint64_t cr4 = KVM_ALWAYS_ALLOWED_CR4;
 
-	cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
-	      X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
-	      X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
 	if (kvm_cpu_has(X86_FEATURE_UMIP))
 		cr4 |= X86_CR4_UMIP;
 	if (kvm_cpu_has(X86_FEATURE_LA57))
@@ -72,36 +74,31 @@ static uint64_t calc_supported_cr4_feature_bits(void)
 	return cr4;
 }
 
-int main(int argc, char *argv[])
+static void test_cr_bits(struct kvm_vcpu *vcpu, uint64_t cr4)
 {
 	struct kvm_sregs sregs;
-	struct kvm_vcpu *vcpu;
-	struct kvm_vm *vm;
-	uint64_t cr4;
 	int rc, i;
 
-	/*
-	 * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
-	 * use it to verify all supported CR4 bits can be set prior to defining
-	 * the vCPU model, i.e. without doing KVM_SET_CPUID2.
-	 */
-	vm = vm_create_barebones();
-	vcpu = __vm_vcpu_add(vm, 0);
-
 	vcpu_sregs_get(vcpu, &sregs);
-
-	sregs.cr0 = 0;
-	sregs.cr4 |= calc_supported_cr4_feature_bits();
-	cr4 = sregs.cr4;
-
+	sregs.cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+	sregs.cr4 |= cr4;
 	rc = _vcpu_sregs_set(vcpu, &sregs);
 	TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
 
+	TEST_ASSERT(!!(sregs.cr4 & X86_CR4_OSXSAVE) ==
+		    (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSXSAVE)),
+		    "KVM didn't %s OSXSAVE in CPUID as expected",
+		    (sregs.cr4 & X86_CR4_OSXSAVE) ? "set" : "clear");
+
+	TEST_ASSERT(!!(sregs.cr4 & X86_CR4_PKE) ==
+		    (vcpu->cpuid && vcpu_cpuid_has(vcpu, X86_FEATURE_OSPKE)),
+		    "KVM didn't %s OSPKE in CPUID as expected",
+		    (sregs.cr4 & X86_CR4_PKE) ? "set" : "clear");
+
 	vcpu_sregs_get(vcpu, &sregs);
 	TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
 		    sregs.cr4, cr4);
 
-	/* Verify all unsupported features are rejected by KVM. */
 	TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
 	TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
 	TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
@@ -119,10 +116,28 @@ int main(int argc, char *argv[])
 	/* NW without CD is illegal, as is PG without PE. */
 	TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
 	TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_sregs sregs;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	int rc;
 
+	/*
+	 * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+	 * use it to verify KVM enforces guest CPUID even if *userspace* never
+	 * sets CPUID.
+	 */
+	vm = vm_create_barebones();
+	vcpu = __vm_vcpu_add(vm, 0);
+	test_cr_bits(vcpu, KVM_ALWAYS_ALLOWED_CR4);
 	kvm_vm_free(vm);
 
-	/* Create a "real" VM and verify APIC_BASE can be set. */
+	/* Create a "real" VM with a fully populated guest CPUID and verify
+	 * APIC_BASE and all supported CR4 can be set.
+	 */
 	vm = vm_create_with_one_vcpu(&vcpu, NULL);
 
 	vcpu_sregs_get(vcpu, &sregs);
@@ -135,6 +150,8 @@ int main(int argc, char *argv[])
 	TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
 		    sregs.apic_base);
 
+	test_cr_bits(vcpu, calc_supported_cr4_feature_bits());
+
 	kvm_vm_free(vm);
 
 	return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
index 3fb967f40c6a..3fb967f40c6a 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
index 0a6dfba3905b..0a6dfba3905b 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_migrate_tests.c
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index ae77698e6e97..d97816dc476a 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -52,7 +52,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
 	bool bad = false;
 	for (i = 0; i < 4095; i++) {
 		if (from_host[i] != from_guest[i]) {
-			printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+			printf("mismatch at %u | %02hhx %02hhx\n",
+			       i, from_host[i], from_guest[i]);
 			bad = true;
 		}
 	}
@@ -155,7 +156,7 @@ static void guest_shutdown_code(void)
 
 	/* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */
 	memset(&idt, 0, sizeof(idt));
-	__asm__ __volatile__("lidt %0" :: "m"(idt));
+	set_idt(&idt);
 
 	__asm__ __volatile__("ud2");
 }
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
index fabeeaddfb3a..fabeeaddfb3a 100644
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86/smaller_maxphyaddr_emulation_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86/smm_test.c
index 55c88d664a94..55c88d664a94 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86/smm_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86/state_test.c
index 141b7fc0c965..141b7fc0c965 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86/state_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
index 916e04248fbb..917b6066cfc1 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_int_ctl_test.c
@@ -42,10 +42,7 @@ static void l2_guest_code(struct svm_test_data *svm)
 	x2apic_write_reg(APIC_ICR,
 		APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
 
-	__asm__ __volatile__(
-		"sti\n"
-		"nop\n"
-	);
+	sti_nop();
 
 	GUEST_ASSERT(vintr_irq_called);
 	GUEST_ASSERT(intr_irq_called);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
index 00135cbba35e..00135cbba35e 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_shutdown_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
index 7b6481d6c0d3..7b6481d6c0d3 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_nested_soft_inject_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
index 8a62cca28cfb..8a62cca28cfb 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/svm_vmcall_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86/sync_regs_test.c
index 8fa3948b0170..8fa3948b0170 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86/sync_regs_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
index 56306a19144a..56306a19144a 100644
--- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
+++ b/tools/testing/selftests/kvm/x86/triple_fault_event_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
index 12b0964f4f13..12b0964f4f13 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/tsc_msrs_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
index 59c7304f805e..59c7304f805e 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
+++ b/tools/testing/selftests/kvm/x86/tsc_scaling_sync.c
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
index 57f157c06b39..1e5e564523b3 100644
--- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86/ucna_injection_test.c
@@ -86,7 +86,7 @@ static void ucna_injection_guest_code(void)
 	wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
 
 	/* Enables interrupt in guest. */
-	asm volatile("sti");
+	sti();
 
 	/* Let user space inject the first UCNA */
 	GUEST_SYNC(SYNC_FIRST_UCNA);
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86/userspace_io_test.c
index 9481cbcf284f..9481cbcf284f 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_io_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
index 32b2794b78fe..32b2794b78fe 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
index a81a24761aac..a81a24761aac 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_apic_access_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
index dad988351493..dad988351493 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
index fa512d033205..fa512d033205 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
index 3fd6eceab46f..3fd6eceab46f 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
index a100ee5f0009..a100ee5f0009 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
+++ b/tools/testing/selftests/kvm/x86/vmx_invalid_nested_guest_state.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
index 90720b6205f4..90720b6205f4 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_msrs_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
index 1759fa5cb3f2..1759fa5cb3f2 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
index a1f5ff45d518..a1f5ff45d518 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
index 00dd2ac07a61..00dd2ac07a61 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_preemption_timer_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
index 67a62a5a8895..67a62a5a8895 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_set_nested_state_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
index 2ceb5c78c442..2ceb5c78c442 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
index a76078a08ff8..35cb9de54a82 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_ipi_test.c
@@ -106,7 +106,8 @@ static void halter_guest_code(struct test_data_page *data)
 		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
 		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
 		data->hlt_count++;
-		asm volatile("sti; hlt; cli");
+		safe_halt();
+		cli();
 		data->wake_count++;
 	}
 }
@@ -465,6 +466,19 @@ int main(int argc, char *argv[])
 	cancel_join_vcpu_thread(threads[0], params[0].vcpu);
 	cancel_join_vcpu_thread(threads[1], params[1].vcpu);
 
+	/*
+	 * If the host support Idle HLT, i.e. KVM *might* be using Idle HLT,
+	 * then the number of HLT exits may be less than the number of HLTs
+	 * that were executed, as Idle HLT elides the exit if the vCPU has an
+	 * unmasked, pending IRQ (or NMI).
+	 */
+	if (this_cpu_has(X86_FEATURE_IDLE_HLT))
+		TEST_ASSERT(data->hlt_count >= vcpu_get_stat(params[0].vcpu, halt_exits),
+			    "HLT insns = %lu, HLT exits = %lu",
+			    data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+	else
+		TEST_ASSERT_EQ(data->hlt_count, vcpu_get_stat(params[0].vcpu, halt_exits));
+
 	fprintf(stderr,
 		"Test successful after running for %d seconds.\n"
 		"Sending vCPU sent %lu IPIs to halting vCPU\n"
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86/xapic_state_test.c
index 88bcca188799..fdebff1165c7 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86/xapic_state_test.c
@@ -18,7 +18,7 @@ struct xapic_vcpu {
 
 static void xapic_guest_code(void)
 {
-	asm volatile("cli");
+	cli();
 
 	xapic_enable();
 
@@ -38,7 +38,7 @@ static void xapic_guest_code(void)
 
 static void x2apic_guest_code(void)
 {
-	asm volatile("cli");
+	cli();
 
 	x2apic_enable();
 
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
index c8a5c5e51661..c8a5c5e51661 100644
--- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
index a59b3c799bb2..287829f850f7 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c
@@ -191,10 +191,7 @@ static void guest_code(void)
 	struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
 	int i;
 
-	__asm__ __volatile__(
-		"sti\n"
-		"nop\n"
-	);
+	sti_nop();
 
 	/* Trigger an interrupt injection */
 	GUEST_SYNC(TEST_INJECT_VECTOR);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
index 2585087cdf5c..2585087cdf5c 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86/xss_msr_test.c
index f331a4e9bae3..f331a4e9bae3 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86/xss_msr_test.c
diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
index 470203a7cd73..335b2b1a3463 100644
--- a/tools/testing/selftests/landlock/.gitignore
+++ b/tools/testing/selftests/landlock/.gitignore
@@ -1,2 +1,4 @@
 /*_test
+/sandbox-and-launch
 /true
+/wait-pipe
diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
index 348e2dbdb4e0..5cb0828f0514 100644
--- a/tools/testing/selftests/landlock/Makefile
+++ b/tools/testing/selftests/landlock/Makefile
@@ -10,14 +10,14 @@ src_test := $(wildcard *_test.c)
 
 TEST_GEN_PROGS := $(src_test:.c=)
 
-TEST_GEN_PROGS_EXTENDED := true
+TEST_GEN_PROGS_EXTENDED := true sandbox-and-launch wait-pipe
 
 # Short targets:
-$(TEST_GEN_PROGS): LDLIBS += -lcap
+$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread
 $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static
 
 include ../lib.mk
 
 # Targets with $(OUTPUT)/ prefix:
-$(TEST_GEN_PROGS): LDLIBS += -lcap
+$(TEST_GEN_PROGS): LDLIBS += -lcap -lpthread
 $(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index 61056fa074bb..6064c9ac0532 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -9,17 +9,15 @@
 
 #include <arpa/inet.h>
 #include <errno.h>
-#include <linux/landlock.h>
 #include <linux/securebits.h>
 #include <sys/capability.h>
 #include <sys/socket.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
 #include <sys/un.h>
 #include <sys/wait.h>
 #include <unistd.h>
 
 #include "../kselftest_harness.h"
+#include "wrappers.h"
 
 #define TMP_DIR "tmp"
 
@@ -30,33 +28,8 @@
 /* TEST_F_FORK() should not be used for new tests. */
 #define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name)
 
-#ifndef landlock_create_ruleset
-static inline int
-landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
-			const size_t size, const __u32 flags)
-{
-	return syscall(__NR_landlock_create_ruleset, attr, size, flags);
-}
-#endif
-
-#ifndef landlock_add_rule
-static inline int landlock_add_rule(const int ruleset_fd,
-				    const enum landlock_rule_type rule_type,
-				    const void *const rule_attr,
-				    const __u32 flags)
-{
-	return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
-		       flags);
-}
-#endif
-
-#ifndef landlock_restrict_self
-static inline int landlock_restrict_self(const int ruleset_fd,
-					 const __u32 flags)
-{
-	return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
-}
-#endif
+static const char bin_sandbox_and_launch[] = "./sandbox-and-launch";
+static const char bin_wait_pipe[] = "./wait-pipe";
 
 static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
 {
@@ -234,6 +207,7 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd)
 struct protocol_variant {
 	int domain;
 	int type;
+	int protocol;
 };
 
 struct service_fixture {
@@ -250,11 +224,6 @@ struct service_fixture {
 	};
 };
 
-static pid_t __maybe_unused sys_gettid(void)
-{
-	return syscall(__NR_gettid);
-}
-
 static void __maybe_unused set_unix_address(struct service_fixture *const srv,
 					    const unsigned short index)
 {
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
index 29af19c4e9f9..425de4c20271 100644
--- a/tools/testing/selftests/landlock/config
+++ b/tools/testing/selftests/landlock/config
@@ -1,8 +1,11 @@
+CONFIG_AF_UNIX_OOB=y
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_SCHED=y
 CONFIG_INET=y
 CONFIG_IPV6=y
 CONFIG_KEYS=y
+CONFIG_MPTCP=y
+CONFIG_MPTCP_IPV6=y
 CONFIG_NET=y
 CONFIG_NET_NS=y
 CONFIG_OVERLAY_FS=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 6788762188fe..aa6f2c1cbec7 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -37,6 +37,10 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 
+/* Defines AT_EXECVE_CHECK without type conflicts. */
+#define _ASM_GENERIC_FCNTL_H
+#include <linux/fcntl.h>
+
 #include "common.h"
 
 #ifndef renameat2
@@ -55,11 +59,17 @@ int open_tree(int dfd, const char *filename, unsigned int flags)
 }
 #endif
 
+static int sys_execveat(int dirfd, const char *pathname, char *const argv[],
+			char *const envp[], int flags)
+{
+	return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
 #ifndef RENAME_EXCHANGE
 #define RENAME_EXCHANGE (1 << 1)
 #endif
 
-#define BINARY_PATH "./true"
+static const char bin_true[] = "./true";
 
 /* Paths (sibling number and depth) */
 static const char dir_s1d1[] = TMP_DIR "/s1d1";
@@ -85,6 +95,9 @@ static const char file1_s3d1[] = TMP_DIR "/s3d1/f1";
 /* dir_s3d2 is a mount point. */
 static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2";
 static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
+static const char file1_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3/f1";
+static const char dir_s3d4[] = TMP_DIR "/s3d1/s3d2/s3d4";
+static const char file1_s3d4[] = TMP_DIR "/s3d1/s3d2/s3d4/f1";
 
 /*
  * layout1 hierarchy:
@@ -108,8 +121,11 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
  * │           └── f2
  * └── s3d1
  *     ├── f1
- *     └── s3d2
- *         └── s3d3
+ *     └── s3d2 [mount point]
+ *         ├── s3d3
+ *         │   └── f1
+ *         └── s3d4
+ *             └── f1
  */
 
 static bool fgrep(FILE *const inf, const char *const str)
@@ -358,7 +374,8 @@ static void create_layout1(struct __test_metadata *const _metadata)
 	ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2));
 	clear_cap(_metadata, CAP_SYS_ADMIN);
 
-	ASSERT_EQ(0, mkdir(dir_s3d3, 0700));
+	create_file(_metadata, file1_s3d3);
+	create_file(_metadata, file1_s3d4);
 }
 
 static void remove_layout1(struct __test_metadata *const _metadata)
@@ -378,7 +395,8 @@ static void remove_layout1(struct __test_metadata *const _metadata)
 	EXPECT_EQ(0, remove_path(dir_s2d2));
 
 	EXPECT_EQ(0, remove_path(file1_s3d1));
-	EXPECT_EQ(0, remove_path(dir_s3d3));
+	EXPECT_EQ(0, remove_path(file1_s3d3));
+	EXPECT_EQ(0, remove_path(file1_s3d4));
 	set_cap(_metadata, CAP_SYS_ADMIN);
 	umount(dir_s3d2);
 	clear_cap(_metadata, CAP_SYS_ADMIN);
@@ -1957,8 +1975,8 @@ TEST_F_FORK(layout1, relative_chroot_chdir)
 	test_relative_path(_metadata, REL_CHROOT_CHDIR);
 }
 
-static void copy_binary(struct __test_metadata *const _metadata,
-			const char *const dst_path)
+static void copy_file(struct __test_metadata *const _metadata,
+		      const char *const src_path, const char *const dst_path)
 {
 	int dst_fd, src_fd;
 	struct stat statbuf;
@@ -1968,11 +1986,10 @@ static void copy_binary(struct __test_metadata *const _metadata,
 	{
 		TH_LOG("Failed to open \"%s\": %s", dst_path, strerror(errno));
 	}
-	src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC);
+	src_fd = open(src_path, O_RDONLY | O_CLOEXEC);
 	ASSERT_LE(0, src_fd)
 	{
-		TH_LOG("Failed to open \"" BINARY_PATH "\": %s",
-		       strerror(errno));
+		TH_LOG("Failed to open \"%s\": %s", src_path, strerror(errno));
 	}
 	ASSERT_EQ(0, fstat(src_fd, &statbuf));
 	ASSERT_EQ(statbuf.st_size,
@@ -2003,11 +2020,26 @@ static void test_execute(struct __test_metadata *const _metadata, const int err,
 	ASSERT_EQ(1, WIFEXITED(status));
 	ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status))
 	{
-		TH_LOG("Unexpected return code for \"%s\": %s", path,
-		       strerror(errno));
+		TH_LOG("Unexpected return code for \"%s\"", path);
 	};
 }
 
+static void test_check_exec(struct __test_metadata *const _metadata,
+			    const int err, const char *const path)
+{
+	int ret;
+	char *const argv[] = { (char *)path, NULL };
+
+	ret = sys_execveat(AT_FDCWD, path, argv, NULL,
+			   AT_EMPTY_PATH | AT_EXECVE_CHECK);
+	if (err) {
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(errno, err);
+	} else {
+		EXPECT_EQ(0, ret);
+	}
+}
+
 TEST_F_FORK(layout1, execute)
 {
 	const struct rule rules[] = {
@@ -2021,9 +2053,13 @@ TEST_F_FORK(layout1, execute)
 		create_ruleset(_metadata, rules[0].access, rules);
 
 	ASSERT_LE(0, ruleset_fd);
-	copy_binary(_metadata, file1_s1d1);
-	copy_binary(_metadata, file1_s1d2);
-	copy_binary(_metadata, file1_s1d3);
+	copy_file(_metadata, bin_true, file1_s1d1);
+	copy_file(_metadata, bin_true, file1_s1d2);
+	copy_file(_metadata, bin_true, file1_s1d3);
+
+	/* Checks before file1_s1d1 being denied. */
+	test_execute(_metadata, 0, file1_s1d1);
+	test_check_exec(_metadata, 0, file1_s1d1);
 
 	enforce_ruleset(_metadata, ruleset_fd);
 	ASSERT_EQ(0, close(ruleset_fd));
@@ -2031,14 +2067,94 @@ TEST_F_FORK(layout1, execute)
 	ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
 	ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
 	test_execute(_metadata, EACCES, file1_s1d1);
+	test_check_exec(_metadata, EACCES, file1_s1d1);
 
 	ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
 	ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
 	test_execute(_metadata, 0, file1_s1d2);
+	test_check_exec(_metadata, 0, file1_s1d2);
 
 	ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
 	ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
 	test_execute(_metadata, 0, file1_s1d3);
+	test_check_exec(_metadata, 0, file1_s1d3);
+}
+
+TEST_F_FORK(layout1, umount_sandboxer)
+{
+	int pipe_child[2], pipe_parent[2];
+	char buf_parent;
+	pid_t child;
+	int status;
+
+	copy_file(_metadata, bin_sandbox_and_launch, file1_s3d3);
+	ASSERT_EQ(0, pipe2(pipe_child, 0));
+	ASSERT_EQ(0, pipe2(pipe_parent, 0));
+
+	child = fork();
+	ASSERT_LE(0, child);
+	if (child == 0) {
+		char pipe_child_str[12], pipe_parent_str[12];
+		char *const argv[] = { (char *)file1_s3d3,
+				       (char *)bin_wait_pipe, pipe_child_str,
+				       pipe_parent_str, NULL };
+
+		/* Passes the pipe FDs to the executed binary and its child. */
+		EXPECT_EQ(0, close(pipe_child[0]));
+		EXPECT_EQ(0, close(pipe_parent[1]));
+		snprintf(pipe_child_str, sizeof(pipe_child_str), "%d",
+			 pipe_child[1]);
+		snprintf(pipe_parent_str, sizeof(pipe_parent_str), "%d",
+			 pipe_parent[0]);
+
+		/*
+		 * We need bin_sandbox_and_launch (copied inside the mount as
+		 * file1_s3d3) to execute bin_wait_pipe (outside the mount) to
+		 * make sure the mount point will not be EBUSY because of
+		 * file1_s3d3 being in use.  This avoids a potential race
+		 * condition between the following read() and umount() calls.
+		 */
+		ASSERT_EQ(0, execve(argv[0], argv, NULL))
+		{
+			TH_LOG("Failed to execute \"%s\": %s", argv[0],
+			       strerror(errno));
+		};
+		_exit(1);
+		return;
+	}
+
+	EXPECT_EQ(0, close(pipe_child[1]));
+	EXPECT_EQ(0, close(pipe_parent[0]));
+
+	/* Waits for the child to sandbox itself. */
+	EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+	/* Tests that the sandboxer is tied to its mount point. */
+	set_cap(_metadata, CAP_SYS_ADMIN);
+	EXPECT_EQ(-1, umount(dir_s3d2));
+	EXPECT_EQ(EBUSY, errno);
+	clear_cap(_metadata, CAP_SYS_ADMIN);
+
+	/* Signals the child to launch a grandchild. */
+	EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+
+	/* Waits for the grandchild. */
+	EXPECT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+	/* Tests that the domain's sandboxer is not tied to its mount point. */
+	set_cap(_metadata, CAP_SYS_ADMIN);
+	EXPECT_EQ(0, umount(dir_s3d2))
+	{
+		TH_LOG("Failed to umount \"%s\": %s", dir_s3d2,
+		       strerror(errno));
+	};
+	clear_cap(_metadata, CAP_SYS_ADMIN);
+
+	/* Signals the grandchild to terminate. */
+	EXPECT_EQ(1, write(pipe_parent[1], ".", 1));
+	ASSERT_EQ(child, waitpid(child, &status, 0));
+	ASSERT_EQ(1, WIFEXITED(status));
+	ASSERT_EQ(0, WEXITSTATUS(status));
 }
 
 TEST_F_FORK(layout1, link)
@@ -2444,6 +2560,44 @@ TEST_F_FORK(layout1, refer_mount_root_deny)
 	EXPECT_EQ(0, close(root_fd));
 }
 
+TEST_F_FORK(layout1, refer_part_mount_tree_is_allowed)
+{
+	const struct rule layer1[] = {
+		{
+			/* Parent mount point. */
+			.path = dir_s3d1,
+			.access = LANDLOCK_ACCESS_FS_REFER |
+				  LANDLOCK_ACCESS_FS_MAKE_REG,
+		},
+		{
+			/*
+			 * Removing the source file is allowed because its
+			 * access rights are already a superset of the
+			 * destination.
+			 */
+			.path = dir_s3d4,
+			.access = LANDLOCK_ACCESS_FS_REFER |
+				  LANDLOCK_ACCESS_FS_MAKE_REG |
+				  LANDLOCK_ACCESS_FS_REMOVE_FILE,
+		},
+		{},
+	};
+	int ruleset_fd;
+
+	ASSERT_EQ(0, unlink(file1_s3d3));
+	ruleset_fd = create_ruleset(_metadata,
+				    LANDLOCK_ACCESS_FS_REFER |
+					    LANDLOCK_ACCESS_FS_MAKE_REG |
+					    LANDLOCK_ACCESS_FS_REMOVE_FILE,
+				    layer1);
+
+	ASSERT_LE(0, ruleset_fd);
+	enforce_ruleset(_metadata, ruleset_fd);
+	ASSERT_EQ(0, close(ruleset_fd));
+
+	ASSERT_EQ(0, rename(file1_s3d4, file1_s3d3));
+}
+
 TEST_F_FORK(layout1, reparent_link)
 {
 	const struct rule layer1[] = {
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 4e0aeb53b225..d9de0ee49ebc 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -85,18 +85,18 @@ static void setup_loopback(struct __test_metadata *const _metadata)
 	clear_ambient_cap(_metadata, CAP_NET_ADMIN);
 }
 
+static bool prot_is_tcp(const struct protocol_variant *const prot)
+{
+	return (prot->domain == AF_INET || prot->domain == AF_INET6) &&
+	       prot->type == SOCK_STREAM &&
+	       (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP);
+}
+
 static bool is_restricted(const struct protocol_variant *const prot,
 			  const enum sandbox_type sandbox)
 {
-	switch (prot->domain) {
-	case AF_INET:
-	case AF_INET6:
-		switch (prot->type) {
-		case SOCK_STREAM:
-			return sandbox == TCP_SANDBOX;
-		}
-		break;
-	}
+	if (sandbox == TCP_SANDBOX)
+		return prot_is_tcp(prot);
 	return false;
 }
 
@@ -105,7 +105,7 @@ static int socket_variant(const struct service_fixture *const srv)
 	int ret;
 
 	ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
-		     0);
+		     srv->protocol.protocol);
 	if (ret < 0)
 		return -errno;
 	return ret;
@@ -290,22 +290,70 @@ FIXTURE_TEARDOWN(protocol)
 }
 
 /* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) {
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) {
 	/* clang-format on */
 	.sandbox = NO_SANDBOX,
 	.prot = {
 		.domain = AF_INET,
 		.type = SOCK_STREAM,
+		/* IPPROTO_IP == 0 */
+		.protocol = IPPROTO_IP,
 	},
 };
 
 /* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) {
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+		.protocol = IPPROTO_TCP,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+		.protocol = IPPROTO_MPTCP,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+		/* IPPROTO_IP == 0 */
+		.protocol = IPPROTO_IP,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) {
 	/* clang-format on */
 	.sandbox = NO_SANDBOX,
 	.prot = {
 		.domain = AF_INET6,
 		.type = SOCK_STREAM,
+		.protocol = IPPROTO_TCP,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) {
+	/* clang-format on */
+	.sandbox = NO_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+		.protocol = IPPROTO_MPTCP,
 	},
 };
 
@@ -350,22 +398,70 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) {
 };
 
 /* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) {
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+		/* IPPROTO_IP == 0 */
+		.protocol = IPPROTO_IP,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+		.protocol = IPPROTO_TCP,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) {
 	/* clang-format on */
 	.sandbox = TCP_SANDBOX,
 	.prot = {
 		.domain = AF_INET,
 		.type = SOCK_STREAM,
+		.protocol = IPPROTO_MPTCP,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+		/* IPPROTO_IP == 0 */
+		.protocol = IPPROTO_IP,
+	},
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) {
+	/* clang-format on */
+	.sandbox = TCP_SANDBOX,
+	.prot = {
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+		.protocol = IPPROTO_TCP,
 	},
 };
 
 /* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) {
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) {
 	/* clang-format on */
 	.sandbox = TCP_SANDBOX,
 	.prot = {
 		.domain = AF_INET6,
 		.type = SOCK_STREAM,
+		.protocol = IPPROTO_MPTCP,
 	},
 };
 
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
index a19db4d0b3bd..8f31b673ff2d 100644
--- a/tools/testing/selftests/landlock/ptrace_test.c
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -22,8 +22,6 @@
 /* Copied from security/yama/yama_lsm.c */
 #define YAMA_SCOPE_DISABLED 0
 #define YAMA_SCOPE_RELATIONAL 1
-#define YAMA_SCOPE_CAPABILITY 2
-#define YAMA_SCOPE_NO_ATTACH 3
 
 static void create_domain(struct __test_metadata *const _metadata)
 {
diff --git a/tools/testing/selftests/landlock/sandbox-and-launch.c b/tools/testing/selftests/landlock/sandbox-and-launch.c
new file mode 100644
index 000000000000..3e32e1a51ac5
--- /dev/null
+++ b/tools/testing/selftests/landlock/sandbox-and-launch.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sandbox itself and execute another program (in a different mount point).
+ *
+ * Used by layout1.umount_sandboxer from fs_test.c
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "wrappers.h"
+
+int main(int argc, char *argv[])
+{
+	struct landlock_ruleset_attr ruleset_attr = {
+		.scoped = LANDLOCK_SCOPE_SIGNAL,
+	};
+	int pipe_child, pipe_parent, ruleset_fd;
+	char buf;
+
+	/*
+	 * The first argument must be the file descriptor number of a pipe.
+	 * The second argument must be the program to execute.
+	 */
+	if (argc != 4) {
+		fprintf(stderr, "Wrong number of arguments (not three)\n");
+		return 1;
+	}
+
+	pipe_child = atoi(argv[2]);
+	pipe_parent = atoi(argv[3]);
+
+	ruleset_fd =
+		landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+	if (ruleset_fd < 0) {
+		perror("Failed to create ruleset");
+		return 1;
+	}
+
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		perror("Failed to call prctl()");
+		return 1;
+	}
+
+	if (landlock_restrict_self(ruleset_fd, 0)) {
+		perror("Failed to restrict self");
+		return 1;
+	}
+
+	if (close(ruleset_fd)) {
+		perror("Failed to close ruleset");
+		return 1;
+	}
+
+	/* Signals that we are sandboxed. */
+	errno = 0;
+	if (write(pipe_child, ".", 1) != 1) {
+		perror("Failed to write to the second argument");
+		return 1;
+	}
+
+	/* Waits for the parent to try to umount. */
+	if (read(pipe_parent, &buf, 1) != 1) {
+		perror("Failed to write to the third argument");
+		return 1;
+	}
+
+	/* Shifts arguments. */
+	argv[0] = argv[1];
+	argv[1] = argv[2];
+	argv[2] = argv[3];
+	argv[3] = NULL;
+	execve(argv[0], argv, NULL);
+	perror("Failed to execute the provided binary");
+	return 1;
+}
diff --git a/tools/testing/selftests/landlock/wait-pipe.c b/tools/testing/selftests/landlock/wait-pipe.c
new file mode 100644
index 000000000000..0dbcd260a0fa
--- /dev/null
+++ b/tools/testing/selftests/landlock/wait-pipe.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Write in a pipe and wait.
+ *
+ * Used by layout1.umount_sandboxer from fs_test.c
+ *
+ * Copyright © 2024-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[])
+{
+	int pipe_child, pipe_parent;
+	char buf;
+
+	/* The first argument must be the file descriptor number of a pipe. */
+	if (argc != 3) {
+		fprintf(stderr, "Wrong number of arguments (not two)\n");
+		return 1;
+	}
+
+	pipe_child = atoi(argv[1]);
+	pipe_parent = atoi(argv[2]);
+
+	/* Signals that we are waiting. */
+	if (write(pipe_child, ".", 1) != 1) {
+		perror("Failed to write to first argument");
+		return 1;
+	}
+
+	/* Waits for the parent do its test. */
+	if (read(pipe_parent, &buf, 1) != 1) {
+		perror("Failed to write to the second argument");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/landlock/wrappers.h b/tools/testing/selftests/landlock/wrappers.h
new file mode 100644
index 000000000000..65548323e45d
--- /dev/null
+++ b/tools/testing/selftests/landlock/wrappers.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Syscall wrappers
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ * Copyright © 2021-2025 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <linux/landlock.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef landlock_create_ruleset
+static inline int
+landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
+			const size_t size, const __u32 flags)
+{
+	return syscall(__NR_landlock_create_ruleset, attr, size, flags);
+}
+#endif
+
+#ifndef landlock_add_rule
+static inline int landlock_add_rule(const int ruleset_fd,
+				    const enum landlock_rule_type rule_type,
+				    const void *const rule_attr,
+				    const __u32 flags)
+{
+	return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
+		       flags);
+}
+#endif
+
+#ifndef landlock_restrict_self
+static inline int landlock_restrict_self(const int ruleset_fd,
+					 const __u32 flags)
+{
+	return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
+}
+#endif
+
+static inline pid_t sys_gettid(void)
+{
+	return syscall(__NR_gettid);
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index d6edcfcb5be8..530390033929 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -228,4 +228,7 @@ $(OUTPUT)/%:%.S
 	$(LINK.S) $^ $(LDLIBS) -o $@
 endif
 
-.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir
+headers:
+	$(Q)$(MAKE) -C $(top_srcdir) headers
+
+.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir headers
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index c52fe3ad8e98..f876bf4744e1 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,5 +4,5 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh
+TEST_PROGS := bitmap.sh
 include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index dc15aba8d0a3..81a1f64a22e8 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,5 +1,2 @@
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
-CONFIG_PRIME_NUMBERS=m
 CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
deleted file mode 100755
index 370b79a9cb2e..000000000000
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Checks fast/slow prime_number generation for inconsistencies
-$(dirname $0)/../kselftest/module.sh "prime numbers" prime_numbers selftest=65536
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
deleted file mode 100755
index 05f4544e87f9..000000000000
--- a/tools/testing/selftests/lib/printf.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Tests the printf infrastructure using test_printf kernel module.
-$(dirname $0)/../kselftest/module.sh "printf" test_printf
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
deleted file mode 100755
index b59b8ba561c3..000000000000
--- a/tools/testing/selftests/lib/scanf.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Tests the scanf infrastructure using test_scanf kernel module.
-$(dirname $0)/../kselftest/module.sh "scanf" test_scanf
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index e5d06fb40233..15601402dee6 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -306,7 +306,8 @@ function check_result {
 	result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \
 		 grep -e 'livepatch:' -e 'test_klp' | \
 		 grep -v '\(tainting\|taints\) kernel' | \
-		 sed 's/^\[[ 0-9.]*\] //')
+		 sed 's/^\[[ 0-9.]*\] //' | \
+		 sed 's/^\[[ ]*[CT][0-9]*\] //')
 
 	if [[ "$expect" == "$result" ]] ; then
 		echo "ok"
diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh
index 37bbc3fb2780..2a03deb26a12 100755
--- a/tools/testing/selftests/livepatch/test-callbacks.sh
+++ b/tools/testing/selftests/livepatch/test-callbacks.sh
@@ -259,7 +259,7 @@ $MOD_TARGET: ${MOD_TARGET}_init
 % insmod test_modules/$MOD_LIVEPATCH.ko pre_patch_ret=-19
 livepatch: enabling patch '$MOD_LIVEPATCH'
 livepatch: '$MOD_LIVEPATCH': initializing patching transition
-test_klp_callbacks_demo: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH: pre_patch_callback: vmlinux
 livepatch: pre-patch callback failed for object 'vmlinux'
 livepatch: failed to enable patch '$MOD_LIVEPATCH'
 livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh
index 2c91428d2997..58fe1d96997c 100755
--- a/tools/testing/selftests/livepatch/test-sysfs.sh
+++ b/tools/testing/selftests/livepatch/test-sysfs.sh
@@ -5,6 +5,8 @@
 . $(dirname $0)/functions.sh
 
 MOD_LIVEPATCH=test_klp_livepatch
+MOD_LIVEPATCH2=test_klp_callbacks_demo
+MOD_LIVEPATCH3=test_klp_syscall
 
 setup_config
 
@@ -19,6 +21,8 @@ check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--"
 check_sysfs_value  "$MOD_LIVEPATCH" "enabled" "1"
 check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------"
 check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
+check_sysfs_rights "$MOD_LIVEPATCH" "stack_order" "-r--r--r--"
+check_sysfs_value  "$MOD_LIVEPATCH" "stack_order" "1"
 check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--"
 check_sysfs_value  "$MOD_LIVEPATCH" "transition" "0"
 check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--"
@@ -131,4 +135,71 @@ livepatch: '$MOD_LIVEPATCH': completing unpatching transition
 livepatch: '$MOD_LIVEPATCH': unpatching complete
 % rmmod $MOD_LIVEPATCH"
 
+start_test "sysfs test stack_order value"
+
+load_lp $MOD_LIVEPATCH
+
+check_sysfs_value  "$MOD_LIVEPATCH" "stack_order" "1"
+
+load_lp $MOD_LIVEPATCH2
+
+check_sysfs_value  "$MOD_LIVEPATCH2" "stack_order" "2"
+
+load_lp $MOD_LIVEPATCH3
+
+check_sysfs_value  "$MOD_LIVEPATCH3" "stack_order" "3"
+
+disable_lp $MOD_LIVEPATCH2
+unload_lp $MOD_LIVEPATCH2
+
+check_sysfs_value  "$MOD_LIVEPATCH" "stack_order" "1"
+check_sysfs_value  "$MOD_LIVEPATCH3" "stack_order" "2"
+
+disable_lp $MOD_LIVEPATCH3
+unload_lp $MOD_LIVEPATCH3
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% insmod test_modules/$MOD_LIVEPATCH2.ko
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+$MOD_LIVEPATCH2: post_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% insmod test_modules/$MOD_LIVEPATCH3.ko
+livepatch: enabling patch '$MOD_LIVEPATCH3'
+livepatch: '$MOD_LIVEPATCH3': initializing patching transition
+livepatch: '$MOD_LIVEPATCH3': starting patching transition
+livepatch: '$MOD_LIVEPATCH3': completing patching transition
+livepatch: '$MOD_LIVEPATCH3': patching complete
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled
+livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
+$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH2': completing unpatching transition
+$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': unpatching complete
+% rmmod $MOD_LIVEPATCH2
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH3/enabled
+livepatch: '$MOD_LIVEPATCH3': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH3': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH3': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH3': unpatching complete
+% rmmod $MOD_LIVEPATCH3
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
 exit 0
diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
index 66dec47e3ca3..732e89fe99c0 100644
--- a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
+++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
@@ -56,16 +56,15 @@ TEST(flags_zero_lsm_set_self_attr)
 TEST(flags_overset_lsm_set_self_attr)
 {
 	const long page_size = sysconf(_SC_PAGESIZE);
-	char *ctx = calloc(page_size, 1);
+	struct lsm_ctx *ctx = calloc(page_size, 1);
 	__u32 size = page_size;
-	struct lsm_ctx *tctx = (struct lsm_ctx *)ctx;
 
 	ASSERT_NE(NULL, ctx);
 	if (attr_lsm_count()) {
-		ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, tctx, &size,
+		ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
 					       0));
 	}
-	ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, tctx,
+	ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx,
 					size, 0));
 
 	free(ctx);
diff --git a/tools/testing/selftests/media_tests/regression_test.txt b/tools/testing/selftests/media_tests/regression_test.txt
index 2627367681f7..9d0fcd98c085 100644
--- a/tools/testing/selftests/media_tests/regression_test.txt
+++ b/tools/testing/selftests/media_tests/regression_test.txt
@@ -1,5 +1,5 @@
 Testing for regressions in Media Controller API register, ioctl, syscall,
-and unregister paths. There have a few problems that result in user-after
+and unregister paths. There have a few problems that result in use-after
 free on media_device, media_devnode, and cdev pointers when the driver is
 unbound while ioctl is in progress.
 
@@ -15,11 +15,11 @@ Build media_device_test
 cd tools/testing/selftests/media_tests
 make
 
-Regressions test for cdev user-after free error on /dev/mediaX when driver
+Regressions test for cdev use-after-free error on /dev/mediaX when driver
 is unbound:
 
 Start media_device_test to regression test media devnode dynamic alloc
-and cdev user-after-free fixes. This opens media dev files and sits in
+and cdev use-after-free fixes. This opens media dev files and sits in
 a loop running media ioctl MEDIA_IOC_DEVICE_INFO command once every 10
 seconds. The idea is when device file goes away, media devnode and cdev
 should stick around until this test exits.
@@ -40,4 +40,4 @@ keep ioctls going while bind/unbind runs.
 Copy bind_unbind_sample.txt and make changes to specify the driver name
 and number to run bind and unbind. Start the bind_unbind.sh
 
-Run dmesg looking for any user-after free errors or mutex lock errors.
+Run dmesg looking for any use-after-free errors or mutex lock errors.
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index c0c53451a16d..5b993924cc3f 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -171,7 +171,7 @@ static void mfd_fail_new(const char *name, unsigned int flags)
 	r = sys_memfd_create(name, flags);
 	if (r >= 0) {
 		printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
-		       name, flags);
+		       name ? name : "NULL", flags);
 		close(r);
 		abort();
 	}
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index 8f01f4da1c0d..121000c28c10 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -27,6 +27,7 @@ protection_keys_64
 madv_populate
 uffd-stress
 uffd-unit-tests
+uffd-wp-mremap
 mlock-intersect-test
 mlock-random-test
 virtual_address_range
@@ -36,6 +37,9 @@ map_fixed_noreplace
 write_to_hugetlbfs
 hmm-tests
 memfd_secret
+hugetlb_dio
+pkey_sighandler_tests_32
+pkey_sighandler_tests_64
 soft-dirty
 split_huge_page_test
 ksm_tests
@@ -49,7 +53,6 @@ va_high_addr_switch
 hugetlb_fault_after_madv
 hugetlb_madv_vs_map
 mseal_test
-seal_elf
 droppable
 hugetlb_dio
 pkey_sighandler_tests_32
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 3de23ea4663f..63ce39d024bb 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -33,9 +33,16 @@ endif
 # LDLIBS.
 MAKEFLAGS += --no-builtin-rules
 
-CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
+CFLAGS = -Wall -O2 -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
 LDLIBS = -lrt -lpthread -lm
 
+# Some distributions (such as Ubuntu) configure GCC so that _FORTIFY_SOURCE is
+# automatically enabled at -O1 or above. This triggers various unused-result
+# warnings where functions such as read() or write() are called and their
+# return value is not checked. Disable _FORTIFY_SOURCE to silence those
+# warnings.
+CFLAGS += -U_FORTIFY_SOURCE
+
 KDIR ?= /lib/modules/$(shell uname -r)/build
 ifneq (,$(wildcard $(KDIR)/Module.symvers))
 ifneq (,$(wildcard $(KDIR)/include/linux/page_frag_cache.h))
@@ -75,13 +82,13 @@ TEST_GEN_FILES += mrelease_test
 TEST_GEN_FILES += mremap_dontunmap
 TEST_GEN_FILES += mremap_test
 TEST_GEN_FILES += mseal_test
-TEST_GEN_FILES += seal_elf
 TEST_GEN_FILES += on-fault-limit
 TEST_GEN_FILES += pagemap_ioctl
 TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += uffd-stress
 TEST_GEN_FILES += uffd-unit-tests
+TEST_GEN_FILES += uffd-wp-mremap
 TEST_GEN_FILES += split_huge_page_test
 TEST_GEN_FILES += ksm_tests
 TEST_GEN_FILES += ksm_functional_tests
@@ -152,11 +159,16 @@ $(TEST_GEN_FILES): vm_util.c thp_settings.c
 
 $(OUTPUT)/uffd-stress: uffd-common.c
 $(OUTPUT)/uffd-unit-tests: uffd-common.c
+$(OUTPUT)/uffd-wp-mremap: uffd-common.c
+$(OUTPUT)/protection_keys: pkey_util.c
+$(OUTPUT)/pkey_sighandler_tests: pkey_util.c
 
 ifeq ($(ARCH),x86_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
 BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
 
+$(BINARIES_32) $(BINARIES_64): pkey_util.c
+
 define gen-target-rule-32
 $(1) $(1)_32: $(OUTPUT)/$(1)_32
 .PHONY: $(1) $(1)_32
diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config
index 4309916f629e..a28baa536332 100644
--- a/tools/testing/selftests/mm/config
+++ b/tools/testing/selftests/mm/config
@@ -7,3 +7,4 @@ CONFIG_TEST_HMM=m
 CONFIG_GUP_TEST=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_ANON_VMA_NAME=y
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index 1238e1c5aae1..9446673645eb 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -1482,7 +1482,7 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
 	}
 
 	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
-	if (mem == MAP_FAILED) {
+	if (smem == MAP_FAILED) {
 		ksft_test_result_fail("mmap() failed\n");
 		goto munmap;
 	}
@@ -1583,7 +1583,7 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc)
 		goto close;
 	}
 	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
-	if (mem == MAP_FAILED) {
+	if (smem == MAP_FAILED) {
 		ksft_test_result_fail("mmap() failed\n");
 		goto munmap;
 	}
@@ -1634,7 +1634,7 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
 		goto close;
 	}
 	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
-	if (mem == MAP_FAILED) {
+	if (smem == MAP_FAILED) {
 		ksft_test_result_fail("mmap() failed\n");
 		goto munmap;
 	}
@@ -1684,7 +1684,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
 		goto close;
 	}
 	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
-	if (mem == MAP_FAILED) {
+	if (smem == MAP_FAILED) {
 		ksft_test_result_fail("mmap() failed\n");
 		goto munmap;
 	}
@@ -1696,7 +1696,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
 	fn(mem, smem, hugetlbsize);
 munmap:
 	munmap(mem, hugetlbsize);
-	if (mem != MAP_FAILED)
+	if (smem != MAP_FAILED)
 		munmap(smem, hugetlbsize);
 close:
 	close(fd);
diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-pages.c
index 7cdf815d0d63..525c50d3ec23 100644
--- a/tools/testing/selftests/mm/guard-pages.c
+++ b/tools/testing/selftests/mm/guard-pages.c
@@ -19,6 +19,8 @@
 #include <sys/uio.h>
 #include <unistd.h>
 
+#include "../pidfd/pidfd.h"
+
 /*
  * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1:
  *
@@ -50,9 +52,10 @@ static void handle_fatal(int c)
 	siglongjmp(signal_jmp_buf, c);
 }
 
-static int pidfd_open(pid_t pid, unsigned int flags)
+static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
+				   size_t n, int advice, unsigned int flags)
 {
-	return syscall(SYS_pidfd_open, pid, flags);
+	return syscall(__NR_process_madvise, pidfd, iovec, n, advice, flags);
 }
 
 /*
@@ -364,14 +367,10 @@ TEST_F(guard_pages, multi_vma)
 TEST_F(guard_pages, process_madvise)
 {
 	const unsigned long page_size = self->page_size;
-	pid_t pid = getpid();
-	int pidfd = pidfd_open(pid, 0);
 	char *ptr_region, *ptr1, *ptr2, *ptr3;
 	ssize_t count;
 	struct iovec vec[6];
 
-	ASSERT_NE(pidfd, -1);
-
 	/* Reserve region to map over. */
 	ptr_region = mmap(NULL, 100 * page_size, PROT_NONE,
 			  MAP_ANON | MAP_PRIVATE, -1, 0);
@@ -419,7 +418,7 @@ TEST_F(guard_pages, process_madvise)
 	ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0);
 
 	/* Now guard in one step. */
-	count = process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0);
+	count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_INSTALL, 0);
 
 	/* OK we don't have permission to do this, skip. */
 	if (count == -1 && errno == EPERM)
@@ -440,7 +439,7 @@ TEST_F(guard_pages, process_madvise)
 	ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size]));
 
 	/* Now do the same with unguard... */
-	count = process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0);
+	count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_REMOVE, 0);
 
 	/* ...and everything should now succeed. */
 
@@ -457,7 +456,6 @@ TEST_F(guard_pages, process_madvise)
 	ASSERT_EQ(munmap(ptr1, 10 * page_size), 0);
 	ASSERT_EQ(munmap(ptr2, 5 * page_size), 0);
 	ASSERT_EQ(munmap(ptr3, 20 * page_size), 0);
-	close(pidfd);
 }
 
 /* Assert that unmapping ranges does not leave guard markers behind. */
@@ -990,7 +988,7 @@ TEST_F(guard_pages, fork)
 		   MAP_ANON | MAP_PRIVATE, -1, 0);
 	ASSERT_NE(ptr, MAP_FAILED);
 
-	/* Establish guard apges in the first 5 pages. */
+	/* Establish guard pages in the first 5 pages. */
 	ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
 
 	pid = fork();
@@ -1030,6 +1028,77 @@ TEST_F(guard_pages, fork)
 }
 
 /*
+ * Assert expected behaviour after we fork populated ranges of anonymous memory
+ * and then guard and unguard the range.
+ */
+TEST_F(guard_pages, fork_cow)
+{
+	const unsigned long page_size = self->page_size;
+	char *ptr;
+	pid_t pid;
+	int i;
+
+	/* Map 10 pages. */
+	ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+		   MAP_ANON | MAP_PRIVATE, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	/* Populate range. */
+	for (i = 0; i < 10 * page_size; i++) {
+		char chr = 'a' + (i % 26);
+
+		ptr[i] = chr;
+	}
+
+	pid = fork();
+	ASSERT_NE(pid, -1);
+	if (!pid) {
+		/* This is the child process now. */
+
+		/* Ensure the range is as expected. */
+		for (i = 0; i < 10 * page_size; i++) {
+			char expected = 'a' + (i % 26);
+			char actual = ptr[i];
+
+			ASSERT_EQ(actual, expected);
+		}
+
+		/* Establish guard pages across the whole range. */
+		ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+		/* Remove it. */
+		ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+		/*
+		 * By removing the guard pages, the page tables will be
+		 * cleared. Assert that we are looking at the zero page now.
+		 */
+		for (i = 0; i < 10 * page_size; i++) {
+			char actual = ptr[i];
+
+			ASSERT_EQ(actual, '\0');
+		}
+
+		exit(0);
+	}
+
+	/* Parent process. */
+
+	/* Parent simply waits on child. */
+	waitpid(pid, NULL, 0);
+
+	/* Ensure the range is unchanged in parent anon range. */
+	for (i = 0; i < 10 * page_size; i++) {
+		char expected = 'a' + (i % 26);
+		char actual = ptr[i];
+
+		ASSERT_EQ(actual, expected);
+	}
+
+	/* Cleanup. */
+	ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
  * Assert that forking a process with VMAs that do have VM_WIPEONFORK set
  * behave as expected.
  */
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
index ada9156cc497..c463d1c09c9b 100644
--- a/tools/testing/selftests/mm/hugepage-mremap.c
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -15,7 +15,7 @@
 #define _GNU_SOURCE
 #include <stdlib.h>
 #include <stdio.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
 #include <sys/mman.h>
 #include <errno.h>
 #include <fcntl.h> /* Definition of O_* constants */
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 66b4e111b5a2..b61803e36d1c 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -11,7 +11,7 @@
 #include <string.h>
 #include <stdbool.h>
 #include <stdint.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <sys/mman.h>
@@ -369,6 +369,7 @@ unmap:
 	munmap(map, size);
 }
 
+#ifdef __NR_userfaultfd
 static void test_unmerge_uffd_wp(void)
 {
 	struct uffdio_writeprotect uffd_writeprotect;
@@ -429,6 +430,7 @@ close_uffd:
 unmap:
 	munmap(map, size);
 }
+#endif
 
 /* Verify that KSM can be enabled / queried with prctl. */
 static void test_prctl(void)
@@ -684,7 +686,9 @@ int main(int argc, char **argv)
 		exit(test_child_ksm());
 	}
 
+#ifdef __NR_userfaultfd
 	tests++;
+#endif
 
 	ksft_print_header();
 	ksft_set_plan(tests);
@@ -696,7 +700,9 @@ int main(int argc, char **argv)
 	test_unmerge();
 	test_unmerge_zero_pages();
 	test_unmerge_discarded();
+#ifdef __NR_userfaultfd
 	test_unmerge_uffd_wp();
+#endif
 
 	test_prot_none();
 
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index b748c48908d9..dcdd5bb20f3d 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -776,7 +776,7 @@ err_out:
 
 int main(int argc, char *argv[])
 {
-	int ret, opt;
+	int ret = 0, opt;
 	int prot = 0;
 	int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT;
 	int merge_type = KSM_MERGE_TYPE_DEFAULT;
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
index 74c911aa3aea..9a0597310a76 100644
--- a/tools/testing/selftests/mm/memfd_secret.c
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -17,7 +17,7 @@
 
 #include <stdlib.h>
 #include <string.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
 #include <errno.h>
 #include <stdio.h>
 #include <fcntl.h>
@@ -28,6 +28,8 @@
 #define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
 #define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__)
 
+#ifdef __NR_memfd_secret
+
 #define PATTERN	0x55
 
 static const int prot = PROT_READ | PROT_WRITE;
@@ -332,3 +334,13 @@ int main(int argc, char *argv[])
 
 	ksft_finished();
 }
+
+#else /* __NR_memfd_secret */
+
+int main(int argc, char *argv[])
+{
+	printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n");
+	return KSFT_SKIP;
+}
+
+#endif /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
index 64bcbb7151cf..1e3a595fbf01 100644
--- a/tools/testing/selftests/mm/migration.c
+++ b/tools/testing/selftests/mm/migration.c
@@ -204,4 +204,103 @@ TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME)
 		ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
 }
 
+/*
+ * migration test with shared anon THP page
+ */
+
+TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME)
+{
+	pid_t pid;
+	uint64_t *ptr;
+	int i;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	ptr = mmap(NULL, 2 * TWOMEG, PROT_READ | PROT_WRITE,
+		MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
+	ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
+
+	memset(ptr, 0xde, TWOMEG);
+	for (i = 0; i < self->nthreads - 1; i++) {
+		pid = fork();
+		if (!pid) {
+			prctl(PR_SET_PDEATHSIG, SIGHUP);
+			/* Parent may have died before prctl so check now. */
+			if (getppid() == 1)
+				kill(getpid(), SIGHUP);
+			access_mem(ptr);
+		} else {
+			self->pids[i] = pid;
+		}
+	}
+
+	ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+	for (i = 0; i < self->nthreads - 1; i++)
+		ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
+/*
+ * migration test with private anon hugetlb page
+ */
+TEST_F_TIMEOUT(migration, private_anon_htlb, 2*RUNTIME)
+{
+	uint64_t *ptr;
+	int i;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	memset(ptr, 0xde, TWOMEG);
+	for (i = 0; i < self->nthreads - 1; i++)
+		if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+			perror("Couldn't create thread");
+
+	ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+	for (i = 0; i < self->nthreads - 1; i++)
+		ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+/*
+ * migration test with shared anon hugetlb page
+ */
+TEST_F_TIMEOUT(migration, shared_anon_htlb, 2*RUNTIME)
+{
+	pid_t pid;
+	uint64_t *ptr;
+	int i;
+
+	if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+		SKIP(return, "Not enough threads or NUMA nodes available");
+
+	ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+		MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+	ASSERT_NE(ptr, MAP_FAILED);
+
+	memset(ptr, 0xde, TWOMEG);
+	for (i = 0; i < self->nthreads - 1; i++) {
+		pid = fork();
+		if (!pid) {
+			prctl(PR_SET_PDEATHSIG, SIGHUP);
+			/* Parent may have died before prctl so check now. */
+			if (getppid() == 1)
+				kill(getpid(), SIGHUP);
+			access_mem(ptr);
+		} else {
+			self->pids[i] = pid;
+		}
+	}
+
+	ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+	for (i = 0; i < self->nthreads - 1; i++)
+		ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
index 1db134063c38..09feeb453646 100644
--- a/tools/testing/selftests/mm/mkdirty.c
+++ b/tools/testing/selftests/mm/mkdirty.c
@@ -9,7 +9,7 @@
  */
 #include <fcntl.h>
 #include <signal.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
 #include <string.h>
 #include <errno.h>
 #include <stdlib.h>
@@ -265,6 +265,7 @@ munmap:
 	munmap(mmap_mem, mmap_size);
 }
 
+#ifdef __NR_userfaultfd
 static void test_uffdio_copy(void)
 {
 	struct uffdio_register uffdio_register;
@@ -280,6 +281,7 @@ static void test_uffdio_copy(void)
 	dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
 	if (dst == MAP_FAILED) {
 		ksft_test_result_fail("mmap() failed\n");
+		free(src);
 		return;
 	}
 
@@ -321,6 +323,7 @@ munmap:
 	munmap(dst, pagesize);
 	free(src);
 }
+#endif /* __NR_userfaultfd */
 
 int main(void)
 {
@@ -333,7 +336,9 @@ int main(void)
 			       thpsize / 1024);
 		tests += 3;
 	}
+#ifdef __NR_userfaultfd
 	tests += 1;
+#endif /* __NR_userfaultfd */
 
 	ksft_print_header();
 	ksft_set_plan(tests);
@@ -363,7 +368,9 @@ int main(void)
 	if (thpsize)
 		test_pte_mapped_thp();
 	/* Placing a fresh page via userfaultfd may set the PTE dirty. */
+#ifdef __NR_userfaultfd
 	test_uffdio_copy();
+#endif /* __NR_userfaultfd */
 
 	err = ksft_get_fail_cnt();
 	if (err)
diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h
index 1e5731bab499..4417eaa5cfb7 100644
--- a/tools/testing/selftests/mm/mlock2.h
+++ b/tools/testing/selftests/mm/mlock2.h
@@ -3,7 +3,6 @@
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <asm-generic/unistd.h>
 
 static int mlock2_(void *start, size_t len, int flags)
 {
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 5a3a9bcba640..bb84476a177f 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -34,7 +34,7 @@ struct config {
 	unsigned long long dest_alignment;
 	unsigned long long region_size;
 	int overlapping;
-	int dest_preamble_size;
+	unsigned int dest_preamble_size;
 };
 
 struct test {
@@ -328,7 +328,7 @@ static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr)
 {
 	char *test_name = "mremap mremap move within range";
 	void *src, *dest;
-	int i, success = 1;
+	unsigned int i, success = 1;
 
 	size_t size = SIZE_MB(20);
 	void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
@@ -384,7 +384,7 @@ out:
 static long long remap_region(struct config c, unsigned int threshold_mb,
 			      char *rand_addr)
 {
-	void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
+	void *addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL;
 	unsigned long long t, d;
 	struct timespec t_start = {0, 0}, t_end = {0, 0};
 	long long  start_ns, end_ns, align_mask, ret, offset;
@@ -569,7 +569,7 @@ static void mremap_move_1mb_from_start(unsigned int pattern_seed,
 {
 	char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src";
 	void *src = NULL, *dest = NULL;
-	int i, success = 1;
+	unsigned int i, success = 1;
 
 	/* Config to reuse get_source_mapping() to do an aligned mmap. */
 	struct config c = {
@@ -636,7 +636,7 @@ out:
 
 static void run_mremap_test_case(struct test test_case, int *failures,
 				 unsigned int threshold_mb,
-				 unsigned int pattern_seed, char *rand_addr)
+				 char *rand_addr)
 {
 	long long remap_time = remap_region(test_case.config, threshold_mb,
 					    rand_addr);
@@ -708,7 +708,8 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
 int main(int argc, char **argv)
 {
 	int failures = 0;
-	int i, run_perf_tests;
+	unsigned int i;
+	int run_perf_tests;
 	unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
 
 	/* hard-coded test configs */
@@ -831,7 +832,7 @@ int main(int argc, char **argv)
 
 	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
 		run_mremap_test_case(test_cases[i], &failures, threshold_mb,
-				     pattern_seed, rand_addr);
+				     rand_addr);
 
 	maps_fp = fopen("/proc/self/maps", "r");
 
@@ -853,7 +854,7 @@ int main(int argc, char **argv)
 		 "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
 		for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
 			run_mremap_test_case(perf_test_cases[i], &failures,
-					     threshold_mb, pattern_seed,
+					     threshold_mb,
 					     rand_addr);
 	}
 
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
index 01675c412b2a..005f29c86484 100644
--- a/tools/testing/selftests/mm/mseal_test.c
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -218,7 +218,7 @@ bool seal_support(void)
 bool pkey_supported(void)
 {
 #if defined(__i386__) || defined(__x86_64__) /* arch */
-	int pkey = sys_pkey_alloc(0, 0);
+	int pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 
 	if (pkey > 0)
 		return true;
@@ -802,7 +802,7 @@ static void test_seal_mprotect_partial_mprotect_tail(bool seal)
 }
 
 
-static void test_seal_mprotect_two_vma_with_gap(bool seal)
+static void test_seal_mprotect_two_vma_with_gap(void)
 {
 	void *ptr;
 	unsigned long page_size = getpagesize();
@@ -1671,7 +1671,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal)
 	setup_single_address_rw(size, &ptr);
 	FAIL_TEST_IF_FALSE(ptr != (void *)-1);
 
-	pkey = sys_pkey_alloc(0, 0);
+	pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 	FAIL_TEST_IF_FALSE(pkey > 0);
 
 	ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey);
@@ -1683,7 +1683,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal)
 	}
 
 	/* sealing doesn't take effect if PKRU allow write. */
-	set_pkey(pkey, 0);
+	set_pkey(pkey, PKEY_UNRESTRICTED);
 	ret = sys_madvise(ptr, size, MADV_DONTNEED);
 	FAIL_TEST_IF_FALSE(!ret);
 
@@ -1864,7 +1864,7 @@ static void test_seal_madvise_nodiscard(bool seal)
 	REPORT_TEST_PASS();
 }
 
-int main(int argc, char **argv)
+int main(void)
 {
 	bool test_seal = seal_support();
 
@@ -1913,8 +1913,8 @@ int main(int argc, char **argv)
 	test_seal_mprotect_partial_mprotect(false);
 	test_seal_mprotect_partial_mprotect(true);
 
-	test_seal_mprotect_two_vma_with_gap(false);
-	test_seal_mprotect_two_vma_with_gap(true);
+	test_seal_mprotect_two_vma_with_gap();
+	test_seal_mprotect_two_vma_with_gap();
 
 	test_seal_mprotect_merge(false);
 	test_seal_mprotect_merge(true);
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index bcc73b4e805c..57b4bba2b45f 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -34,8 +34,8 @@
 #define PAGEMAP "/proc/self/pagemap"
 int pagemap_fd;
 int uffd;
-int page_size;
-int hpage_size;
+unsigned int page_size;
+unsigned int hpage_size;
 const char *progname;
 
 #define LEN(region)	((region.end - region.start)/page_size)
@@ -235,7 +235,9 @@ int get_reads(struct page_region *vec, int vec_size)
 
 int sanity_tests_sd(void)
 {
-	int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0;
+	unsigned long long mem_size, vec_size, i, total_pages = 0;
+	long ret, ret2, ret3;
+	int num_pages = 1000;
 	int total_writes, total_reads, reads, count;
 	struct page_region *vec, *vec2;
 	char *mem, *m[2];
@@ -321,9 +323,9 @@ int sanity_tests_sd(void)
 	ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0,
 			    0, PAGE_IS_WRITTEN);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
-	ksft_test_result(ret == mem_size/(page_size * 2),
+	ksft_test_result((unsigned long long)ret == mem_size/(page_size * 2),
 			 "%s Repeated pattern of written and non-written pages\n", __func__);
 
 	/* 4. Repeated pattern of written and non-written pages in parts */
@@ -331,21 +333,21 @@ int sanity_tests_sd(void)
 			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			    num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
 	ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0,
 			     PAGE_IS_WRITTEN);
 	if (ret2 < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno));
 
 	ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size,
 			     PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			     0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret3 < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret3, errno, strerror(errno));
 
 	ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2,
-			 "%s Repeated pattern of written and non-written pages in parts %d %d %d\n",
+			 "%s Repeated pattern of written and non-written pages in parts %ld %ld %ld\n",
 			 __func__, ret, ret3, ret2);
 
 	/* 5. Repeated pattern of written and non-written pages max_pages */
@@ -357,13 +359,13 @@ int sanity_tests_sd(void)
 			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			    num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
 	ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size,
 			     PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			     0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret2 < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno));
 
 	ksft_test_result(ret == num_pages/2 && ret2 == 1,
 			 "%s Repeated pattern of written and non-written pages max_pages\n",
@@ -378,12 +380,12 @@ int sanity_tests_sd(void)
 			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			    2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
 	ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0,
 			      PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret2 < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno));
 
 	ksft_test_result(ret == 1 && LEN(vec[0]) == 2 &&
 			 vec[0].start == (uintptr_t)(mem + page_size) &&
@@ -416,7 +418,7 @@ int sanity_tests_sd(void)
 	ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
 			    PAGE_IS_WRITTEN);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
 	ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size,
 			 "%s Two regions\n", __func__);
@@ -448,7 +450,7 @@ int sanity_tests_sd(void)
 			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
 			    PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
 	for (i = 0; i < mem_size/page_size; i += 2)
 		mem[i * page_size]++;
@@ -457,7 +459,7 @@ int sanity_tests_sd(void)
 			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			    mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
 	total_pages += ret;
 
@@ -465,7 +467,7 @@ int sanity_tests_sd(void)
 			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			    mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
 	total_pages += ret;
 
@@ -473,7 +475,7 @@ int sanity_tests_sd(void)
 			    PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			    mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
 	total_pages += ret;
 
@@ -515,9 +517,9 @@ int sanity_tests_sd(void)
 					  vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 					  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 			if (ret < 0)
-				ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+				ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 
-			if (ret > vec_size)
+			if ((unsigned long)ret > vec_size)
 				break;
 
 			reads = get_reads(vec, ret);
@@ -554,63 +556,63 @@ int sanity_tests_sd(void)
 	ret = pagemap_ioc(mem, 0, vec, vec_size, 0,
 			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 0 && walk_end == (long)mem,
 			 "Walk_end: Same start and end address\n");
 
 	ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 0 && walk_end == (long)mem,
 			 "Walk_end: Same start and end with WP\n");
 
 	ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
 			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 0 && walk_end == (long)mem,
 			 "Walk_end: Same start and end with 0 output buffer\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
 			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
 			 "Walk_end: Big vec\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
 			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
 			 "Walk_end: vec of minimum length\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
 			  vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
 			 "Walk_end: Max pages specified\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
 			  vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2),
 			 "Walk_end: Half max pages\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
 			  1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size),
 			 "Walk_end: 1 max page\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
 			  -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
 			 "Walk_end: max pages\n");
 
@@ -621,49 +623,49 @@ int sanity_tests_sd(void)
 	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
 			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
-	ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
 			 "Walk_end sparse: Big vec\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
 			  0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
 			 "Walk_end sparse: vec of minimum length\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
 			  vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
 			 "Walk_end sparse: Max pages specified\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0,
 			  vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
-	ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
 			 "Walk_end sparse: Max pages specified\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
 			  vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
-	ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
 			 "Walk_end sparse: Max pages specified\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
 			  vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
-	ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
+	ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size),
 			 "Walk_endsparse : Half max pages\n");
 
 	ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
 			  1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
 	if (ret < 0)
-		ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+		ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno));
 	ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
 			 "Walk_end: 1 max page\n");
 
@@ -674,9 +676,10 @@ int sanity_tests_sd(void)
 	return 0;
 }
 
-int base_tests(char *prefix, char *mem, int mem_size, int skip)
+int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip)
 {
-	int vec_size, written;
+	unsigned long long vec_size;
+	int written;
 	struct page_region *vec, *vec2;
 
 	if (skip) {
@@ -799,8 +802,8 @@ int hpage_unit_tests(void)
 	char *map;
 	int ret, ret2;
 	size_t num_pages = 10;
-	int map_size = hpage_size * num_pages;
-	int vec_size = map_size/page_size;
+	unsigned long long map_size = hpage_size * num_pages;
+	unsigned long long vec_size = map_size/page_size;
 	struct page_region *vec, *vec2;
 
 	vec = malloc(sizeof(struct page_region) * vec_size);
@@ -1047,7 +1050,8 @@ static void test_simple(void)
 
 int sanity_tests(void)
 {
-	int mem_size, vec_size, ret, fd, i, buf_size;
+	unsigned long long mem_size, vec_size;
+	int ret, fd, i, buf_size;
 	struct page_region *vec;
 	char *mem, *fmem;
 	struct stat sbuf;
@@ -1312,7 +1316,9 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count,
 {
 	struct pm_scan_arg arg = {0};
 	struct page_region rgns[256];
-	int i, j, cnt, ret;
+	unsigned long long i, j;
+	long ret;
+	int cnt;
 
 	arg.size = sizeof(struct pm_scan_arg);
 	arg.start = (uintptr_t)mem;
@@ -1330,7 +1336,7 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count,
 		ksft_exit_fail_msg("ioctl failed\n");
 
 	cnt = 0;
-	for (i = 0; i < ret; ++i) {
+	for (i = 0; i < (unsigned long)ret; ++i) {
 		if (rgns[i].categories != PAGE_IS_WRITTEN)
 			ksft_exit_fail_msg("wrong flags\n");
 
@@ -1384,9 +1390,10 @@ void *thread_proc(void *mem)
 static void transact_test(int page_size)
 {
 	unsigned int i, count, extra_pages;
+	unsigned int c;
 	pthread_t th;
 	char *mem;
-	int ret, c;
+	int ret;
 
 	if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1))
 		ksft_exit_fail_msg("pthread_barrier_init\n");
@@ -1405,9 +1412,9 @@ static void transact_test(int page_size)
 	memset(mem, 0, 0x1000 * nthreads * pages_per_thread);
 
 	count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
-	ksft_test_result(count > 0, "%s count %d\n", __func__, count);
+	ksft_test_result(count > 0, "%s count %u\n", __func__, count);
 	count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
-	ksft_test_result(count == 0, "%s count %d\n", __func__, count);
+	ksft_test_result(count == 0, "%s count %u\n", __func__, count);
 
 	finish = 0;
 	for (i = 0; i < nthreads; ++i)
@@ -1429,7 +1436,7 @@ static void transact_test(int page_size)
 			ksft_exit_fail_msg("pthread_barrier_wait\n");
 
 		if (count > nthreads * access_per_thread)
-			ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n",
+			ksft_exit_fail_msg("Too big count %u expected %u, iter %u\n",
 					   count, nthreads * access_per_thread, i);
 
 		c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
@@ -1454,7 +1461,7 @@ static void transact_test(int page_size)
 			 * access and application gets page fault again for the same write.
 			 */
 			if (count < nthreads * access_per_thread) {
-				ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count,
+				ksft_test_result_fail("Lost update, iter %u, %u vs %u.\n", i, count,
 						      nthreads * access_per_thread);
 				return;
 			}
@@ -1467,15 +1474,16 @@ static void transact_test(int page_size)
 	finish = 1;
 	pthread_barrier_wait(&end_barrier);
 
-	ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__,
+	ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %u.\n", __func__,
 			      extra_pages,
 			      100.0 * extra_pages / (iter_count * nthreads * access_per_thread),
 			      extra_thread_faults);
 }
 
-int main(int argc, char *argv[])
+int main(int __attribute__((unused)) argc, char *argv[])
 {
-	int mem_size, shmid, buf_size, fd, i, ret;
+	int shmid, buf_size, fd, i, ret;
+	unsigned long long mem_size;
 	char *mem, *map, *fmem;
 	struct stat sbuf;
 
diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h
index d9d2100eafc0..8e9685e03c44 100644
--- a/tools/testing/selftests/mm/pkey-arm64.h
+++ b/tools/testing/selftests/mm/pkey-arm64.h
@@ -30,7 +30,7 @@
 #define NR_PKEYS		8
 #define NR_RESERVED_PKEYS	1 /* pkey-0 */
 
-#define PKEY_ALLOW_ALL		0x77777777
+#define PKEY_REG_ALLOW_ALL	0x77777777
 #define PKEY_REG_ALLOW_NONE	0x0
 
 #define PKEY_BITS_PER_PKEY	4
@@ -81,11 +81,11 @@ static inline int get_arch_reserved_keys(void)
 	return NR_RESERVED_PKEYS;
 }
 
-void expect_fault_on_read_execonly_key(void *p1, int pkey)
+static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
 {
 }
 
-void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
 {
 	return PTR_ERR_ENOTSUP;
 }
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index f7cfe163b0ff..ea404f80e6cb 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -13,22 +13,23 @@
 #include <ucontext.h>
 #include <sys/mman.h>
 
+#include <linux/mman.h>
+#include <linux/types.h>
+
 #include "../kselftest.h"
 
 /* Define some kernel-like types */
-#define  u8 __u8
-#define u16 __u16
-#define u32 __u32
-#define u64 __u64
+typedef __u8	u8;
+typedef __u16	u16;
+typedef __u32	u32;
+typedef __u64	u64;
 
 #define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
 
 #ifndef DEBUG_LEVEL
 #define DEBUG_LEVEL 0
 #endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
 extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
 
 extern int test_nr;
 extern int iteration_nr;
@@ -83,17 +84,18 @@ extern void abort_hooks(void);
 #ifndef noinline
 # define noinline __attribute__((noinline))
 #endif
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__((__unused__))
+#endif
 
-noinline int read_ptr(int *ptr)
-{
-	/* Keep GCC from optimizing this away somehow */
-	barrier();
-	return *ptr;
-}
-
-void expected_pkey_fault(int pkey);
 int sys_pkey_alloc(unsigned long flags, unsigned long init_val);
 int sys_pkey_free(unsigned long pkey);
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+		unsigned long pkey);
+
+/* For functions called from protection_keys.c only */
+noinline int read_ptr(int *ptr);
+void expected_pkey_fault(int pkey);
 int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
 		unsigned long pkey);
 void record_pkey_malloc(void *ptr, long size, int prot);
@@ -171,38 +173,6 @@ static inline void write_pkey_reg(u64 pkey_reg)
 			pkey_reg, __read_pkey_reg());
 }
 
-/*
- * These are technically racy. since something could
- * change PKEY register between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
-	u64 pkey_reg = read_pkey_reg();
-	int bit = pkey * 2;
-
-	if (do_allow)
-		pkey_reg &= (1<<bit);
-	else
-		pkey_reg |= (1<<bit);
-
-	dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
-	write_pkey_reg(pkey_reg);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
-	u64 pkey_reg = read_pkey_reg();
-	int bit = pkey * 2 + 1;
-
-	if (do_allow_write)
-		pkey_reg &= (1<<bit);
-	else
-		pkey_reg |= (1<<bit);
-
-	write_pkey_reg(pkey_reg);
-	dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
-}
-
 #define ALIGN_UP(x, align_to)	(((x) + ((align_to)-1)) & ~((align_to)-1))
 #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
 #define ALIGN_PTR_UP(p, ptr_align_to)	\
@@ -224,7 +194,7 @@ static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si)
 static inline int kernel_has_pkeys(void)
 {
 	/* try allocating a key and see if it succeeds */
-	int ret = sys_pkey_alloc(0, 0);
+	int ret = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 	if (ret <= 0) {
 		return 0;
 	}
diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index 3d0c0bdae5bc..1bad310d282a 100644
--- a/tools/testing/selftests/mm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -91,7 +91,7 @@ static inline int get_arch_reserved_keys(void)
 			return NR_RESERVED_PKEYS_64K_3KEYS;
 }
 
-void expect_fault_on_read_execonly_key(void *p1, int pkey)
+static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
 {
 	/*
 	 * powerpc does not allow userspace to change permissions of exec-only
@@ -105,7 +105,7 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey)
 /* 4-byte instructions * 16384 = 64K page */
 #define __page_o_noops() asm(".rept 16384 ; nop; .endr")
 
-void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
 {
 	void *ptr;
 	int ret;
diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index ac91777c8917..f7ecd335df1e 100644
--- a/tools/testing/selftests/mm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -113,7 +113,7 @@ static inline u32 pkey_bit_position(int pkey)
 #define XSTATE_PKEY	0x200
 #define XSTATE_BV_OFFSET	512
 
-int pkey_reg_xstate_offset(void)
+static inline int pkey_reg_xstate_offset(void)
 {
 	unsigned int eax;
 	unsigned int ebx;
@@ -148,7 +148,7 @@ static inline int get_arch_reserved_keys(void)
 	return NR_RESERVED_PKEYS;
 }
 
-void expect_fault_on_read_execonly_key(void *p1, int pkey)
+static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
 {
 	int ptr_contents;
 
@@ -157,7 +157,7 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey)
 	expected_pkey_fault(pkey);
 }
 
-void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
 {
 	return PTR_ERR_ENOTSUP;
 }
diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c
index c593a426341c..b5e076a564c9 100644
--- a/tools/testing/selftests/mm/pkey_sighandler_tests.c
+++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c
@@ -32,11 +32,9 @@
 
 #define STACK_SIZE PTHREAD_STACK_MIN
 
-void expected_pkey_fault(int pkey) {}
-
-pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-siginfo_t siginfo = {0};
+static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static siginfo_t siginfo = {0};
 
 /*
  * We need to use inline assembly instead of glibc's syscall because glibc's
@@ -163,7 +161,7 @@ static void *thread_segv_with_pkey0_disabled(void *ptr)
 	__write_pkey_reg(pkey_reg_restrictive_default());
 
 	/* Segfault (with SEGV_MAPERR) */
-	*(int *) (0x1) = 1;
+	*(volatile int *)NULL = 1;
 	return NULL;
 }
 
@@ -179,7 +177,6 @@ static void *thread_segv_pkuerr_stack(void *ptr)
 static void *thread_segv_maperr_ptr(void *ptr)
 {
 	stack_t *stack = ptr;
-	int *bad = (int *)1;
 	u64 pkey_reg;
 
 	/*
@@ -195,7 +192,7 @@ static void *thread_segv_maperr_ptr(void *ptr)
 	__write_pkey_reg(pkey_reg);
 
 	/* Segfault */
-	*bad = 1;
+	*(volatile int *)NULL = 1;
 	syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0);
 	return NULL;
 }
@@ -234,7 +231,7 @@ static void test_sigsegv_handler_with_pkey0_disabled(void)
 
 	ksft_test_result(siginfo.si_signo == SIGSEGV &&
 			 siginfo.si_code == SEGV_MAPERR &&
-			 siginfo.si_addr == (void *)1,
+			 siginfo.si_addr == NULL,
 			 "%s\n", __func__);
 }
 
@@ -314,11 +311,11 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
 	__write_pkey_reg(pkey_reg);
 
 	/* Protect the new stack with MPK 1 */
-	pkey = pkey_alloc(0, 0);
-	pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+	pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
+	sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
 
 	/* Set up alternate signal stack that will use the default MPK */
-	sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+	sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
 			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	sigstack.ss_flags = 0;
 	sigstack.ss_size = STACK_SIZE;
@@ -349,7 +346,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
 
 	ksft_test_result(siginfo.si_signo == SIGSEGV &&
 			 siginfo.si_code == SEGV_MAPERR &&
-			 siginfo.si_addr == (void *)1,
+			 siginfo.si_addr == NULL,
 			 "%s\n", __func__);
 }
 
@@ -487,11 +484,11 @@ static void test_pkru_sigreturn(void)
 	__write_pkey_reg(pkey_reg);
 
 	/* Protect the stack with MPK 2 */
-	pkey = pkey_alloc(0, 0);
-	pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
+	pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
+	sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey);
 
 	/* Set up alternate signal stack that will use the default MPK */
-	sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+	sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE,
 			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	sigstack.ss_flags = 0;
 	sigstack.ss_size = STACK_SIZE;
@@ -538,6 +535,9 @@ int main(int argc, char *argv[])
 	ksft_print_header();
 	ksft_set_plan(ARRAY_SIZE(pkey_tests));
 
+	if (!is_pkeys_supported())
+		ksft_exit_skip("pkeys not supported\n");
+
 	for (i = 0; i < ARRAY_SIZE(pkey_tests); i++)
 		(*pkey_tests[i])();
 
diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c
new file mode 100644
index 000000000000..ca4ad0d44ab2
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey_util.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "pkey-helpers.h"
+
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+	int ret = syscall(SYS_pkey_alloc, flags, init_val);
+	dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
+			__func__, flags, init_val, ret, errno);
+	return ret;
+}
+
+int sys_pkey_free(unsigned long pkey)
+{
+	int ret = syscall(SYS_pkey_free, pkey);
+	dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
+	return ret;
+}
+
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+		unsigned long pkey)
+{
+	int sret;
+
+	dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
+			ptr, size, orig_prot, pkey);
+
+	errno = 0;
+	sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
+	if (errno) {
+		dprintf2("SYS_mprotect_key sret: %d\n", sret);
+		dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
+		dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
+		if (DEBUG_LEVEL >= 2)
+			perror("SYS_mprotect_pkey");
+	}
+	return sret;
+}
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index 4990f7ab4cb7..23ebec367015 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -42,7 +42,7 @@
 #include <sys/wait.h>
 #include <sys/stat.h>
 #include <fcntl.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
 #include <sys/ptrace.h>
 #include <setjmp.h>
 
@@ -53,9 +53,15 @@ int test_nr;
 
 u64 shadow_pkey_reg;
 int dprint_in_signal;
-char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
 
-void cat_into_file(char *str, char *file)
+noinline int read_ptr(int *ptr)
+{
+	/* Keep GCC from optimizing this away somehow */
+	barrier();
+	return *ptr;
+}
+
+static void cat_into_file(char *str, char *file)
 {
 	int fd = open(file, O_RDWR);
 	int ret;
@@ -82,7 +88,7 @@ void cat_into_file(char *str, char *file)
 
 #if CONTROL_TRACING > 0
 static int warned_tracing;
-int tracing_root_ok(void)
+static int tracing_root_ok(void)
 {
 	if (geteuid() != 0) {
 		if (!warned_tracing)
@@ -95,7 +101,7 @@ int tracing_root_ok(void)
 }
 #endif
 
-void tracing_on(void)
+static void tracing_on(void)
 {
 #if CONTROL_TRACING > 0
 #define TRACEDIR "/sys/kernel/tracing"
@@ -119,7 +125,7 @@ void tracing_on(void)
 #endif
 }
 
-void tracing_off(void)
+static void tracing_off(void)
 {
 #if CONTROL_TRACING > 0
 	if (!tracing_root_ok())
@@ -153,7 +159,7 @@ __attribute__((__aligned__(65536)))
 #else
 __attribute__((__aligned__(PAGE_SIZE)))
 #endif
-void lots_o_noops_around_write(int *write_to_me)
+static void lots_o_noops_around_write(int *write_to_me)
 {
 	dprintf3("running %s()\n", __func__);
 	__page_o_noops();
@@ -164,7 +170,7 @@ void lots_o_noops_around_write(int *write_to_me)
 	dprintf3("%s() done\n", __func__);
 }
 
-void dump_mem(void *dumpme, int len_bytes)
+static void dump_mem(void *dumpme, int len_bytes)
 {
 	char *c = (void *)dumpme;
 	int i;
@@ -207,7 +213,7 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
 	return 0;
 }
 
-void pkey_disable_set(int pkey, int flags)
+static void pkey_disable_set(int pkey, int flags)
 {
 	unsigned long syscall_flags = 0;
 	int ret;
@@ -245,7 +251,7 @@ void pkey_disable_set(int pkey, int flags)
 		pkey, flags);
 }
 
-void pkey_disable_clear(int pkey, int flags)
+static void pkey_disable_clear(int pkey, int flags)
 {
 	unsigned long syscall_flags = 0;
 	int ret;
@@ -271,19 +277,19 @@ void pkey_disable_clear(int pkey, int flags)
 			pkey, read_pkey_reg());
 }
 
-void pkey_write_allow(int pkey)
+__maybe_unused static void pkey_write_allow(int pkey)
 {
 	pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
 }
-void pkey_write_deny(int pkey)
+__maybe_unused static void pkey_write_deny(int pkey)
 {
 	pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
 }
-void pkey_access_allow(int pkey)
+__maybe_unused static void pkey_access_allow(int pkey)
 {
 	pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
 }
-void pkey_access_deny(int pkey)
+__maybe_unused static void pkey_access_deny(int pkey)
 {
 	pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
 }
@@ -301,9 +307,9 @@ static char *si_code_str(int si_code)
 	return "UNKNOWN";
 }
 
-int pkey_faults;
-int last_si_pkey = -1;
-void signal_handler(int signum, siginfo_t *si, void *vucontext)
+static int pkey_faults;
+static int last_si_pkey = -1;
+static void signal_handler(int signum, siginfo_t *si, void *vucontext)
 {
 	ucontext_t *uctxt = vucontext;
 	int trapno;
@@ -390,27 +396,21 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
 	/* restore access and let the faulting instruction continue */
 	pkey_access_allow(siginfo_pkey);
 #elif defined(__aarch64__)
-	aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL);
+	aarch64_write_signal_pkey(uctxt, PKEY_REG_ALLOW_ALL);
 #endif /* arch */
 	pkey_faults++;
 	dprintf1("<<<<==================================================\n");
 	dprint_in_signal = 0;
 }
 
-int wait_all_children(void)
-{
-	int status;
-	return waitpid(-1, &status, 0);
-}
-
-void sig_chld(int x)
+static void sig_chld(int x)
 {
 	dprint_in_signal = 1;
 	dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
 	dprint_in_signal = 0;
 }
 
-void setup_sigsegv_handler(void)
+static void setup_sigsegv_handler(void)
 {
 	int r, rs;
 	struct sigaction newact;
@@ -436,13 +436,13 @@ void setup_sigsegv_handler(void)
 	pkey_assert(r == 0);
 }
 
-void setup_handlers(void)
+static void setup_handlers(void)
 {
 	signal(SIGCHLD, &sig_chld);
 	setup_sigsegv_handler();
 }
 
-pid_t fork_lazy_child(void)
+static pid_t fork_lazy_child(void)
 {
 	pid_t forkret;
 
@@ -460,38 +460,10 @@ pid_t fork_lazy_child(void)
 	return forkret;
 }
 
-int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
-		unsigned long pkey)
-{
-	int sret;
-
-	dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
-			ptr, size, orig_prot, pkey);
-
-	errno = 0;
-	sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
-	if (errno) {
-		dprintf2("SYS_mprotect_key sret: %d\n", sret);
-		dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
-		dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
-		if (DEBUG_LEVEL >= 2)
-			perror("SYS_mprotect_pkey");
-	}
-	return sret;
-}
-
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
-{
-	int ret = syscall(SYS_pkey_alloc, flags, init_val);
-	dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
-			__func__, flags, init_val, ret, errno);
-	return ret;
-}
-
-int alloc_pkey(void)
+static int alloc_pkey(void)
 {
 	int ret;
-	unsigned long init_val = 0x0;
+	unsigned long init_val = PKEY_UNRESTRICTED;
 
 	dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
 			__func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
@@ -534,19 +506,12 @@ int alloc_pkey(void)
 	return ret;
 }
 
-int sys_pkey_free(unsigned long pkey)
-{
-	int ret = syscall(SYS_pkey_free, pkey);
-	dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
-	return ret;
-}
-
 /*
  * I had a bug where pkey bits could be set by mprotect() but
  * not cleared.  This ensures we get lots of random bit sets
  * and clears on the vma and pte pkey bits.
  */
-int alloc_random_pkey(void)
+static int alloc_random_pkey(void)
 {
 	int max_nr_pkey_allocs;
 	int ret;
@@ -629,7 +594,7 @@ struct pkey_malloc_record {
 };
 struct pkey_malloc_record *pkey_malloc_records;
 struct pkey_malloc_record *pkey_last_malloc_record;
-long nr_pkey_malloc_records;
+static long nr_pkey_malloc_records;
 void record_pkey_malloc(void *ptr, long size, int prot)
 {
 	long i;
@@ -667,7 +632,7 @@ void record_pkey_malloc(void *ptr, long size, int prot)
 	nr_pkey_malloc_records++;
 }
 
-void free_pkey_malloc(void *ptr)
+static void free_pkey_malloc(void *ptr)
 {
 	long i;
 	int ret;
@@ -694,8 +659,7 @@ void free_pkey_malloc(void *ptr)
 	pkey_assert(false);
 }
 
-
-void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
+static void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
 {
 	void *ptr;
 	int ret;
@@ -715,7 +679,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
 	return ptr;
 }
 
-void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
+static void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
 {
 	int ret;
 	void *ptr;
@@ -745,10 +709,10 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
 	return ptr;
 }
 
-int hugetlb_setup_ok;
+static int hugetlb_setup_ok;
 #define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages"
 #define GET_NR_HUGE_PAGES 10
-void setup_hugetlbfs(void)
+static void setup_hugetlbfs(void)
 {
 	int err;
 	int fd;
@@ -796,7 +760,7 @@ void setup_hugetlbfs(void)
 	hugetlb_setup_ok = 1;
 }
 
-void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
+static void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
 {
 	void *ptr;
 	int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
@@ -817,42 +781,15 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
 	return ptr;
 }
 
-void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
-{
-	void *ptr;
-	int fd;
-
-	dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
-			size, prot, pkey);
-	pkey_assert(pkey < NR_PKEYS);
-	fd = open("/dax/foo", O_RDWR);
-	pkey_assert(fd >= 0);
-
-	ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
-	pkey_assert(ptr != (void *)-1);
-
-	mprotect_pkey(ptr, size, prot, pkey);
-
-	record_pkey_malloc(ptr, size, prot);
-
-	dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
-	close(fd);
-	return ptr;
-}
-
-void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
+static void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
 
 	malloc_pkey_with_mprotect,
 	malloc_pkey_with_mprotect_subpage,
 	malloc_pkey_anon_huge,
 	malloc_pkey_hugetlb
-/* can not do direct with the pkey_mprotect() API:
-	malloc_pkey_mmap_direct,
-	malloc_pkey_mmap_dax,
-*/
 };
 
-void *malloc_pkey(long size, int prot, u16 pkey)
+static void *malloc_pkey(long size, int prot, u16 pkey)
 {
 	void *ret;
 	static int malloc_type;
@@ -882,7 +819,7 @@ void *malloc_pkey(long size, int prot, u16 pkey)
 	return ret;
 }
 
-int last_pkey_faults;
+static int last_pkey_faults;
 #define UNKNOWN_PKEY -2
 void expected_pkey_fault(int pkey)
 {
@@ -905,7 +842,7 @@ void expected_pkey_fault(int pkey)
 	 */
 	if (__read_pkey_reg() != 0)
 #elif defined(__aarch64__)
-	if (__read_pkey_reg() != PKEY_ALLOW_ALL)
+	if (__read_pkey_reg() != PKEY_REG_ALLOW_ALL)
 #else
 	if (__read_pkey_reg() != shadow_pkey_reg)
 #endif /* arch */
@@ -924,9 +861,9 @@ void expected_pkey_fault(int pkey)
 	pkey_assert(last_pkey_faults == pkey_faults);		\
 } while (0)
 
-int test_fds[10] = { -1 };
-int nr_test_fds;
-void __save_test_fd(int fd)
+static int test_fds[10] = { -1 };
+static int nr_test_fds;
+static void __save_test_fd(int fd)
 {
 	pkey_assert(fd >= 0);
 	pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
@@ -934,14 +871,14 @@ void __save_test_fd(int fd)
 	nr_test_fds++;
 }
 
-int get_test_read_fd(void)
+static int get_test_read_fd(void)
 {
 	int test_fd = open("/etc/passwd", O_RDONLY);
 	__save_test_fd(test_fd);
 	return test_fd;
 }
 
-void close_test_fds(void)
+static void close_test_fds(void)
 {
 	int i;
 
@@ -954,7 +891,7 @@ void close_test_fds(void)
 	nr_test_fds = 0;
 }
 
-void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
+static void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
 {
 	int i, err;
 	int max_nr_pkey_allocs;
@@ -1006,7 +943,7 @@ void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
 	pkey_assert(!err);
 }
 
-void test_read_of_write_disabled_region(int *ptr, u16 pkey)
+static void test_read_of_write_disabled_region(int *ptr, u16 pkey)
 {
 	int ptr_contents;
 
@@ -1016,7 +953,7 @@ void test_read_of_write_disabled_region(int *ptr, u16 pkey)
 	dprintf1("*ptr: %d\n", ptr_contents);
 	dprintf1("\n");
 }
-void test_read_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_read_of_access_disabled_region(int *ptr, u16 pkey)
 {
 	int ptr_contents;
 
@@ -1028,7 +965,7 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey)
 	expected_pkey_fault(pkey);
 }
 
-void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
+static void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
 		u16 pkey)
 {
 	int ptr_contents;
@@ -1045,7 +982,7 @@ void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
 	expected_pkey_fault(pkey);
 }
 
-void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
+static void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
 		u16 pkey)
 {
 	*ptr = __LINE__;
@@ -1056,14 +993,14 @@ void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
 	expected_pkey_fault(pkey);
 }
 
-void test_write_of_write_disabled_region(int *ptr, u16 pkey)
+static void test_write_of_write_disabled_region(int *ptr, u16 pkey)
 {
 	dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
 	pkey_write_deny(pkey);
 	*ptr = __LINE__;
 	expected_pkey_fault(pkey);
 }
-void test_write_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_write_of_access_disabled_region(int *ptr, u16 pkey)
 {
 	dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
 	pkey_access_deny(pkey);
@@ -1071,7 +1008,7 @@ void test_write_of_access_disabled_region(int *ptr, u16 pkey)
 	expected_pkey_fault(pkey);
 }
 
-void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
+static void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
 			u16 pkey)
 {
 	*ptr = __LINE__;
@@ -1082,7 +1019,7 @@ void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
 	expected_pkey_fault(pkey);
 }
 
-void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
 {
 	int ret;
 	int test_fd = get_test_read_fd();
@@ -1094,7 +1031,8 @@ void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
 	dprintf1("read ret: %d\n", ret);
 	pkey_assert(ret);
 }
-void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
+
+static void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
 {
 	int ret;
 	int test_fd = get_test_read_fd();
@@ -1107,7 +1045,7 @@ void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
 	pkey_assert(ret);
 }
 
-void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
+static void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
 {
 	int pipe_ret, vmsplice_ret;
 	struct iovec iov;
@@ -1129,7 +1067,7 @@ void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
 	close(pipe_fds[1]);
 }
 
-void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
+static void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
 {
 	int ignored = 0xdada;
 	int futex_ret;
@@ -1147,7 +1085,7 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
 }
 
 /* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
+static void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
 {
 	int err;
 	int i;
@@ -1170,7 +1108,7 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
 }
 
 /* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
+static void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
 {
 	int err;
 	int bad_pkey = NR_PKEYS+99;
@@ -1180,7 +1118,7 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
 	pkey_assert(err);
 }
 
-void become_child(void)
+static void become_child(void)
 {
 	pid_t forkret;
 
@@ -1196,7 +1134,7 @@ void become_child(void)
 }
 
 /* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
+static void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
 {
 	int err;
 	int allocated_pkeys[NR_PKEYS] = {0};
@@ -1263,7 +1201,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
 	}
 }
 
-void arch_force_pkey_reg_init(void)
+static void arch_force_pkey_reg_init(void)
 {
 #if defined(__i386__) || defined(__x86_64__) /* arch */
 	u64 *buf;
@@ -1302,7 +1240,7 @@ void arch_force_pkey_reg_init(void)
  * a long-running test that continually checks the pkey
  * register.
  */
-void test_pkey_init_state(int *ptr, u16 pkey)
+static void test_pkey_init_state(int *ptr, u16 pkey)
 {
 	int err;
 	int allocated_pkeys[NR_PKEYS] = {0};
@@ -1340,7 +1278,7 @@ void test_pkey_init_state(int *ptr, u16 pkey)
  * have to call pkey_alloc() to use it first.  Make sure that it
  * is usable.
  */
-void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+static void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
 {
 	long size;
 	int prot;
@@ -1364,7 +1302,7 @@ void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
 	mprotect_pkey(ptr, size, prot, pkey);
 }
 
-void test_ptrace_of_child(int *ptr, u16 pkey)
+static void test_ptrace_of_child(int *ptr, u16 pkey)
 {
 	__attribute__((__unused__)) int peek_result;
 	pid_t child_pid;
@@ -1440,7 +1378,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
 	free(plain_ptr_unaligned);
 }
 
-void *get_pointer_to_instructions(void)
+static void *get_pointer_to_instructions(void)
 {
 	void *p1;
 
@@ -1461,7 +1399,7 @@ void *get_pointer_to_instructions(void)
 	return p1;
 }
 
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+static void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
 {
 	void *p1;
 	int scratch;
@@ -1493,7 +1431,7 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
 	pkey_assert(!ret);
 }
 
-void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+static void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
 {
 	void *p1;
 	int scratch;
@@ -1542,7 +1480,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
 }
 
 #if defined(__i386__) || defined(__x86_64__)
-void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+static void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
 {
 	u32 new_pkru;
 	pid_t child;
@@ -1665,7 +1603,7 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
 #endif
 
 #if defined(__aarch64__)
-void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+static void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
 {
 	pid_t child;
 	int status, ret;
@@ -1742,7 +1680,7 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
 }
 #endif
 
-void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+static void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
 {
 	int size = PAGE_SIZE;
 	int sret;
@@ -1756,7 +1694,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
 	pkey_assert(sret < 0);
 }
 
-void (*pkey_tests[])(int *ptr, u16 pkey) = {
+static void (*pkey_tests[])(int *ptr, u16 pkey) = {
 	test_read_of_write_disabled_region,
 	test_read_of_access_disabled_region,
 	test_read_of_access_disabled_region_with_page_already_mapped,
@@ -1782,7 +1720,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
 #endif
 };
 
-void run_tests_once(void)
+static void run_tests_once(void)
 {
 	int *ptr;
 	int prot = PROT_READ|PROT_WRITE;
@@ -1816,7 +1754,7 @@ void run_tests_once(void)
 	iteration_nr++;
 }
 
-void pkey_setup_shadow(void)
+static void pkey_setup_shadow(void)
 {
 	shadow_pkey_reg = __read_pkey_reg();
 }
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 2fc290d9430c..7cc71d942f83 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -45,6 +45,8 @@ separated by spaces:
 	vmalloc smoke tests
 - hmm
 	hmm smoke tests
+- madv_guard
+	test madvise(2) MADV_GUARD_INSTALL and MADV_GUARD_REMOVE options
 - madv_populate
 	test memadvise(2) MADV_POPULATE_{READ,WRITE} options
 - memfd_secret
@@ -218,7 +220,7 @@ run_test() {
 	if test_selected ${CATEGORY}; then
 		# On memory constrainted systems some tests can fail to allocate hugepages.
 		# perform some cleanup before the test for a higher success rate.
-		if [ ${CATEGORY} == "thp" ] | [ ${CATEGORY} == "hugetlb" ]; then
+		if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then
 			echo 3 > /proc/sys/vm/drop_caches
 			sleep 2
 			echo 1 > /proc/sys/vm/compact_memory
@@ -302,11 +304,14 @@ uffd_stress_bin=./uffd-stress
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
 # Hugetlb tests require source and destination huge pages. Pass in half
 # the size of the free pages we have, which is used for *each*.
-half_ufd_size_MB=$((freepgs / 2))
+# uffd-stress expects a region expressed in MiB, so we adjust
+# half_ufd_size_MB accordingly.
+half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2))
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16
+CATEGORY="userfaultfd" run_test ./uffd-wp-mremap
 
 #cleanup
 echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
@@ -375,6 +380,9 @@ CATEGORY="mremap" run_test ./mremap_dontunmap
 
 CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
 
+# MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests
+CATEGORY="madv_guard" run_test ./guard-pages
+
 # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
 CATEGORY="madv_populate" run_test ./madv_populate
 
diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c
deleted file mode 100644
index d9f8ba8d5050..000000000000
--- a/tools/testing/selftests/mm/seal_elf.c
+++ /dev/null
@@ -1,137 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <sys/mman.h>
-#include <stdint.h>
-#include <asm-generic/unistd.h>
-#include <string.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <stdbool.h>
-#include "../kselftest.h"
-#include <syscall.h>
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <sys/vfs.h>
-#include <sys/stat.h>
-#include "mseal_helpers.h"
-
-/*
- * define sys_xyx to call syscall directly.
- */
-static int sys_mseal(void *start, size_t len)
-{
-	int sret;
-
-	errno = 0;
-	sret = syscall(__NR_mseal, start, len, 0);
-	return sret;
-}
-
-static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
-{
-	int sret;
-
-	errno = 0;
-	sret = syscall(__NR_mprotect, ptr, size, prot);
-	return sret;
-}
-
-static bool seal_support(void)
-{
-	int ret;
-	void *ptr;
-	unsigned long page_size = getpagesize();
-
-	ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-	if (ptr == (void *) -1)
-		return false;
-
-	ret = sys_mseal(ptr, page_size);
-	if (ret < 0)
-		return false;
-
-	return true;
-}
-
-const char somestr[4096] = {"READONLY"};
-
-static void test_seal_elf(void)
-{
-	int ret;
-	FILE *maps;
-	char line[512];
-	uintptr_t  addr_start, addr_end;
-	char prot[5];
-	char filename[256];
-	unsigned long page_size = getpagesize();
-	unsigned long long ptr = (unsigned long long) somestr;
-	char *somestr2 = (char *)somestr;
-
-	/*
-	 * Modify the protection of readonly somestr
-	 */
-	if (((unsigned long long)ptr % page_size) != 0)
-		ptr = (unsigned long long)ptr & ~(page_size - 1);
-
-	ksft_print_msg("somestr = %s\n", somestr);
-	ksft_print_msg("change protection to rw\n");
-	ret = sys_mprotect((void *)ptr, page_size, PROT_READ|PROT_WRITE);
-	FAIL_TEST_IF_FALSE(!ret);
-	*somestr2 = 'A';
-	ksft_print_msg("somestr is modified to: %s\n", somestr);
-	ret = sys_mprotect((void *)ptr, page_size, PROT_READ);
-	FAIL_TEST_IF_FALSE(!ret);
-
-	maps = fopen("/proc/self/maps", "r");
-	FAIL_TEST_IF_FALSE(maps);
-
-	/*
-	 * apply sealing to elf binary
-	 */
-	while (fgets(line, sizeof(line), maps)) {
-		if (sscanf(line, "%lx-%lx %4s %*x %*x:%*x %*u %255[^\n]",
-			&addr_start, &addr_end, prot, filename) == 4) {
-			if (strlen(filename)) {
-				/*
-				 * seal the mapping if read only.
-				 */
-				if (strstr(prot, "r-")) {
-					ret = sys_mseal((void *)addr_start, addr_end - addr_start);
-					FAIL_TEST_IF_FALSE(!ret);
-					ksft_print_msg("sealed: %lx-%lx %s %s\n",
-						addr_start, addr_end, prot, filename);
-					if ((uintptr_t) somestr >= addr_start &&
-						(uintptr_t) somestr <= addr_end)
-						ksft_print_msg("mapping for somestr found\n");
-				}
-			}
-		}
-	}
-	fclose(maps);
-
-	ret = sys_mprotect((void *)ptr, page_size, PROT_READ | PROT_WRITE);
-	FAIL_TEST_IF_FALSE(ret < 0);
-	ksft_print_msg("somestr is sealed, mprotect is rejected\n");
-
-	REPORT_TEST_PASS();
-}
-
-int main(int argc, char **argv)
-{
-	bool test_seal = seal_support();
-
-	ksft_print_header();
-	ksft_print_msg("pid=%d\n", getpid());
-
-	if (!test_seal)
-		ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n");
-
-	ksft_set_plan(1);
-
-	test_seal_elf();
-
-	ksft_finished();
-}
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index bdfa5d085f00..8e1462ce0532 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -128,7 +128,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
 {
 	const char *type[] = {"file", "anon"};
 	const char *fname = "./soft-dirty-test-file";
-	int test_fd;
+	int test_fd = 0;
 	char *map;
 
 	if (anon) {
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index eb6d1b9fc362..3f353f3d070f 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -108,38 +108,28 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp
 	unsigned long rss_anon_before, rss_anon_after;
 	size_t i;
 
-	if (!check_huge_anon(one_page, 4, pmd_pagesize)) {
-		printf("No THP is allocated\n");
-		exit(EXIT_FAILURE);
-	}
+	if (!check_huge_anon(one_page, 4, pmd_pagesize))
+		ksft_exit_fail_msg("No THP is allocated\n");
 
 	rss_anon_before = rss_anon();
-	if (!rss_anon_before) {
-		printf("No RssAnon is allocated before split\n");
-		exit(EXIT_FAILURE);
-	}
+	if (!rss_anon_before)
+		ksft_exit_fail_msg("No RssAnon is allocated before split\n");
 
 	/* split all THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
 		      (uint64_t)one_page + len, 0);
 
 	for (i = 0; i < len; i++)
-		if (one_page[i] != (char)0) {
-			printf("%ld byte corrupted\n", i);
-			exit(EXIT_FAILURE);
-		}
+		if (one_page[i] != (char)0)
+			ksft_exit_fail_msg("%ld byte corrupted\n", i);
 
-	if (!check_huge_anon(one_page, 0, pmd_pagesize)) {
-		printf("Still AnonHugePages not split\n");
-		exit(EXIT_FAILURE);
-	}
+	if (!check_huge_anon(one_page, 0, pmd_pagesize))
+		ksft_exit_fail_msg("Still AnonHugePages not split\n");
 
 	rss_anon_after = rss_anon();
-	if (rss_anon_after >= rss_anon_before) {
-		printf("Incorrect RssAnon value. Before: %ld After: %ld\n",
+	if (rss_anon_after >= rss_anon_before)
+		ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n",
 		       rss_anon_before, rss_anon_after);
-		exit(EXIT_FAILURE);
-	}
 }
 
 void split_pmd_zero_pages(void)
@@ -150,11 +140,11 @@ void split_pmd_zero_pages(void)
 
 	one_page = allocate_zero_filled_hugepage(len);
 	verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len);
-	printf("Split zero filled huge pages successful\n");
+	ksft_test_result_pass("Split zero filled huge pages successful\n");
 	free(one_page);
 }
 
-void split_pmd_thp(void)
+void split_pmd_thp_to_order(int order)
 {
 	char *one_page;
 	size_t len = 4 * pmd_pagesize;
@@ -174,7 +164,7 @@ void split_pmd_thp(void)
 
 	/* split all THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
-		(uint64_t)one_page + len, 0);
+		(uint64_t)one_page + len, order);
 
 	for (i = 0; i < len; i++)
 		if (one_page[i] != (char)i)
@@ -184,7 +174,7 @@ void split_pmd_thp(void)
 	if (!check_huge_anon(one_page, 0, pmd_pagesize))
 		ksft_exit_fail_msg("Still AnonHugePages not split\n");
 
-	ksft_test_result_pass("Split huge pages successful\n");
+	ksft_test_result_pass("Split huge pages to order %d successful\n", order);
 	free(one_page);
 }
 
@@ -491,7 +481,7 @@ int main(int argc, char **argv)
 	if (argc > 1)
 		optional_xfs_path = argv[1];
 
-	ksft_set_plan(3+9);
+	ksft_set_plan(1+8+2+9);
 
 	pagesize = getpagesize();
 	pageshift = ffs(pagesize) - 1;
@@ -502,7 +492,11 @@ int main(int argc, char **argv)
 	fd_size = 2 * pmd_pagesize;
 
 	split_pmd_zero_pages();
-	split_pmd_thp();
+
+	for (i = 0; i < 9; i++)
+		if (i != 1)
+			split_pmd_thp_to_order(i);
+
 	split_pte_mapped_thp();
 	split_file_backed_thp();
 
diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c
index 577eaab6266f..ad872af1c81a 100644
--- a/tools/testing/selftests/mm/thp_settings.c
+++ b/tools/testing/selftests/mm/thp_settings.c
@@ -87,7 +87,7 @@ int write_file(const char *path, const char *buf, size_t buflen)
 	return (unsigned int) numwritten;
 }
 
-const unsigned long read_num(const char *path)
+unsigned long read_num(const char *path)
 {
 	char buf[21];
 
@@ -172,7 +172,7 @@ void thp_write_string(const char *name, const char *val)
 	}
 }
 
-const unsigned long thp_read_num(const char *name)
+unsigned long thp_read_num(const char *name)
 {
 	char path[PATH_MAX];
 	int ret;
diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h
index 876235a23460..fc131d23d593 100644
--- a/tools/testing/selftests/mm/thp_settings.h
+++ b/tools/testing/selftests/mm/thp_settings.h
@@ -64,12 +64,12 @@ struct thp_settings {
 
 int read_file(const char *path, char *buf, size_t buflen);
 int write_file(const char *path, const char *buf, size_t buflen);
-const unsigned long read_num(const char *path);
+unsigned long read_num(const char *path);
 void write_num(const char *path, unsigned long num);
 
 int thp_read_string(const char *name, const char * const strings[]);
 void thp_write_string(const char *name, const char *val);
-const unsigned long thp_read_num(const char *name);
+unsigned long thp_read_num(const char *name);
 void thp_write_num(const char *name, unsigned long num);
 
 void thp_write_settings(struct thp_settings *settings);
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index 717539eddf98..7ad6ba660c7d 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -673,7 +673,11 @@ int uffd_open_dev(unsigned int flags)
 
 int uffd_open_sys(unsigned int flags)
 {
+#ifdef __NR_userfaultfd
 	return syscall(__NR_userfaultfd, flags);
+#else
+	return -1;
+#endif
 }
 
 int uffd_open(unsigned int flags)
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index a4b83280998a..944d559ade21 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -33,10 +33,11 @@
  * pthread_mutex_lock will also verify the atomicity of the memory
  * transfer (UFFDIO_COPY).
  */
-#include <asm-generic/unistd.h>
+
 #include "uffd-common.h"
 
 uint64_t features;
+#ifdef __NR_userfaultfd
 
 #define BOUNCE_RANDOM		(1<<0)
 #define BOUNCE_RACINGFAULTS	(1<<1)
@@ -471,3 +472,15 @@ int main(int argc, char **argv)
 	       nr_pages, nr_pages_per_cpu);
 	return userfaultfd_stress();
 }
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+	printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
+	return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index a2e71b1636e7..74c8bc02b506 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -5,11 +5,12 @@
  *  Copyright (C) 2015-2023  Red Hat, Inc.
  */
 
-#include <asm-generic/unistd.h>
 #include "uffd-common.h"
 
 #include "../../../../mm/gup_test.h"
 
+#ifdef __NR_userfaultfd
+
 /* The unit test doesn't need a large or random size, make it 32MB for now */
 #define  UFFD_TEST_MEM_SIZE               (32UL << 20)
 
@@ -1122,7 +1123,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size,
 	char c;
 	unsigned long long count;
 	struct uffd_args args = { 0 };
-	char *orig_area_src, *orig_area_dst;
+	char *orig_area_src = NULL, *orig_area_dst = NULL;
 	unsigned long step_size, step_count;
 	unsigned long src_offs = 0;
 	unsigned long dst_offs = 0;
@@ -1190,7 +1191,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size,
 				    nr, count, count_verify[src_offs + nr + i]);
 		}
 	}
-	if (step_size > page_size) {
+	if (chunk_size > page_size) {
 		area_src = orig_area_src;
 		area_dst = orig_area_dst;
 	}
@@ -1558,3 +1559,14 @@ int main(int argc, char *argv[])
 	return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
 }
 
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+	printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
+	return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c
new file mode 100644
index 000000000000..2c4f984bd73c
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-wp-mremap.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include "../kselftest.h"
+#include "thp_settings.h"
+#include "uffd-common.h"
+
+static int pagemap_fd;
+static size_t pagesize;
+static int nr_pagesizes = 1;
+static int nr_thpsizes;
+static size_t thpsizes[20];
+static int nr_hugetlbsizes;
+static size_t hugetlbsizes[10];
+
+static int sz2ord(size_t size)
+{
+	return __builtin_ctzll(size / pagesize);
+}
+
+static int detect_thp_sizes(size_t sizes[], int max)
+{
+	int count = 0;
+	unsigned long orders;
+	size_t kb;
+	int i;
+
+	/* thp not supported at all. */
+	if (!read_pmd_pagesize())
+		return 0;
+
+	orders = thp_supported_orders();
+
+	for (i = 0; orders && count < max; i++) {
+		if (!(orders & (1UL << i)))
+			continue;
+		orders &= ~(1UL << i);
+		kb = (pagesize >> 10) << i;
+		sizes[count++] = kb * 1024;
+		ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
+	}
+
+	return count;
+}
+
+static void *mmap_aligned(size_t size, int prot, int flags)
+{
+	size_t mmap_size = size * 2;
+	char *mmap_mem, *mem;
+
+	mmap_mem = mmap(NULL, mmap_size, prot, flags, -1, 0);
+	if (mmap_mem == MAP_FAILED)
+		return mmap_mem;
+
+	mem = (char *)(((uintptr_t)mmap_mem + size - 1) & ~(size - 1));
+	munmap(mmap_mem, mem - mmap_mem);
+	munmap(mem + size, mmap_mem + mmap_size - mem - size);
+
+	return mem;
+}
+
+static void *alloc_one_folio(size_t size, bool private, bool hugetlb)
+{
+	bool thp = !hugetlb && size > pagesize;
+	int flags = MAP_ANONYMOUS;
+	int prot = PROT_READ | PROT_WRITE;
+	char *mem, *addr;
+
+	assert((size & (size - 1)) == 0);
+
+	if (private)
+		flags |= MAP_PRIVATE;
+	else
+		flags |= MAP_SHARED;
+
+	/*
+	 * For THP, we must explicitly enable the THP size, allocate twice the
+	 * required space then manually align.
+	 */
+	if (thp) {
+		struct thp_settings settings = *thp_current_settings();
+
+		if (private)
+			settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
+		else
+			settings.shmem_hugepages[sz2ord(size)].enabled = SHMEM_ALWAYS;
+
+		thp_push_settings(&settings);
+
+		mem = mmap_aligned(size, prot, flags);
+	} else {
+		if (hugetlb) {
+			flags |= MAP_HUGETLB;
+			flags |= __builtin_ctzll(size) << MAP_HUGE_SHIFT;
+		}
+
+		mem = mmap(NULL, size, prot, flags, -1, 0);
+	}
+
+	if (mem == MAP_FAILED) {
+		mem = NULL;
+		goto out;
+	}
+
+	assert(((uintptr_t)mem & (size - 1)) == 0);
+
+	/*
+	 * Populate the folio by writing the first byte and check that all pages
+	 * are populated. Finally set the whole thing to non-zero data to avoid
+	 * kernel from mapping it back to the zero page.
+	 */
+	mem[0] = 1;
+	for (addr = mem; addr < mem + size; addr += pagesize) {
+		if (!pagemap_is_populated(pagemap_fd, addr)) {
+			munmap(mem, size);
+			mem = NULL;
+			goto out;
+		}
+	}
+	memset(mem, 1, size);
+out:
+	if (thp)
+		thp_pop_settings();
+
+	return mem;
+}
+
+static bool check_uffd_wp_state(void *mem, size_t size, bool expect)
+{
+	uint64_t pte;
+	void *addr;
+
+	for (addr = mem; addr < mem + size; addr += pagesize) {
+		pte = pagemap_get_entry(pagemap_fd, addr);
+		if (!!(pte & PM_UFFD_WP) != expect) {
+			ksft_test_result_fail("uffd-wp not %s for pte %lu!\n",
+					      expect ? "set" : "clear",
+					      (addr - mem) / pagesize);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static bool range_is_swapped(void *addr, size_t size)
+{
+	for (; size; addr += pagesize, size -= pagesize)
+		if (!pagemap_is_swapped(pagemap_fd, addr))
+			return false;
+	return true;
+}
+
+static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb)
+{
+	struct uffdio_writeprotect wp_prms;
+	uint64_t features = 0;
+	void *addr = NULL;
+	void *mem = NULL;
+
+	assert(!(hugetlb && swapout));
+
+	ksft_print_msg("[RUN] %s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n",
+				__func__,
+				size,
+				private ? "true" : "false",
+				swapout ? "true" : "false",
+				hugetlb ? "true" : "false");
+
+	/* Allocate a folio of required size and type. */
+	mem = alloc_one_folio(size, private, hugetlb);
+	if (!mem) {
+		ksft_test_result_fail("alloc_one_folio() failed\n");
+		goto out;
+	}
+
+	/* Register range for uffd-wp. */
+	if (userfaultfd_open(&features)) {
+		ksft_test_result_fail("userfaultfd_open() failed\n");
+		goto out;
+	}
+	if (uffd_register(uffd, mem, size, false, true, false)) {
+		ksft_test_result_fail("uffd_register() failed\n");
+		goto out;
+	}
+	wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+	wp_prms.range.start = (uintptr_t)mem;
+	wp_prms.range.len = size;
+	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) {
+		ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n");
+		goto out;
+	}
+
+	if (swapout) {
+		madvise(mem, size, MADV_PAGEOUT);
+		if (!range_is_swapped(mem, size)) {
+			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+			goto out;
+		}
+	}
+
+	/* Check that uffd-wp is set for all PTEs in range. */
+	if (!check_uffd_wp_state(mem, size, true))
+		goto out;
+
+	/*
+	 * Move the mapping to a new, aligned location. Since
+	 * UFFD_FEATURE_EVENT_REMAP is not set, we expect the uffd-wp bit for
+	 * each PTE to be cleared in the new mapping.
+	 */
+	addr = mmap_aligned(size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS);
+	if (addr == MAP_FAILED) {
+		ksft_test_result_fail("mmap_aligned() failed\n");
+		goto out;
+	}
+	if (mremap(mem, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, addr) == MAP_FAILED) {
+		ksft_test_result_fail("mremap() failed\n");
+		munmap(addr, size);
+		goto out;
+	}
+	mem = addr;
+
+	/* Check that uffd-wp is cleared for all PTEs in range. */
+	if (!check_uffd_wp_state(mem, size, false))
+		goto out;
+
+	ksft_test_result_pass("%s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n",
+				__func__,
+				size,
+				private ? "true" : "false",
+				swapout ? "true" : "false",
+				hugetlb ? "true" : "false");
+out:
+	if (mem)
+		munmap(mem, size);
+	if (uffd >= 0) {
+		close(uffd);
+		uffd = -1;
+	}
+}
+
+struct testcase {
+	size_t *sizes;
+	int *nr_sizes;
+	bool private;
+	bool swapout;
+	bool hugetlb;
+};
+
+static const struct testcase testcases[] = {
+	/* base pages. */
+	{
+		.sizes = &pagesize,
+		.nr_sizes = &nr_pagesizes,
+		.private = false,
+		.swapout = false,
+		.hugetlb = false,
+	},
+	{
+		.sizes = &pagesize,
+		.nr_sizes = &nr_pagesizes,
+		.private = true,
+		.swapout = false,
+		.hugetlb = false,
+	},
+	{
+		.sizes = &pagesize,
+		.nr_sizes = &nr_pagesizes,
+		.private = false,
+		.swapout = true,
+		.hugetlb = false,
+	},
+	{
+		.sizes = &pagesize,
+		.nr_sizes = &nr_pagesizes,
+		.private = true,
+		.swapout = true,
+		.hugetlb = false,
+	},
+
+	/* thp. */
+	{
+		.sizes = thpsizes,
+		.nr_sizes = &nr_thpsizes,
+		.private = false,
+		.swapout = false,
+		.hugetlb = false,
+	},
+	{
+		.sizes = thpsizes,
+		.nr_sizes = &nr_thpsizes,
+		.private = true,
+		.swapout = false,
+		.hugetlb = false,
+	},
+	{
+		.sizes = thpsizes,
+		.nr_sizes = &nr_thpsizes,
+		.private = false,
+		.swapout = true,
+		.hugetlb = false,
+	},
+	{
+		.sizes = thpsizes,
+		.nr_sizes = &nr_thpsizes,
+		.private = true,
+		.swapout = true,
+		.hugetlb = false,
+	},
+
+	/* hugetlb. */
+	{
+		.sizes = hugetlbsizes,
+		.nr_sizes = &nr_hugetlbsizes,
+		.private = false,
+		.swapout = false,
+		.hugetlb = true,
+	},
+	{
+		.sizes = hugetlbsizes,
+		.nr_sizes = &nr_hugetlbsizes,
+		.private = true,
+		.swapout = false,
+		.hugetlb = true,
+	},
+};
+
+int main(int argc, char **argv)
+{
+	struct thp_settings settings;
+	int i, j, plan = 0;
+
+	pagesize = getpagesize();
+	nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
+	nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+						    ARRAY_SIZE(hugetlbsizes));
+
+	/* If THP is supported, save THP settings and initially disable THP. */
+	if (nr_thpsizes) {
+		thp_save_settings();
+		thp_read_settings(&settings);
+		for (i = 0; i < NR_ORDERS; i++) {
+			settings.hugepages[i].enabled = THP_NEVER;
+			settings.shmem_hugepages[i].enabled = SHMEM_NEVER;
+		}
+		thp_push_settings(&settings);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(testcases); i++)
+		plan += *testcases[i].nr_sizes;
+	ksft_set_plan(plan);
+
+	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+	if (pagemap_fd < 0)
+		ksft_exit_fail_msg("opening pagemap failed\n");
+
+	for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+		const struct testcase *tc = &testcases[i];
+
+		for (j = 0; j < *tc->nr_sizes; j++)
+			test_one_folio(tc->sizes[j], tc->private, tc->swapout,
+				       tc->hugetlb);
+	}
+
+	/* If THP is supported, restore original THP settings. */
+	if (nr_thpsizes)
+		thp_restore_settings();
+
+	i = ksft_get_fail_cnt();
+	if (i)
+		ksft_exit_fail_msg("%d out of %d tests failed\n",
+				   i, ksft_test_num());
+	ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index 2a2b69e91950..b380e102b22f 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -10,10 +10,12 @@
 #include <string.h>
 #include <unistd.h>
 #include <errno.h>
+#include <sys/prctl.h>
 #include <sys/mman.h>
 #include <sys/time.h>
 #include <fcntl.h>
 
+#include "vm_util.h"
 #include "../kselftest.h"
 
 /*
@@ -82,6 +84,24 @@ static void validate_addr(char *ptr, int high_addr)
 		ksft_exit_fail_msg("Bad address %lx\n", addr);
 }
 
+static void mark_range(char *ptr, size_t size)
+{
+	if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, size, "virtual_address_range") == -1) {
+		if (errno == EINVAL) {
+			/* Depends on CONFIG_ANON_VMA_NAME */
+			ksft_test_result_skip("prctl(PR_SET_VMA_ANON_NAME) not supported\n");
+			ksft_finished();
+		} else {
+			ksft_exit_fail_perror("prctl(PR_SET_VMA_ANON_NAME) failed\n");
+		}
+	}
+}
+
+static int is_marked_vma(const char *vma_name)
+{
+	return vma_name && !strcmp(vma_name, "[anon:virtual_address_range]\n");
+}
+
 static int validate_lower_address_hint(void)
 {
 	char *ptr;
@@ -116,12 +136,17 @@ static int validate_complete_va_space(void)
 
 	prev_end_addr = 0;
 	while (fgets(line, sizeof(line), file)) {
+		const char *vma_name = NULL;
+		int vma_name_start = 0;
 		unsigned long hop;
 
-		if (sscanf(line, "%lx-%lx %s[rwxp-]",
-			   &start_addr, &end_addr, prot) != 3)
+		if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n",
+			   &start_addr, &end_addr, prot, &vma_name_start) != 3)
 			ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
 
+		if (vma_name_start)
+			vma_name = line + vma_name_start;
+
 		/* end of userspace mappings; ignore vsyscall mapping */
 		if (start_addr & (1UL << 63))
 			return 0;
@@ -135,6 +160,9 @@ static int validate_complete_va_space(void)
 		if (prot[0] != 'r')
 			continue;
 
+		if (check_vmflag_io((void *)start_addr))
+			continue;
+
 		/*
 		 * Confirm whether MAP_CHUNK_SIZE chunk can be found or not.
 		 * If write succeeds, no need to check MAP_CHUNK_SIZE - 1
@@ -149,6 +177,9 @@ static int validate_complete_va_space(void)
 				return 1;
 			lseek(fd, 0, SEEK_SET);
 
+			if (is_marked_vma(vma_name))
+				munmap((char *)(start_addr + hop), MAP_CHUNK_SIZE);
+
 			hop += MAP_CHUNK_SIZE;
 		}
 	}
@@ -166,7 +197,7 @@ int main(int argc, char *argv[])
 	ksft_set_plan(1);
 
 	for (i = 0; i < NR_CHUNKS_LOW; i++) {
-		ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+		ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ,
 			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 
 		if (ptr[i] == MAP_FAILED) {
@@ -175,6 +206,7 @@ int main(int argc, char *argv[])
 			break;
 		}
 
+		mark_range(ptr[i], MAP_CHUNK_SIZE);
 		validate_addr(ptr[i], 0);
 	}
 	lchunks = i;
@@ -186,12 +218,13 @@ int main(int argc, char *argv[])
 
 	for (i = 0; i < NR_CHUNKS_HIGH; i++) {
 		hint = hint_addr();
-		hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+		hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ,
 			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 
 		if (hptr[i] == MAP_FAILED)
 			break;
 
+		mark_range(ptr[i], MAP_CHUNK_SIZE);
 		validate_addr(hptr[i], 1);
 	}
 	hchunks = i;
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index d8d0cf04bb57..a36734fb62f3 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -2,6 +2,7 @@
 #include <string.h>
 #include <fcntl.h>
 #include <dirent.h>
+#include <inttypes.h>
 #include <sys/ioctl.h>
 #include <linux/userfaultfd.h>
 #include <linux/fs.h>
@@ -138,7 +139,7 @@ void clear_softdirty(void)
 		ksft_exit_fail_msg("opening clear_refs failed\n");
 	ret = write(fd, ctrl, strlen(ctrl));
 	close(fd);
-	if (ret != strlen(ctrl))
+	if (ret != (signed int)strlen(ctrl))
 		ksft_exit_fail_msg("writing clear_refs failed\n");
 }
 
@@ -193,13 +194,11 @@ err_out:
 	return rss_anon;
 }
 
-bool __check_huge(void *addr, char *pattern, int nr_hpages,
-		  uint64_t hpage_size)
+char *__get_smap_entry(void *addr, const char *pattern, char *buf, size_t len)
 {
-	uint64_t thp = -1;
 	int ret;
 	FILE *fp;
-	char buffer[MAX_LINE_LENGTH];
+	char *entry = NULL;
 	char addr_pattern[MAX_LINE_LENGTH];
 
 	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
@@ -211,23 +210,40 @@ bool __check_huge(void *addr, char *pattern, int nr_hpages,
 	if (!fp)
 		ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
 
-	if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
+	if (!check_for_pattern(fp, addr_pattern, buf, len))
 		goto err_out;
 
-	/*
-	 * Fetch the pattern in the same block and check the number of
-	 * hugepages.
-	 */
-	if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer)))
+	/* Fetch the pattern in the same block */
+	if (!check_for_pattern(fp, pattern, buf, len))
 		goto err_out;
 
-	snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern);
+	/* Trim trailing newline */
+	entry = strchr(buf, '\n');
+	if (entry)
+		*entry = '\0';
 
-	if (sscanf(buffer, addr_pattern, &thp) != 1)
-		ksft_exit_fail_msg("Reading smap error\n");
+	entry = buf + strlen(pattern);
 
 err_out:
 	fclose(fp);
+	return entry;
+}
+
+bool __check_huge(void *addr, char *pattern, int nr_hpages,
+		  uint64_t hpage_size)
+{
+	char buffer[MAX_LINE_LENGTH];
+	uint64_t thp = -1;
+	char *entry;
+
+	entry = __get_smap_entry(addr, pattern, buffer, sizeof(buffer));
+	if (!entry)
+		goto err_out;
+
+	if (sscanf(entry, "%9" SCNu64 " kB", &thp) != 1)
+		ksft_exit_fail_msg("Reading smap error\n");
+
+err_out:
 	return thp == (nr_hpages * (hpage_size >> 10));
 }
 
@@ -384,3 +400,27 @@ unsigned long get_free_hugepages(void)
 	fclose(f);
 	return fhp;
 }
+
+bool check_vmflag_io(void *addr)
+{
+	char buffer[MAX_LINE_LENGTH];
+	const char *flags;
+	size_t flaglen;
+
+	flags = __get_smap_entry(addr, "VmFlags:", buffer, sizeof(buffer));
+	if (!flags)
+		ksft_exit_fail_msg("%s: No VmFlags for %p\n", __func__, addr);
+
+	while (true) {
+		flags += strspn(flags, " ");
+
+		flaglen = strcspn(flags, " ");
+		if (!flaglen)
+			return false;
+
+		if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen))
+			return true;
+
+		flags += flaglen;
+	}
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index 2eaed8209925..b60ac68a9dc8 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -53,6 +53,7 @@ int uffd_unregister(int uffd, void *addr, uint64_t len);
 int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
 			      bool miss, bool wp, bool minor, uint64_t *ioctls);
 unsigned long get_free_hugepages(void);
+bool check_vmflag_io(void *addr);
 
 /*
  * On ppc64 this will only work with radix 2M hugepage size
diff --git a/tools/testing/selftests/mm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c
index 1289d311efd7..34c91f7e6128 100644
--- a/tools/testing/selftests/mm/write_to_hugetlbfs.c
+++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c
@@ -89,7 +89,7 @@ int main(int argc, char **argv)
 			size = atoi(optarg);
 			break;
 		case 'p':
-			strncpy(path, optarg, sizeof(path));
+			strncpy(path, optarg, sizeof(path) - 1);
 			break;
 		case 'm':
 			if (atoi(optarg) >= MAX_METHOD) {
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 70f65eb320a7..48a000cabc97 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -20,6 +20,7 @@
 #include <stdarg.h>
 #include <linux/mount.h>
 
+#include "../filesystems/overlayfs/wrappers.h"
 #include "../kselftest_harness.h"
 
 #ifndef CLONE_NEWNS
@@ -126,6 +127,26 @@
 	#endif
 #endif
 
+#ifndef __NR_move_mount
+	#if defined __alpha__
+		#define __NR_move_mount 539
+	#elif defined _MIPS_SIM
+		#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
+			#define __NR_move_mount 4429
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
+			#define __NR_move_mount 6429
+		#endif
+		#if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
+			#define __NR_move_mount 5429
+		#endif
+	#elif defined __ia64__
+		#define __NR_move_mount (428 + 1024)
+	#else
+		#define __NR_move_mount 429
+	#endif
+#endif
+
 #ifndef MOUNT_ATTR_IDMAP
 #define MOUNT_ATTR_IDMAP 0x00100000
 #endif
@@ -397,6 +418,10 @@ FIXTURE_SETUP(mount_setattr)
 
 	ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
 
+	ASSERT_EQ(mkdir("/tmp/target1", 0777), 0);
+
+	ASSERT_EQ(mkdir("/tmp/target2", 0777), 0);
+
 	ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
 			"size=100000,mode=700"), 0);
 
@@ -1506,4 +1531,631 @@ TEST_F(mount_setattr, mount_attr_nosymfollow)
 	ASSERT_EQ(close(fd), 0);
 }
 
+TEST_F(mount_setattr, open_tree_detached)
+{
+	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+	struct statx stx;
+
+	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				     OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_base, 0);
+	/*
+	 * /mnt                   testing tmpfs
+	 * |-/mnt/A               testing tmpfs
+	 * | `-/mnt/A/AA          testing tmpfs
+	 * |   `-/mnt/A/AA/B      testing tmpfs
+	 * |     `-/mnt/A/AA/B/BB testing tmpfs
+	 * `-/mnt/B               testing ramfs
+	 */
+	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				       OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_subdir, 0);
+	/*
+	 * /AA          testing tmpfs
+	 * `-/AA/B      testing tmpfs
+	 *   `-/AA/B/BB testing tmpfs
+	 */
+	ASSERT_EQ(statx(fd_tree_subdir, "B", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_subdir, "B/BB", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	ASSERT_EQ(move_mount(fd_tree_subdir, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	/*
+	 * /tmp/target1          testing tmpfs
+	 * `-/tmp/target1/B      testing tmpfs
+	 *   `-/tmp/target1/B/BB testing tmpfs
+	 */
+	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(-EBADF, "/tmp/target1/B/BB", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target2", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	/*
+	 * /tmp/target2                   testing tmpfs
+	 * |-/tmp/target2/A               testing tmpfs
+	 * | `-/tmp/target2/A/AA          testing tmpfs
+	 * |   `-/tmp/target2/A/AA/B      testing tmpfs
+	 * |     `-/tmp/target2/A/AA/B/BB testing tmpfs
+	 * `-/tmp/target2/B               testing ramfs
+	 */
+	ASSERT_EQ(statx(-EBADF, "/tmp/target2", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B/BB", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(-EBADF, "/tmp/target2/B", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	EXPECT_EQ(close(fd_tree_base), 0);
+	EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail)
+{
+	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+	struct statx stx;
+
+	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				     OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_base, 0);
+	/*
+	 * /mnt                   testing tmpfs
+	 * |-/mnt/A               testing tmpfs
+	 * | `-/mnt/A/AA          testing tmpfs
+	 * |   `-/mnt/A/AA/B      testing tmpfs
+	 * |     `-/mnt/A/AA/B/BB testing tmpfs
+	 * `-/mnt/B               testing ramfs
+	 */
+	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+	/*
+	 * The origin mount namespace of the anonymous mount namespace
+	 * of @fd_tree_base doesn't match the caller's mount namespace
+	 * anymore so creation of another detached mounts must fail.
+	 */
+	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				       OPEN_TREE_CLONE);
+	ASSERT_LT(fd_tree_subdir, 0);
+	ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail2)
+{
+	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+	struct statx stx;
+
+	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				     OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_base, 0);
+	/*
+	 * /mnt                   testing tmpfs
+	 * |-/mnt/A               testing tmpfs
+	 * | `-/mnt/A/AA          testing tmpfs
+	 * |   `-/mnt/A/AA/B      testing tmpfs
+	 * |     `-/mnt/A/AA/B/BB testing tmpfs
+	 * `-/mnt/B               testing ramfs
+	 */
+	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	EXPECT_EQ(create_and_enter_userns(), 0);
+
+	/*
+	 * The caller entered a new user namespace. They will have
+	 * CAP_SYS_ADMIN in this user namespace. However, they're still
+	 * located in a mount namespace that is owned by an ancestor
+	 * user namespace in which they hold no privilege. Creating a
+	 * detached mount must thus fail.
+	 */
+	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				       OPEN_TREE_CLONE);
+	ASSERT_LT(fd_tree_subdir, 0);
+	ASSERT_EQ(errno, EPERM);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail3)
+{
+	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+	struct statx stx;
+
+	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				     AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				     OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_base, 0);
+	/*
+        * /mnt                   testing tmpfs
+        * |-/mnt/A               testing tmpfs
+        * | `-/mnt/A/AA          testing tmpfs
+        * |   `-/mnt/A/AA/B      testing tmpfs
+        * |     `-/mnt/A/AA/B/BB testing tmpfs
+        * `-/mnt/B               testing ramfs
+        */
+	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	EXPECT_EQ(prepare_unpriv_mountns(), 0);
+
+	/*
+        * The caller entered a new mount namespace. They will have
+        * CAP_SYS_ADMIN in the owning user namespace of their mount
+        * namespace.
+        *
+        * However, the origin mount namespace of the anonymous mount
+        * namespace of @fd_tree_base doesn't match the caller's mount
+        * namespace anymore so creation of another detached mounts must
+        * fail.
+        */
+	fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+			               AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				       AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				       OPEN_TREE_CLONE);
+	ASSERT_LT(fd_tree_subdir, 0);
+	ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_F(mount_setattr, open_tree_subfolder)
+{
+	int fd_context, fd_tmpfs, fd_tree;
+
+	fd_context = sys_fsopen("tmpfs", 0);
+	ASSERT_GE(fd_context, 0);
+
+	ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+	fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+	ASSERT_GE(fd_tmpfs, 0);
+
+	EXPECT_EQ(close(fd_context), 0);
+
+	ASSERT_EQ(mkdirat(fd_tmpfs, "subdir", 0755), 0);
+
+	fd_tree = sys_open_tree(fd_tmpfs, "subdir",
+				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree, 0);
+
+	EXPECT_EQ(close(fd_tmpfs), 0);
+
+	ASSERT_EQ(mkdirat(-EBADF, "/mnt/open_tree_subfolder", 0755), 0);
+
+	ASSERT_EQ(sys_move_mount(fd_tree, "", -EBADF, "/mnt/open_tree_subfolder", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	EXPECT_EQ(close(fd_tree), 0);
+
+	ASSERT_EQ(umount2("/mnt/open_tree_subfolder", 0), 0);
+
+	EXPECT_EQ(rmdir("/mnt/open_tree_subfolder"), 0);
+}
+
+TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_then_close)
+{
+	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+	struct statx stx;
+
+	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_base, 0);
+	/*
+	 * /mnt testing tmpfs
+	 */
+	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
+				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+				       OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_subdir, 0);
+	/*
+	 * /mnt testing tmpfs
+	 */
+	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
+	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	/*
+	 * /mnt   testing tmpfs
+	 * `-/mnt testing tmpfs
+	 */
+	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+	EXPECT_EQ(close(fd_tree_base), 0);
+	EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_and_attach)
+{
+	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+	struct statx stx;
+	__u64 mnt_id = 0;
+
+	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_base, 0);
+	/*
+	 * /mnt testing tmpfs
+	 */
+	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	fd_tree_subdir = sys_open_tree(fd_tree_base, "",
+				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				       AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+				       OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_subdir, 0);
+	/*
+	 * /mnt testing tmpfs
+	 */
+	ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
+	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	/*
+	 * /mnt   testing tmpfs
+	 * `-/mnt testing tmpfs
+	 */
+	ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+	ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
+	mnt_id = stx.stx_mnt_id;
+
+	ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, STATX_MNT_ID_UNIQUE, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+	ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
+	ASSERT_EQ(stx.stx_mnt_id, mnt_id);
+
+	EXPECT_EQ(close(fd_tree_base), 0);
+	EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, move_mount_detached_fail)
+{
+	int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+	struct statx stx;
+
+	fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+				     AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				     OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_base, 0);
+
+	/* Attach the mount to the caller's mount namespace. */
+	ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	fd_tree_subdir = sys_open_tree(-EBADF, "/tmp/B",
+				       AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				       OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree_subdir, 0);
+	ASSERT_EQ(statx(fd_tree_subdir, "BB", 0, 0, &stx), 0);
+	ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	/* Not allowed to move an attached mount to a detached mount. */
+	ASSERT_NE(move_mount(fd_tree_base, "", fd_tree_subdir, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+	ASSERT_EQ(errno, EINVAL);
+
+	EXPECT_EQ(close(fd_tree_base), 0);
+	EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, attach_detached_mount_then_umount_then_close)
+{
+	int fd_tree = -EBADF;
+	struct statx stx;
+
+	fd_tree = sys_open_tree(-EBADF, "/mnt",
+				AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree, 0);
+
+	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx), 0);
+	/* We copied with AT_RECURSIVE so /mnt/A must be a mountpoint. */
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	/* Attach the mount to the caller's mount namespace. */
+	ASSERT_EQ(move_mount(fd_tree, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
+	ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+	ASSERT_EQ(umount2("/tmp/target1", MNT_DETACH), 0);
+
+	/*
+	 * This tests whether dissolve_on_fput() handles a NULL mount
+	 * namespace correctly, i.e., that it doesn't splat.
+	 */
+	EXPECT_EQ(close(fd_tree), 0);
+}
+
+TEST_F(mount_setattr, mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)
+{
+	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+	/*
+	 * |-/mnt/A               testing tmpfs
+	 *   `-/mnt/A/AA          testing tmpfs
+	 *     `-/mnt/A/AA/B      testing tmpfs
+	 *       `-/mnt/A/AA/B/BB testing tmpfs
+	 */
+	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				 OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree1, 0);
+
+	/*
+	 * `-/mnt/B testing ramfs
+	 */
+	fd_tree2 = sys_open_tree(-EBADF, "/mnt/B",
+				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+				 OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree2, 0);
+
+	/*
+	 * Move the source detached mount tree to the target detached
+	 * mount tree. This will move all the mounts in the source mount
+	 * tree from the source anonymous mount namespace to the target
+	 * anonymous mount namespace.
+	 *
+	 * The source detached mount tree and the target detached mount
+	 * tree now both refer to the same anonymous mount namespace.
+	 *
+	 * |-""                 testing ramfs
+	 *   `-""               testing tmpfs
+	 *     `-""/AA          testing tmpfs
+	 *       `-""/AA/B      testing tmpfs
+	 *         `-""/AA/B/BB testing tmpfs
+	 */
+	ASSERT_EQ(move_mount(fd_tree1, "", fd_tree2, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+	/*
+	 * The source detached mount tree @fd_tree1 is now an attached
+	 * mount, i.e., it has a parent. Specifically, it now has the
+	 * root mount of the mount tree of @fd_tree2 as its parent.
+	 *
+	 * That means we are no longer allowed to attach it as we only
+	 * allow attaching the root of an anonymous mount tree, not
+	 * random bits and pieces. Verify that the kernel enforces this.
+	 */
+	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	/*
+	 * Closing the source detached mount tree must not unmount and
+	 * free the shared anonymous mount namespace. The kernel will
+	 * quickly yell at us because the anonymous mount namespace
+	 * won't be empty when it's freed.
+	 */
+	EXPECT_EQ(close(fd_tree1), 0);
+
+	/*
+	 * Attach the mount tree to a non-anonymous mount namespace.
+	 * This can only succeed if closing fd_tree1 had proper
+	 * semantics and didn't cause the anonymous mount namespace to
+	 * be freed. If it did this will trigger a UAF which will be
+	 * visible on any KASAN enabled kernel.
+	 *
+	 * |-/tmp/target1                 testing ramfs
+	 *   `-/tmp/target1               testing tmpfs
+	 *     `-/tmp/target1/AA          testing tmpfs
+	 *       `-/tmp/target1/AA/B      testing tmpfs
+	 *         `-/tmp/target1/AA/B/BB testing tmpfs
+	 */
+	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+	EXPECT_EQ(close(fd_tree2), 0);
+}
+
+TEST_F(mount_setattr, two_detached_mounts_referring_to_same_anonymous_mount_namespace)
+{
+	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+	/*
+	 * Copy the following mount tree:
+	 *
+	 * |-/mnt/A               testing tmpfs
+	 *   `-/mnt/A/AA          testing tmpfs
+	 *     `-/mnt/A/AA/B      testing tmpfs
+	 *       `-/mnt/A/AA/B/BB testing tmpfs
+	 */
+	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				 OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree1, 0);
+
+	/*
+	 * Create an O_PATH file descriptors with a separate struct file
+	 * that refers to the same detached mount tree as @fd_tree1
+	 */
+	fd_tree2 = sys_open_tree(fd_tree1, "",
+				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(fd_tree2, 0);
+
+	/*
+	 * Copy the following mount tree:
+	 *
+	 * |-/tmp/target1               testing tmpfs
+	 *   `-/tmp/target1/AA          testing tmpfs
+	 *     `-/tmp/target1/AA/B      testing tmpfs
+	 *       `-/tmp/target1/AA/B/BB testing tmpfs
+	 */
+	ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	/*
+	 * This must fail as this would mean adding the same mount tree
+	 * into the same mount tree.
+	 */
+	ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+}
+
+TEST_F(mount_setattr, two_detached_subtrees_of_same_anonymous_mount_namespace)
+{
+	int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+	/*
+	 * Copy the following mount tree:
+	 *
+	 * |-/mnt/A               testing tmpfs
+	 *   `-/mnt/A/AA          testing tmpfs
+	 *     `-/mnt/A/AA/B      testing tmpfs
+	 *       `-/mnt/A/AA/B/BB testing tmpfs
+	 */
+	fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				 OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree1, 0);
+
+	/*
+	 * Create an O_PATH file descriptors with a separate struct file that
+	 * refers to a subtree of the same detached mount tree as @fd_tree1
+	 */
+	fd_tree2 = sys_open_tree(fd_tree1, "AA",
+				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				 AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
+	ASSERT_GE(fd_tree2, 0);
+
+	/*
+	 * This must fail as it is only possible to attach the root of a
+	 * detached mount tree.
+	 */
+	ASSERT_NE(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+	ASSERT_EQ(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+}
+
+TEST_F(mount_setattr, detached_tree_propagation)
+{
+	int fd_tree = -EBADF;
+	struct statx stx1, stx2, stx3, stx4;
+
+	ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+	ASSERT_EQ(mount(NULL, "/mnt", NULL, MS_REC | MS_SHARED, NULL), 0);
+
+	/*
+	 * Copy the following mount tree:
+	 *
+         * /mnt                   testing tmpfs
+         * |-/mnt/A               testing tmpfs
+         * | `-/mnt/A/AA          testing tmpfs
+         * |   `-/mnt/A/AA/B      testing tmpfs
+         * |     `-/mnt/A/AA/B/BB testing tmpfs
+         * `-/mnt/B               testing ramfs
+	 */
+	fd_tree = sys_open_tree(-EBADF, "/mnt",
+				 AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+				 AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+				 OPEN_TREE_CLONE);
+	ASSERT_GE(fd_tree, 0);
+
+	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx1), 0);
+	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx2), 0);
+
+	/*
+	 * Copying the mount namespace like done above doesn't alter the
+	 * mounts in any way so the filesystem mounted on /mnt must be
+	 * identical even though the mounts will differ. Use the device
+	 * information to verify that. Note that tmpfs will have a 0
+	 * major number so comparing the major number is misleading.
+	 */
+	ASSERT_EQ(stx1.stx_dev_minor, stx2.stx_dev_minor);
+
+	/* Mount a tmpfs filesystem over /mnt/A. */
+	ASSERT_EQ(mount(NULL, "/mnt/A", "tmpfs", 0, NULL), 0);
+
+
+	ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx3), 0);
+	ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx4), 0);
+
+	/*
+	 * A new filesystem has been mounted on top of /mnt/A which
+	 * means that the device information will be different for any
+	 * statx() that was taken from /mnt/A before the mount compared
+	 * to one after the mount.
+	 *
+	 * Since we already now that the device information between the
+	 * stx1 and stx2 samples are identical we also now that stx2 and
+	 * stx3 device information will necessarily differ.
+	 */
+	ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
+
+	/*
+	 * If mount propagation worked correctly then the tmpfs mount
+	 * that was created after the mount namespace was unshared will
+	 * have propagated onto /mnt/A in the detached mount tree.
+	 *
+	 * Verify that the device information for stx3 and stx4 are
+	 * identical. It is already established that stx3 is different
+	 * from both stx1 and stx2 sampled before the tmpfs mount was
+	 * done so if stx3 and stx4 are identical the proof is done.
+	 */
+	ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor);
+
+	EXPECT_EQ(close(fd_tree), 0);
+}
+
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index cb2fc601de66..8f32b4f01aee 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -32,6 +32,7 @@ TEST_PROGS += ioam6.sh
 TEST_PROGS += gro.sh
 TEST_PROGS += gre_gso.sh
 TEST_PROGS += cmsg_so_mark.sh
+TEST_PROGS += cmsg_so_priority.sh
 TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
 TEST_PROGS += netns-name.sh
 TEST_PROGS += nl_netdev.py
@@ -95,9 +96,11 @@ TEST_PROGS += test_bridge_backup_port.sh
 TEST_PROGS += fdb_flush.sh fdb_notify.sh
 TEST_PROGS += fq_band_pktlimit.sh
 TEST_PROGS += vlan_hw_filter.sh
+TEST_PROGS += vlan_bridge_binding.sh
 TEST_PROGS += bpf_offload.py
 TEST_PROGS += ipv6_route_update_soft_lockup.sh
 TEST_PROGS += busy_poll_test.sh
+TEST_PROGS += lwt_dst_cache_ref_loop.sh
 
 # YNL files, must be before "include ..lib.mk"
 YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py
index d10f420e4ef6..fd0d959914e4 100755
--- a/tools/testing/selftests/net/bpf_offload.py
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -215,12 +215,14 @@ def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
     raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
 
 def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
-                      fail=True, include_stderr=False):
+                      fail=True, include_stderr=False, dev_bind=None):
     args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
     if prog_type is not None:
         args += " type " + prog_type
     if dev is not None:
         args += " dev " + dev
+    elif dev_bind is not None:
+        args += " xdpmeta_dev " + dev_bind
     if len(maps):
         args += " map " + " map ".join(maps)
 
@@ -980,6 +982,16 @@ try:
     rm("/sys/fs/bpf/offload")
     sim.wait_for_flush()
 
+    bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound",
+                      dev_bind=sim['ifname'])
+    devbound = bpf_pinned("/sys/fs/bpf/devbound")
+    start_test("Test dev-bound program in generic mode...")
+    ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True)
+    fail(ret == 0, "devbound program in generic mode allowed")
+    check_extack(err, "Can't attach device-bound programs in generic mode.", args)
+    rm("/sys/fs/bpf/devbound")
+    sim.wait_for_flush()
+
     start_test("Test XDP load failure...")
     sim.dfs["dev/bpf_bind_verifier_accept"] = 0
     ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c
index 99b0e8c17fca..04c7ff577bb8 100644
--- a/tools/testing/selftests/net/busy_poller.c
+++ b/tools/testing/selftests/net/busy_poller.c
@@ -54,16 +54,16 @@ struct epoll_params {
 #define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
 #endif
 
-static uint32_t cfg_port = 8000;
+static uint16_t cfg_port = 8000;
 static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
 static char *cfg_outfile;
 static int cfg_max_events = 8;
-static int cfg_ifindex;
+static uint32_t cfg_ifindex;
 
 /* busy poll params */
 static uint32_t cfg_busy_poll_usecs;
-static uint32_t cfg_busy_poll_budget;
-static uint32_t cfg_prefer_busy_poll;
+static uint16_t cfg_busy_poll_budget;
+static uint8_t cfg_prefer_busy_poll;
 
 /* IRQ params */
 static uint32_t cfg_defer_hard_irqs;
@@ -79,6 +79,7 @@ static void usage(const char *filepath)
 
 static void parse_opts(int argc, char **argv)
 {
+	unsigned long long tmp;
 	int ret;
 	int c;
 
@@ -86,31 +87,40 @@ static void parse_opts(int argc, char **argv)
 		usage(argv[0]);
 
 	while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
+		/* most options take integer values, except o and b, so reduce
+		 * code duplication a bit for the common case by calling
+		 * strtoull here and leave bounds checking and casting per
+		 * option below.
+		 */
+		if (c != 'o' && c != 'b')
+			tmp = strtoull(optarg, NULL, 0);
+
 		switch (c) {
 		case 'u':
-			cfg_busy_poll_usecs = strtoul(optarg, NULL, 0);
-			if (cfg_busy_poll_usecs == ULONG_MAX ||
-			    cfg_busy_poll_usecs > UINT32_MAX)
+			if (tmp == ULLONG_MAX || tmp > UINT32_MAX)
 				error(1, ERANGE, "busy_poll_usecs too large");
+
+			cfg_busy_poll_usecs = (uint32_t)tmp;
 			break;
 		case 'P':
-			cfg_prefer_busy_poll = strtoul(optarg, NULL, 0);
-			if (cfg_prefer_busy_poll == ULONG_MAX ||
-			    cfg_prefer_busy_poll > 1)
+			if (tmp == ULLONG_MAX || tmp > 1)
 				error(1, ERANGE,
 				      "prefer busy poll should be 0 or 1");
+
+			cfg_prefer_busy_poll = (uint8_t)tmp;
 			break;
 		case 'g':
-			cfg_busy_poll_budget = strtoul(optarg, NULL, 0);
-			if (cfg_busy_poll_budget == ULONG_MAX ||
-			    cfg_busy_poll_budget > UINT16_MAX)
+			if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
 				error(1, ERANGE,
 				      "busy poll budget must be [0, UINT16_MAX]");
+
+			cfg_busy_poll_budget = (uint16_t)tmp;
 			break;
 		case 'p':
-			cfg_port = strtoul(optarg, NULL, 0);
-			if (cfg_port > UINT16_MAX)
+			if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
 				error(1, ERANGE, "port must be <= 65535");
+
+			cfg_port = (uint16_t)tmp;
 			break;
 		case 'b':
 			ret = inet_aton(optarg, &cfg_bind_addr);
@@ -124,41 +134,39 @@ static void parse_opts(int argc, char **argv)
 				error(1, 0, "outfile invalid");
 			break;
 		case 'm':
-			cfg_max_events = strtol(optarg, NULL, 0);
-
-			if (cfg_max_events == LONG_MIN ||
-			    cfg_max_events == LONG_MAX ||
-			    cfg_max_events <= 0)
+			if (tmp == ULLONG_MAX || tmp > INT_MAX)
 				error(1, ERANGE,
-				      "max events must be > 0 and < LONG_MAX");
+				      "max events must be > 0 and <= INT_MAX");
+
+			cfg_max_events = (int)tmp;
 			break;
 		case 'd':
-			cfg_defer_hard_irqs = strtoul(optarg, NULL, 0);
-
-			if (cfg_defer_hard_irqs == ULONG_MAX ||
-			    cfg_defer_hard_irqs > INT32_MAX)
+			if (tmp == ULLONG_MAX || tmp > INT32_MAX)
 				error(1, ERANGE,
 				      "defer_hard_irqs must be <= INT32_MAX");
+
+			cfg_defer_hard_irqs = (uint32_t)tmp;
 			break;
 		case 'r':
-			cfg_gro_flush_timeout = strtoull(optarg, NULL, 0);
-
-			if (cfg_gro_flush_timeout == ULLONG_MAX)
+			if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
 				error(1, ERANGE,
-				      "gro_flush_timeout must be < ULLONG_MAX");
+				      "gro_flush_timeout must be < UINT64_MAX");
+
+			cfg_gro_flush_timeout = (uint64_t)tmp;
 			break;
 		case 's':
-			cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0);
-
-			if (cfg_irq_suspend_timeout == ULLONG_MAX)
+			if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
 				error(1, ERANGE,
 				      "irq_suspend_timeout must be < ULLONG_MAX");
+
+			cfg_irq_suspend_timeout = (uint64_t)tmp;
 			break;
 		case 'i':
-			cfg_ifindex = strtoul(optarg, NULL, 0);
-			if (cfg_ifindex == ULONG_MAX)
+			if (tmp == ULLONG_MAX || tmp > INT_MAX)
 				error(1, ERANGE,
-				      "ifindex must be < ULONG_MAX");
+				      "ifindex must be <= INT_MAX");
+
+			cfg_ifindex = (int)tmp;
 			break;
 		}
 	}
@@ -215,7 +223,7 @@ static void setup_queue(void)
 	struct netdev_napi_set_req *set_req = NULL;
 	struct ynl_sock *ys;
 	struct ynl_error yerr;
-	uint32_t napi_id;
+	uint32_t napi_id = 0;
 
 	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
 	if (!ys)
@@ -277,8 +285,8 @@ static void run_poller(void)
 	 * here
 	 */
 	epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
-	epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget;
-	epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll;
+	epoll_params.busy_poll_budget = cfg_busy_poll_budget;
+	epoll_params.prefer_busy_poll = cfg_prefer_busy_poll;
 	epoll_params.__pad = 0;
 
 	val = 1;
@@ -342,5 +350,9 @@ int main(int argc, char *argv[])
 	parse_opts(argc, argv);
 	setup_queue();
 	run_poller();
+
+	if (cfg_outfile)
+		free(cfg_outfile);
+
 	return 0;
 }
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 876c2db02a63..bc314382e4e1 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -59,6 +59,7 @@ struct options {
 		unsigned int proto;
 	} sock;
 	struct option_cmsg_u32 mark;
+	struct option_cmsg_u32 priority;
 	struct {
 		bool ena;
 		unsigned int delay;
@@ -97,6 +98,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\n"
 	       "\t\t-m val  Set SO_MARK with given value\n"
 	       "\t\t-M val  Set SO_MARK via setsockopt\n"
+	       "\t\t-P val  Set SO_PRIORITY via setsockopt\n"
+	       "\t\t-Q val  Set SO_PRIORITY via cmsg\n"
 	       "\t\t-d val  Set SO_TXTIME with given delay (usec)\n"
 	       "\t\t-t      Enable time stamp reporting\n"
 	       "\t\t-f val  Set don't fragment via cmsg\n"
@@ -115,7 +118,7 @@ static void cs_parse_args(int argc, char *argv[])
 {
 	int o;
 
-	while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
+	while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) {
 		switch (o) {
 		case 's':
 			opt.silent_send = true;
@@ -148,6 +151,10 @@ static void cs_parse_args(int argc, char *argv[])
 			opt.mark.ena = true;
 			opt.mark.val = atoi(optarg);
 			break;
+		case 'Q':
+			opt.priority.ena = true;
+			opt.priority.val = atoi(optarg);
+			break;
 		case 'M':
 			opt.sockopt.mark = atoi(optarg);
 			break;
@@ -253,6 +260,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
 			  SOL_SOCKET, SO_MARK, &opt.mark);
 	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+			  SOL_SOCKET, SO_PRIORITY, &opt.priority);
+	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
 			  SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
 	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
 			  SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh
new file mode 100755
index 000000000000..ee07d8653262
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_so_priority.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+readonly KSFT_SKIP=4
+
+IP4=192.0.2.1/24
+TGT4=192.0.2.2
+TGT4_RAW=192.0.2.3
+IP6=2001:db8::1/64
+TGT6=2001:db8::2
+TGT6_RAW=2001:db8::3
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+if ! command -v jq &> /dev/null; then
+    echo "SKIP cmsg_so_priroity.sh test: jq is not installed." >&2
+    exit "$KSFT_SKIP"
+fi
+
+check_result() {
+    ((TOTAL_TESTS++))
+    if [ "$1" -ne 0 ]; then
+        ((FAILED_TESTS++))
+    fi
+}
+
+cleanup()
+{
+    cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+create_filter() {
+    local handle=$1
+    local vlan_prio=$2
+    local ip_type=$3
+    local proto=$4
+    local dst_ip=$5
+    local ip_proto
+
+    if [[ "$proto" == "u" ]]; then
+        ip_proto="udp"
+    elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then
+        ip_proto="icmp"
+    elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then
+        ip_proto="icmpv6"
+    fi
+
+    tc -n $NS filter add dev dummy1 \
+        egress pref 1 handle "$handle" proto 802.1q \
+        flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \
+        dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \
+        action pass
+}
+
+ip -n $NS link set dev lo up
+ip -n $NS link add name dummy1 up type dummy
+
+ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \
+    egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+ip -n $NS address add $IP4 dev dummy1.10
+ip -n $NS address add $IP6 dev dummy1.10 nodad
+
+ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647'
+
+ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \
+    dev dummy1.10
+ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \
+    dev dummy1.10
+ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \
+    dev dummy1.10
+ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \
+    dev dummy1.10
+
+tc -n $NS qdisc add dev dummy1 clsact
+
+FILTER_COUNTER=10
+
+for i in 4 6; do
+    for proto in u i r; do
+        echo "Test IPV$i, prot: $proto"
+        for priority in {0..7}; do
+            if [[ $i == 4 && $proto == "r" ]]; then
+                TGT=$TGT4_RAW
+            elif [[ $i == 6 && $proto == "r" ]]; then
+                TGT=$TGT6_RAW
+            elif [ $i == 4 ]; then
+                TGT=$TGT4
+            else
+                TGT=$TGT6
+            fi
+
+            handle="${FILTER_COUNTER}${priority}"
+
+            create_filter $handle $priority ipv$i $proto $TGT
+
+            pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+                | jq ".[] | select(.options.handle == ${handle}) | \
+                .options.actions[0].stats.packets")
+
+            if [[ $pkts == 0 ]]; then
+                check_result 0
+            else
+                echo "prio $priority: expected 0, got $pkts"
+                check_result 1
+            fi
+
+            ip netns exec $NS ./cmsg_sender -$i -Q $priority \
+	            -p $proto $TGT $PORT
+
+            pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+                | jq ".[] | select(.options.handle == ${handle}) | \
+                .options.actions[0].stats.packets")
+            if [[ $pkts == 1 ]]; then
+                check_result 0
+            else
+                echo "prio $priority -Q: expected 1, got $pkts"
+                check_result 1
+            fi
+
+            ip netns exec $NS ./cmsg_sender -$i -P $priority \
+	            -p $proto $TGT $PORT
+
+            pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+                | jq ".[] | select(.options.handle == ${handle}) | \
+                .options.actions[0].stats.packets")
+            if [[ $pkts == 2 ]]; then
+                check_result 0
+            else
+                echo "prio $priority -P: expected 2, got $pkts"
+                check_result 1
+            fi
+        done
+        FILTER_COUNTER=$((FILTER_COUNTER + 10))
+    done
+done
+
+if [ $FAILED_TESTS -ne 0 ]; then
+    echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+    exit 1
+else
+    echo "OK - All $TOTAL_TESTS tests passed"
+    exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
index 1d7e756644bc..478af0aefa97 100755
--- a/tools/testing/selftests/net/cmsg_time.sh
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -34,13 +34,28 @@ BAD=0
 TOTAL=0
 
 check_result() {
+    local ret=$1
+    local got=$2
+    local exp=$3
+    local case=$4
+    local xfail=$5
+    local xf=
+    local inc=
+
+    if [ "$xfail" == "xfail" ]; then
+	xf="(XFAIL)"
+	inc=0
+    else
+	inc=1
+    fi
+
     ((TOTAL++))
-    if [ $1 -ne 0 ]; then
-	echo "  Case $4 returned $1, expected 0"
-	((BAD++))
+    if [ $ret -ne 0 ]; then
+	echo "  Case $case returned $ret, expected 0 $xf"
+	((BAD+=inc))
     elif [ "$2" != "$3" ]; then
-	echo "  Case $4 returned '$2', expected '$3'"
-	((BAD++))
+	echo "  Case $case returned '$got', expected '$exp' $xf"
+	((BAD+=inc))
     fi
 }
 
@@ -66,14 +81,14 @@ for i in "-4 $TGT4" "-6 $TGT6"; do
 		 awk '/SND/ { if ($3 > 1000) print "OK"; }')
 	check_result $? "$ts" "OK" "$prot - TXTIME abs"
 
-	[ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000
+	[ "$KSFT_MACHINE_SLOW" = yes ] && xfail=xfail
 
-	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay |
+	ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
 		 awk '/SND/ {snd=$3}
 		      /SCHED/ {sch=$3}
-		      END { if (snd - sch > '$((delay/2))') print "OK";
-			    else print snd, "-", sch, "<", '$((delay/2))'; }')
-	check_result $? "$ts" "OK" "$prot - TXTIME rel"
+		      END { if (snd - sch > 500) print "OK";
+			    else print snd, "-", sch, "<", 500; }')
+	check_result $? "$ts" "OK" "$prot - TXTIME rel" $xfail
     done
 done
 
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 5b9baf708950..61e5116987f3 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -107,3 +107,5 @@ CONFIG_XFRM_INTERFACE=m
 CONFIG_XFRM_USER=m
 CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IPV6_ILA=m
+CONFIG_IPV6_RPL_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh
index c03151e7791c..c159230c9b62 100755
--- a/tools/testing/selftests/net/fdb_notify.sh
+++ b/tools/testing/selftests/net/fdb_notify.sh
@@ -49,7 +49,7 @@ test_dup_vxlan_self()
 {
 	ip_link_add br up type bridge vlan_filtering 1
 	ip_link_add vx up type vxlan id 2000 dstport 4789
-	ip_link_master vx br
+	ip_link_set_master vx br
 
 	do_test_dup add "vxlan" dev vx self dst 192.0.2.1
 	do_test_dup del "vxlan" dev vx self dst 192.0.2.1
@@ -59,7 +59,7 @@ test_dup_vxlan_master()
 {
 	ip_link_add br up type bridge vlan_filtering 1
 	ip_link_add vx up type vxlan id 2000 dstport 4789
-	ip_link_master vx br
+	ip_link_set_master vx br
 
 	do_test_dup add "vxlan master" dev vx master
 	do_test_dup del "vxlan master" dev vx master
@@ -79,7 +79,7 @@ test_dup_macvlan_master()
 	ip_link_add br up type bridge vlan_filtering 1
 	ip_link_add dd up type dummy
 	ip_link_add mv up link dd type macvlan mode passthru
-	ip_link_master mv br
+	ip_link_set_master mv br
 
 	do_test_dup add "macvlan master" dev mv self
 	do_test_dup del "macvlan master" dev mv self
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 1d58b3b87465..847936363a12 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -291,6 +291,37 @@ fib_rule6_test()
 			"$getnomatch" "iif dscp redirect to table" \
 			"iif dscp no redirect to table"
 	fi
+
+	fib_check_iproute_support "flowlabel" "flowlabel"
+	if [ $? -eq 0 ]; then
+		match="flowlabel 0xfffff"
+		getmatch="flowlabel 0xfffff"
+		getnomatch="flowlabel 0xf"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "flowlabel redirect to table" \
+			"flowlabel no redirect to table"
+
+		match="flowlabel 0xfffff"
+		getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
+		getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "iif flowlabel redirect to table" \
+			"iif flowlabel no redirect to table"
+
+		match="flowlabel 0x08000/0x08000"
+		getmatch="flowlabel 0xfffff"
+		getnomatch="flowlabel 0xf7fff"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "flowlabel masked redirect to table" \
+			"flowlabel masked no redirect to table"
+
+		match="flowlabel 0x08000/0x08000"
+		getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
+		getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "iif flowlabel masked redirect to table" \
+			"iif flowlabel masked no redirect to table"
+	fi
 }
 
 fib_rule6_vrf_test()
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 7d885cff8d79..00bde7b6f39e 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -105,6 +105,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
 	vxlan_bridge_1q_port_8472_ipv6.sh \
 	vxlan_bridge_1q_port_8472.sh \
 	vxlan_bridge_1q.sh \
+	vxlan_reserved.sh \
 	vxlan_symmetric_ipv6.sh \
 	vxlan_symmetric.sh
 
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1c8a26046589..2b5700b61ffa 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change"
 NUM_NETIFS=4
 source lib.sh
 
@@ -77,12 +77,16 @@ cleanup()
 
 ping_ipv4()
 {
-	ping_test $h1 192.0.2.2
+	local msg=$1
+
+	ping_test $h1 192.0.2.2 "$msg"
 }
 
 ping_ipv6()
 {
-	ping6_test $h1 2001:db8:1::2
+	local msg=$1
+
+	ping6_test $h1 2001:db8:1::2 "$msg"
 }
 
 learning()
@@ -95,6 +99,21 @@ flooding()
 	flood_test $swp2 $h1 $h2
 }
 
+pvid_change()
+{
+	# Test that the changing of the VLAN-aware PVID does not affect
+	# VLAN-unaware forwarding
+	bridge vlan add vid 3 dev $swp1 pvid untagged
+
+	ping_ipv4 " with bridge port $swp1 PVID changed"
+	ping_ipv6 " with bridge port $swp1 PVID changed"
+
+	bridge vlan del vid 3 dev $swp1
+
+	ping_ipv4 " with bridge port $swp1 PVID deleted"
+	ping_ipv6 " with bridge port $swp1 PVID deleted"
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 7337f398f9cc..8de80acf249e 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -68,6 +68,7 @@ declare -A NETIFS=(
 : "${REQUIRE_JQ:=yes}"
 : "${REQUIRE_MZ:=yes}"
 : "${REQUIRE_MTOOLS:=no}"
+: "${REQUIRE_TEAMD:=no}"
 
 # Whether to override MAC addresses on interfaces participating in the test.
 : "${STABLE_MAC_ADDRS:=no}"
@@ -321,6 +322,9 @@ fi
 if [[ "$REQUIRE_MZ" = "yes" ]]; then
 	require_command $MZ
 fi
+if [[ "$REQUIRE_TEAMD" = "yes" ]]; then
+	require_command $TEAMD
+fi
 if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
 	# https://github.com/troglobit/mtools
 	require_command msend
@@ -932,13 +936,6 @@ packets_rate()
 	echo $(((t1 - t0) / interval))
 }
 
-mac_get()
-{
-	local if_name=$1
-
-	ip -j link show dev $if_name | jq -r '.[]["address"]'
-}
-
 ether_addr_to_u64()
 {
 	local addr="$1"
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index fe4d7c906a70..a20d22d1df36 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -49,6 +49,7 @@ ALL_TESTS="
 	test_mirror_gretap_second
 "
 
+REQUIRE_TEAMD="yes"
 NUM_NETIFS=6
 source lib.sh
 source mirror_lib.sh
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
index 1261e6f46e34..ff7049582d35 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -53,6 +53,7 @@ ALL_TESTS="
 	test_mirror_gretap_second
 "
 
+REQUIRE_TEAMD="yes"
 NUM_NETIFS=6
 source lib.sh
 source mirror_lib.sh
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
index e064b946e821..16583a470ec3 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
@@ -109,6 +109,7 @@ ALL_TESTS="
 	ping_ipv4
 	ping_ipv6
 "
+REQUIRE_TEAMD="yes"
 NUM_NETIFS=8
 source lib.sh
 
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
index f05ffe213c46..2a4cd1af1b85 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
@@ -76,6 +76,7 @@
 	ping_ipv4
 	ping_ipv6
     "}
+REQUIRE_TEAMD="yes"
 NUM_NETIFS=8
 : ${lib_dir:=.}
 source $lib_dir/lib.sh
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
index 3885a2a91f7d..baed5e380dae 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
@@ -20,6 +20,7 @@ ALL_TESTS="
 	test_port_range_ipv4_tcp
 	test_port_range_ipv6_udp
 	test_port_range_ipv6_tcp
+	test_port_range_ipv4_udp_drop
 "
 
 NUM_NETIFS=4
@@ -194,6 +195,51 @@ test_port_range_ipv6_tcp()
 	__test_port_range $proto $ip_proto $sip $dip $mode "$name"
 }
 
+test_port_range_ipv4_udp_drop()
+{
+	local proto=ipv4
+	local ip_proto=udp
+	local sip=192.0.2.1
+	local dip=192.0.2.2
+	local mode="-4"
+	local name="IPv4 UDP Drop"
+	local dmac=$(mac_get $h2)
+	local smac=$(mac_get $h1)
+	local sport_min=2000
+	local sport_max=3000
+	local sport_mid=$((sport_min + (sport_max - sport_min) / 2))
+	local dport=5000
+
+	RET=0
+
+	tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \
+		flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+		src_port $sport_min-$sport_max \
+		dst_port $dport \
+		action drop
+
+	# Test ports outside range - should pass
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$((sport_min - 1)),dp=$dport"
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$((sport_max + 1)),dp=$dport"
+
+	# Test ports inside range - should be dropped
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$sport_min,dp=$dport"
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$sport_mid,dp=$dport"
+	$MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+		-t $ip_proto "sp=$sport_max,dp=$dport"
+
+	tc_check_packets "dev $swp1 ingress" 101 3
+	check_err $? "Filter did not drop the expected number of packets"
+
+	tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower
+
+	log_test "Port range matching - $name"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
new file mode 100755
index 000000000000..46c31794b91b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
@@ -0,0 +1,352 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+
+# | H1 (vrf)           |
+# |    + $h1           |
+# |    | 192.0.2.1/28  |
+# +----|---------------+
+#      |
+# +----|--------------------------------+
+# | SW |                                |
+# | +--|------------------------------+ |
+# | |  + $swp1           BR1 (802.1d) | |
+# | |                                 | |
+# | |  + vx1 (vxlan)                  | |
+# | |    local 192.0.2.17             | |
+# | |    id 1000 dstport $VXPORT      | |
+# | +---------------------------------+ |
+# |                                     |
+# |  192.0.2.32/28 via 192.0.2.18       |
+# |                                     |
+# |  + $rp1                             |
+# |  | 192.0.2.17/28                    |
+# +--|----------------------------------+
+#    |
+# +--|----------------------------------+
+# |  |                                  |
+# |  + $rp2                             |
+# |    192.0.2.18/28                    |
+# |                                     |
+# | VRP2 (vrf)                          |
+# +-------------------------------------+
+
+: ${VXPORT:=4789}
+: ${ALL_TESTS:="
+	default_test
+	plain_test
+	reserved_0_test
+	reserved_10_test
+	reserved_31_test
+	reserved_56_test
+	reserved_63_test
+    "}
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+	defer simple_if_fini $h1 192.0.2.1/28
+
+	tc qdisc add dev $h1 clsact
+	defer tc qdisc del dev $h1 clsact
+
+	tc filter add dev $h1 ingress pref 77 \
+	   prot ip flower skip_hw ip_proto icmp action drop
+	defer tc filter del dev $h1 ingress pref 77
+}
+
+switch_create()
+{
+	ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip_link_set_addr br1 $(mac_get $swp1)
+	ip_link_set_up br1
+
+	ip_link_set_up $rp1
+	ip_addr_add $rp1 192.0.2.17/28
+	ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18
+
+	ip_link_set_master $swp1 br1
+	ip_link_set_up $swp1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+	defer simple_if_fini $rp2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+	defer vrf_cleanup
+
+	forwarding_enable
+	defer forwarding_restore
+
+	h1_create
+	switch_create
+
+	vrp2_create
+}
+
+vxlan_header_bytes()
+{
+	local vni=$1; shift
+	local -a extra_bits=("$@")
+	local -a bits
+	local i
+
+	for ((i=0; i < 64; i++)); do
+		bits[i]=0
+	done
+
+	# Bit 4 is the I flag and is always on.
+	bits[4]=1
+
+	for i in ${extra_bits[@]}; do
+		bits[i]=1
+	done
+
+	# Bits 32..55 carry the VNI
+	local mask=0x800000
+	for ((i=0; i < 24; i++)); do
+		bits[$((i + 32))]=$(((vni & mask) != 0))
+		((mask >>= 1))
+	done
+
+	local bytes
+	for ((i=0; i < 8; i++)); do
+		local byte=0
+		local j
+		for ((j=0; j < 8; j++)); do
+			local bit=${bits[8 * i + j]}
+			((byte += bit << (7 - j)))
+		done
+		bytes+=$(printf %02x $byte):
+	done
+
+	echo ${bytes%:}
+}
+
+neg_bytes()
+{
+	local bytes=$1; shift
+
+	local -A neg=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8
+		      [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:)
+	local out
+	local i
+
+	for ((i=0; i < ${#bytes}; i++)); do
+		local c=${bytes:$i:1}
+		out+=${neg[$c]}
+	done
+	echo $out
+}
+
+vxlan_ping_do()
+{
+	local count=$1; shift
+	local dev=$1; shift
+	local next_hop_mac=$1; shift
+	local dest_ip=$1; shift
+	local dest_mac=$1; shift
+	local vni=$1; shift
+	local reserved_bits=$1; shift
+
+	local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits)
+
+	$MZ $dev -c $count -d 100msec -q \
+		-b $next_hop_mac -B $dest_ip \
+		-t udp sp=23456,dp=$VXPORT,p=$(:
+		    )"$vxlan_header:"$(              : VXLAN
+		    )"$dest_mac:"$(                  : ETH daddr
+		    )"00:11:22:33:44:55:"$(          : ETH saddr
+		    )"08:00:"$(                      : ETH type
+		    )"45:"$(                         : IP version + IHL
+		    )"00:"$(                         : IP TOS
+		    )"00:54:"$(                      : IP total length
+		    )"99:83:"$(                      : IP identification
+		    )"40:00:"$(                      : IP flags + frag off
+		    )"40:"$(                         : IP TTL
+		    )"01:"$(                         : IP proto
+		    )"00:00:"$(                      : IP header csum
+		    )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr
+		    )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr
+		    )"08:"$(                         : ICMP type
+		    )"00:"$(                         : ICMP code
+		    )"8b:f2:"$(                      : ICMP csum
+		    )"1f:6a:"$(                      : ICMP request identifier
+		    )"00:01:"$(                      : ICMP request seq. number
+		    )"4f:ff:c5:5b:00:00:00:00:"$(    : ICMP payload
+		    )"6d:74:0b:00:00:00:00:00:"$(    :
+		    )"10:11:12:13:14:15:16:17:"$(    :
+		    )"18:19:1a:1b:1c:1d:1e:1f:"$(    :
+		    )"20:21:22:23:24:25:26:27:"$(    :
+		    )"28:29:2a:2b:2c:2d:2e:2f:"$(    :
+		    )"30:31:32:33:34:35:36:37"
+}
+
+vxlan_device_add()
+{
+	ip_link_add vx1 up type vxlan id 1000		\
+		local 192.0.2.17 dstport "$VXPORT"	\
+		nolearning noudpcsum tos inherit ttl 100 "$@"
+	ip_link_set_master vx1 br1
+}
+
+vxlan_all_reserved_bits()
+{
+	local i
+
+	for ((i=0; i < 64; i++)); do
+		if ((i == 4 || i >= 32 && i < 56)); then
+			continue
+		fi
+		echo $i
+	done
+}
+
+vxlan_ping_vanilla()
+{
+	vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000
+}
+
+vxlan_ping_reserved()
+{
+	for bit in $(vxlan_all_reserved_bits); do
+		vxlan_ping_do 1 $rp2 $(mac_get $rp1) \
+			      192.0.2.17 $(mac_get $h1) 1000 "$bit"
+		((n++))
+	done
+}
+
+vxlan_ping_test()
+{
+	local what=$1; shift
+	local get_stat=$1; shift
+	local expect=$1; shift
+
+	RET=0
+
+	local t0=$($get_stat)
+
+	"$@"
+	check_err $? "Failure when running $@"
+
+	local t1=$($get_stat)
+	local delta=$((t1 - t0))
+
+	((expect == delta))
+	check_err $? "Expected to capture $expect packets, got $delta."
+
+	log_test "$what"
+}
+
+__default_test_do()
+{
+	local n_allowed_bits=$1; shift
+	local what=$1; shift
+
+	vxlan_ping_test "$what: clean packets" \
+		"tc_rule_stats_get $h1 77 ingress" \
+		10 vxlan_ping_vanilla
+
+	local t0=$(link_stats_get vx1 rx errors)
+	vxlan_ping_test "$what: mangled packets" \
+		"tc_rule_stats_get $h1 77 ingress" \
+		$n_allowed_bits vxlan_ping_reserved
+	local t1=$(link_stats_get vx1 rx errors)
+
+	RET=0
+	local expect=$((39 - n_allowed_bits))
+	local delta=$((t1 - t0))
+	((expect == delta))
+	check_err $? "Expected $expect error packets, got $delta."
+	log_test "$what: drops reported"
+}
+
+default_test_do()
+{
+	vxlan_device_add
+	__default_test_do 0 "Default"
+}
+
+default_test()
+{
+	in_defer_scope \
+	    default_test_do
+}
+
+plain_test_do()
+{
+	vxlan_device_add reserved_bits 0xf7ffffff000000ff
+	__default_test_do 0 "reserved_bits 0xf7ffffff000000ff"
+}
+
+plain_test()
+{
+	in_defer_scope \
+	    plain_test_do
+}
+
+reserved_test()
+{
+	local bit=$1; shift
+
+	local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit)
+	local reserved_bytes=$(neg_bytes $allowed_bytes)
+	local reserved_bits=${reserved_bytes//:/}
+
+	vxlan_device_add reserved_bits 0x$reserved_bits
+	__default_test_do 1 "reserved_bits 0x$reserved_bits"
+}
+
+reserved_0_test()
+{
+	in_defer_scope \
+	    reserved_test 0
+}
+
+reserved_10_test()
+{
+	in_defer_scope \
+	    reserved_test 10
+}
+
+reserved_31_test()
+{
+	in_defer_scope \
+	    reserved_test 31
+}
+
+reserved_56_test()
+{
+	in_defer_scope \
+	    reserved_test 56
+}
+
+reserved_63_test()
+{
+	in_defer_scope \
+	    reserved_test 63
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index be4a30a0d02a..9b44a091802c 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
 
 	attr->rta_len = RTA_LENGTH(size);
 	attr->rta_type = rta_type;
-	memcpy(RTA_DATA(attr), payload, size);
+	if (payload)
+		memcpy(RTA_DATA(attr), payload, size);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 8994fec1c38f..0bd9a038a1f0 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -435,6 +435,13 @@ xfail_on_veth()
 	fi
 }
 
+mac_get()
+{
+	local if_name=$1
+
+	ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
 kill_process()
 {
 	local pid=$1; shift
@@ -451,7 +458,7 @@ ip_link_add()
 	defer ip link del dev "$name"
 }
 
-ip_link_master()
+ip_link_set_master()
 {
 	local member=$1; shift
 	local master=$1; shift
@@ -459,3 +466,62 @@ ip_link_master()
 	ip link set dev "$member" master "$master"
 	defer ip link set dev "$member" nomaster
 }
+
+ip_link_set_addr()
+{
+	local name=$1; shift
+	local addr=$1; shift
+
+	local old_addr=$(mac_get "$name")
+	ip link set dev "$name" address "$addr"
+	defer ip link set dev "$name" address "$old_addr"
+}
+
+ip_link_is_up()
+{
+	local name=$1; shift
+
+	local state=$(ip -j link show "$name" |
+		      jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"')
+	[[ $state == "UP" ]]
+}
+
+ip_link_set_up()
+{
+	local name=$1; shift
+
+	if ! ip_link_is_up "$name"; then
+		ip link set dev "$name" up
+		defer ip link set dev "$name" down
+	fi
+}
+
+ip_link_set_down()
+{
+	local name=$1; shift
+
+	if ip_link_is_up "$name"; then
+		ip link set dev "$name" down
+		defer ip link set dev "$name" up
+	fi
+}
+
+ip_addr_add()
+{
+	local name=$1; shift
+
+	ip addr add dev "$name" "$@"
+	defer ip addr del dev "$name" "$@"
+}
+
+ip_route_add()
+{
+	ip route add "$@"
+	defer ip route del "$@"
+}
+
+bridge_vlan_add()
+{
+	bridge vlan add "$@"
+	defer bridge vlan del "$@"
+}
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index 18b9443454a9..c22623b9a2a5 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
 # Additional include paths needed by kselftest.h
 CFLAGS += -I../../
@@ -9,7 +9,10 @@ TEST_FILES := ../../../../../Documentation/netlink/specs
 TEST_FILES += ../../../../net/ynl
 
 TEST_GEN_FILES += csum
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
 
 TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
 
 include ../../lib.mk
+
+include ../bpf.mk
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 477ae76de93d..3efe005436cd 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -71,6 +71,11 @@ def ksft_in(a, b, comment=""):
         _fail("Check failed", a, "not in", b, comment)
 
 
+def ksft_is(a, b, comment=""):
+    if a is not b:
+        _fail("Check failed", a, "is not", b, comment)
+
+
 def ksft_ge(a, b, comment=""):
     if a < b:
         _fail("Check failed", a, "<", b, comment)
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 72590c3f90f1..9e3bcddcf3e8 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -10,7 +10,9 @@ import time
 
 
 class CmdExitFailure(Exception):
-    pass
+    def __init__(self, msg, cmd_obj):
+        super().__init__(msg)
+        self.cmd = cmd_obj
 
 
 class cmd:
@@ -48,7 +50,7 @@ class cmd:
             if len(stderr) > 0 and stderr[-1] == "\n":
                 stderr = stderr[:-1]
             raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
-                                 (self.proc.args, stdout, stderr))
+                                 (self.proc.args, stdout, stderr), self)
 
 
 class bkg(cmd):
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 076a7e8dc3eb..ad1e36baee2a 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -13,14 +13,14 @@ try:
         SPEC_PATH = KSFT_DIR / "net/lib/specs"
 
         sys.path.append(tools_full_path.as_posix())
-        from net.lib.ynl.lib import YnlFamily, NlError
+        from net.lib.ynl.pyynl.lib import YnlFamily, NlError
     else:
         # Running in tree
         tools_full_path = KSRC / "tools"
         SPEC_PATH = KSRC / "Documentation/netlink/specs"
 
         sys.path.append(tools_full_path.as_posix())
-        from net.ynl.lib import YnlFamily, NlError
+        from net.ynl.pyynl.lib import YnlFamily, NlError
 except ModuleNotFoundError as e:
     ksft_pr("Failed importing `ynl` library from kernel sources")
     ksft_pr(str(e))
diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
new file mode 100644
index 000000000000..e73fab3edd9f
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+SEC("xdp.frags")
+int xdp_dummy_prog_frags(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
new file mode 100755
index 000000000000..881eb399798f
--- /dev/null
+++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# WARNING
+# -------
+# This is just a dummy script that triggers encap cases with possible dst cache
+# reference loops in affected lwt users (see list below). Some cases are
+# pathological configurations for simplicity, others are valid. Overall, we
+# don't want this issue to happen, no matter what. In order to catch any
+# reference loops, kmemleak MUST be used. The results alone are always blindly
+# successful, don't rely on them. Note that the following tests may crash the
+# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is
+# not present.
+#
+# Affected lwt users so far (please update accordingly if needed):
+#  - ila_lwt (output only)
+#  - ioam6_iptunnel (output only)
+#  - rpl_iptunnel (both input and output)
+#  - seg6_iptunnel (both input and output)
+
+source lib.sh
+
+check_compatibility()
+{
+	setup_ns tmp_node &>/dev/null
+	if [ $? != 0 ]; then
+		echo "SKIP: Cannot create netns."
+		exit $ksft_skip
+	fi
+
+	ip link add name veth0 netns $tmp_node type veth \
+		peer name veth1 netns $tmp_node &>/dev/null
+	local ret=$?
+
+	ip -netns $tmp_node link set veth0 up &>/dev/null
+	ret=$((ret + $?))
+
+	ip -netns $tmp_node link set veth1 up &>/dev/null
+	ret=$((ret + $?))
+
+	if [ $ret != 0 ]; then
+		echo "SKIP: Cannot configure links."
+		cleanup_ns $tmp_node
+		exit $ksft_skip
+	fi
+
+	lsmod 2>/dev/null | grep -q "ila"
+	ila_lsmod=$?
+	[ $ila_lsmod != 0 ] && modprobe ila &>/dev/null
+
+	ip -netns $tmp_node route add 2001:db8:1::/64 \
+		encap ila 1:2:3:4 csum-mode no-action ident-type luid \
+			hook-type output \
+		dev veth0 &>/dev/null
+
+	ip -netns $tmp_node route add 2001:db8:2::/64 \
+		encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \
+		dev veth0 &>/dev/null
+
+	ip -netns $tmp_node route add 2001:db8:3::/64 \
+		encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null
+
+	ip -netns $tmp_node route add 2001:db8:4::/64 \
+		encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null
+
+	ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila"
+	skip_ila=$?
+
+	ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6"
+	skip_ioam6=$?
+
+	ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl"
+	skip_rpl=$?
+
+	ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6"
+	skip_seg6=$?
+
+	cleanup_ns $tmp_node
+}
+
+setup()
+{
+	setup_ns alpha beta gamma &>/dev/null
+
+	ip link add name veth-alpha netns $alpha type veth \
+		peer name veth-betaL netns $beta &>/dev/null
+
+	ip link add name veth-betaR netns $beta type veth \
+		peer name veth-gamma netns $gamma &>/dev/null
+
+	ip -netns $alpha link set veth-alpha name veth0 &>/dev/null
+	ip -netns $beta link set veth-betaL name veth0 &>/dev/null
+	ip -netns $beta link set veth-betaR name veth1 &>/dev/null
+	ip -netns $gamma link set veth-gamma name veth0 &>/dev/null
+
+	ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+	ip -netns $alpha link set veth0 up &>/dev/null
+	ip -netns $alpha link set lo up &>/dev/null
+	ip -netns $alpha route add 2001:db8:2::/64 \
+		via 2001:db8:1::1 dev veth0 &>/dev/null
+
+	ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null
+	ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null
+	ip -netns $beta link set veth0 up &>/dev/null
+	ip -netns $beta link set veth1 up &>/dev/null
+	ip -netns $beta link set lo up &>/dev/null
+	ip -netns $beta route del 2001:db8:2::/64
+	ip -netns $beta route add 2001:db8:2::/64 dev veth1
+	ip netns exec $beta \
+		sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null
+
+	ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null
+	ip -netns $gamma link set veth0 up &>/dev/null
+	ip -netns $gamma link set lo up &>/dev/null
+	ip -netns $gamma route add 2001:db8:1::/64 \
+		via 2001:db8:2::1 dev veth0 &>/dev/null
+
+	sleep 1
+
+	ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null
+	if [ $? != 0 ]; then
+		echo "SKIP: Setup failed."
+		exit $ksft_skip
+	fi
+
+	sleep 1
+}
+
+cleanup()
+{
+	cleanup_ns $alpha $beta $gamma
+	[ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null
+}
+
+run_ila()
+{
+	if [ $skip_ila != 0 ]; then
+		echo "SKIP: ila (output)"
+		return
+	fi
+
+	ip -netns $beta route del 2001:db8:2::/64
+	ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \
+		encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \
+			hook-type output \
+		dev veth1 &>/dev/null
+	sleep 1
+
+	echo "TEST: ila (output)"
+	ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+
+	ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128
+	ip -netns $beta route add 2001:db8:2::/64 dev veth1
+	sleep 1
+}
+
+run_ioam6()
+{
+	if [ $skip_ioam6 != 0 ]; then
+		echo "SKIP: ioam6 (output)"
+		return
+	fi
+
+	ip -netns $beta route change 2001:db8:2::/64 \
+		encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \
+		dev veth1 &>/dev/null
+	sleep 1
+
+	echo "TEST: ioam6 (output)"
+	ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+}
+
+run_rpl()
+{
+	if [ $skip_rpl != 0 ]; then
+		echo "SKIP: rpl (input)"
+		echo "SKIP: rpl (output)"
+		return
+	fi
+
+	ip -netns $beta route change 2001:db8:2::/64 \
+		encap rpl segs 2001:db8:2::2 \
+		dev veth1 &>/dev/null
+	sleep 1
+
+	echo "TEST: rpl (input)"
+	ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+
+	echo "TEST: rpl (output)"
+	ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+}
+
+run_seg6()
+{
+	if [ $skip_seg6 != 0 ]; then
+		echo "SKIP: seg6 (input)"
+		echo "SKIP: seg6 (output)"
+		return
+	fi
+
+	ip -netns $beta route change 2001:db8:2::/64 \
+		encap seg6 mode inline segs 2001:db8:2::2 \
+		dev veth1 &>/dev/null
+	sleep 1
+
+	echo "TEST: seg6 (input)"
+	ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+
+	echo "TEST: seg6 (output)"
+	ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+}
+
+run()
+{
+	run_ila
+	run_ioam6
+	run_rpl
+	run_seg6
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges."
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool."
+	exit $ksft_skip
+fi
+
+check_compatibility
+
+trap cleanup EXIT
+
+setup
+run
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 8e3fc05a5397..c76525fe2b84 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -2,7 +2,7 @@
 
 top_srcdir = ../../../../..
 
-CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
 
 TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
 	      simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 414addef9a45..d240d02fa443 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -1302,7 +1302,7 @@ again:
 		return ret;
 
 	if (cfg_truncate > 0) {
-		xdisconnect(fd);
+		shutdown(fd, SHUT_WR);
 	} else if (--cfg_repeat > 0) {
 		xdisconnect(fd);
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index b48b4e56826a..5e3c56253274 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -137,7 +137,7 @@ TEST_GROUP=""
 #shellcheck disable=SC2317
 cleanup()
 {
-	rm -f "$cin_disconnect" "$cout_disconnect"
+	rm -f "$cin_disconnect"
 	rm -f "$cin" "$cout"
 	rm -f "$sin" "$sout"
 	rm -f "$capout"
@@ -155,7 +155,6 @@ cin=$(mktemp)
 cout=$(mktemp)
 capout=$(mktemp)
 cin_disconnect="$cin".disconnect
-cout_disconnect="$cout".disconnect
 trap cleanup EXIT
 
 mptcp_lib_ns_init ns1 ns2 ns3 ns4
@@ -445,12 +444,8 @@ do_transfer()
 	printf "(duration %05sms) " "${duration}"
 	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
 		mptcp_lib_pr_fail "client exit code $retc, server $rets"
-		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
-		cat /tmp/${listener_ns}.out
-		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
-		[ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out
+		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
+			"/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
 
 		echo
 		cat "$capout"
@@ -587,7 +582,7 @@ make_file()
 	mptcp_lib_make_file $name 1024 $ksize
 	dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null
 
-	echo "Created $name (size $(du -b "$name")) containing data sent by $who"
+	echo "Created $name (size $(stat -c "%s" "$name") B) containing data sent by $who"
 }
 
 run_tests_lo()
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index c07e2bd3a315..13a3b68181ee 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1039,13 +1039,8 @@ do_transfer()
 
 	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
 		fail_test "client exit code $retc, server $rets"
-		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
-		cat /tmp/${listener_ns}.out
-		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
-		cat /tmp/${connector_ns}.out
-
+		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
+			"/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
 		return 1
 	fi
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 975d4d4c862a..051e289d7967 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -107,6 +107,27 @@ mptcp_lib_pr_info() {
 	mptcp_lib_print_info "INFO: ${*}"
 }
 
+# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file
+mptcp_lib_pr_err_stats() {
+	local lns="${1}"
+	local cns="${2}"
+	local port="${3}"
+	local lstat="${4}"
+	local cstat="${5}"
+
+	echo -en "${MPTCP_LIB_COLOR_RED}"
+	{
+		printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}"
+		ip netns exec "${lns}" ss -Menitam -o "sport = :${port}"
+		cat "${lstat}"
+
+		printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}"
+		ip netns exec "${cns}" ss -Menitam -o "dport = :${port}"
+		[ "${lstat}" != "${cstat}" ] && cat "${cstat}"
+	} 1>&2
+	echo -en "${MPTCP_LIB_COLOR_RESET}"
+}
+
 # SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
 # features using the last version of the kernel and the selftests to make sure
 # a test is not being skipped by mistake.
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 5e8d5b83e2d0..418a903c3a4d 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -169,6 +169,11 @@ do_transfer()
 		cmsg+=",TCPINQ"
 	fi
 
+	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+		nstat -n
+	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+		nstat -n
+
 	timeout ${timeout_test} \
 		ip netns exec ${listener_ns} \
 			$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
@@ -189,14 +194,16 @@ do_transfer()
 	wait $spid
 	local rets=$?
 
+	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+		nstat | grep Tcp > /tmp/${listener_ns}.out
+	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+		nstat | grep Tcp > /tmp/${connector_ns}.out
+
 	print_title "Transfer ${ip:2}"
 	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
 		mptcp_lib_pr_fail "client exit code $retc, server $rets"
-		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
-
-		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
-		ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
+			"/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
 
 		mptcp_lib_result_fail "transfer ${ip}"
 
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 8fa77c8e9b65..9c2a415976cb 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -155,6 +155,11 @@ do_transfer()
 		sleep 1
 	fi
 
+	NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
+		nstat -n
+	NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
+		nstat -n
+
 	timeout ${timeout_test} \
 		ip netns exec ${ns3} \
 			./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
@@ -180,25 +185,27 @@ do_transfer()
 		kill ${cappid_connector}
 	fi
 
+	NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
+		nstat | grep Tcp > /tmp/${ns3}.out
+	NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
+		nstat | grep Tcp > /tmp/${ns1}.out
+
 	cmp $sin $cout > /dev/null 2>&1
 	local cmps=$?
 	cmp $cin $sout > /dev/null 2>&1
 	local cmpc=$?
 
-	printf "%-16s" " max $max_time "
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
 	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+		printf "%-16s" " max $max_time "
 		mptcp_lib_pr_ok
 		cat "$capout"
 		return 0
 	fi
 
-	mptcp_lib_pr_fail
-	echo "client exit code $retc, server $rets" 1>&2
-	echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
-	ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
-	echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
-	ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+	mptcp_lib_pr_fail "client exit code $retc, server $rets"
+	mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \
+		"/tmp/${ns3}.out" "/tmp/${ns1}.out"
 	ls -l $sin $cout
 	ls -l $cin $sout
 
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index c28379a965d8..1559ba275105 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -13,6 +13,12 @@ source lib.sh
 
 checktool "nft --version" "run test without nft tool"
 
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+	echo SKIP: kernel is tainted
+	exit $ksft_skip
+fi
+
 cleanup() {
 	cleanup_all_ns
 }
@@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then
 	echo PASS: kernel not tainted
 else
 	echo ERROR: kernel is tainted
+	dmesg
 	ret=1
 fi
 
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
index 6a764d70ab06..4788641717d9 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
@@ -4,6 +4,12 @@ source lib.sh
 
 checktool "nft --version" "run test without nft tool"
 
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+	echo SKIP: kernel is tainted
+	exit $ksft_skip
+fi
+
 cleanup() {
 	cleanup_all_ns
 }
@@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then
 	echo PASS: kernel not tainted
 else
 	echo ERROR: kernel is tainted
+	dmesg
 	exit 1
 fi
 
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 785e3875a6da..784d1b46912b 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -593,6 +593,7 @@ EOF
 		echo "PASS: queue program exiting while packets queued"
 	else
 		echo "TAINT: queue program exiting while packets queued"
+		dmesg
 		ret=1
 	fi
 }
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index 93d9d914529b..93e8cb671c3d 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -18,6 +18,23 @@ def lo_check(nf) -> None:
     ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
 
 
+def napi_list_check(nf) -> None:
+    with NetdevSimDev(queue_count=100) as nsimdev:
+        nsim = nsimdev.nsims[0]
+
+        ip(f"link set dev {nsim.ifname} up")
+
+        napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+        ksft_eq(len(napis), 100)
+
+        for q in [50, 0, 99]:
+            for i in range(4):
+                nsim.dfs_write("queue_reset", f"{q} {i}")
+                napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+                ksft_eq(len(napis), 100,
+                        comment=f"queue count after reset queue {q} mode {i}")
+
+
 def page_pool_check(nf) -> None:
     with NetdevSimDev() as nsimdev:
         nsim = nsimdev.nsims[0]
@@ -89,7 +106,7 @@ def page_pool_check(nf) -> None:
 
 def main() -> None:
     nf = NetdevFamily()
-    ksft_run([empty_check, lo_check, page_pool_check],
+    ksft_run([empty_check, lo_check, page_pool_check, napi_list_check],
              args=(nf, ))
     ksft_exit()
 
diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile
index 2f1508abc826..3fd1da2ec07d 100644
--- a/tools/testing/selftests/net/openvswitch/Makefile
+++ b/tools/testing/selftests/net/openvswitch/Makefile
@@ -2,7 +2,7 @@
 
 top_srcdir = ../../../../..
 
-CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
 
 TEST_PROGS := openvswitch.sh
 
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index 4071c133f29e..ef8b25a606d8 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -23,7 +23,7 @@ if [ $# -ne 1 ]; then
 	ktap_exit_fail_msg "usage: $0 <script>"
 	exit "$KSFT_FAIL"
 fi
-script="$1"
+script="$(basename $1)"
 
 if [ -z "$(which packetdrill)" ]; then
 	ktap_skip_all "packetdrill not found in PATH"
@@ -31,16 +31,32 @@ if [ -z "$(which packetdrill)" ]; then
 fi
 
 declare -a optargs
+failfunc=ktap_test_fail
+
 if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
 	optargs+=('--tolerance_usecs=14000')
+
+	# xfail tests that are known flaky with dbg config, not fixable.
+	# still run them for coverage (and expect 100% pass without dbg).
+	declare -ar xfail_list=(
+		"tcp_eor_no-coalesce-retrans.pkt"
+		"tcp_fast_recovery_prr-ss.*.pkt"
+		"tcp_slow_start_slow-start-after-win-update.pkt"
+		"tcp_timestamping.*.pkt"
+		"tcp_user_timeout_user-timeout-probe.pkt"
+		"tcp_zerocopy_epoll_.*.pkt"
+		"tcp_tcp_info_tcp-info-.*-limited.pkt"
+	)
+	readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$"
+	[[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail
 fi
 
 ktap_print_header
 ktap_set_plan 2
 
-unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
-	&& ktap_test_pass "ipv4" || ktap_test_fail "ipv4"
-unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
-	&& ktap_test_pass "ipv6" || ktap_test_fail "ipv6"
+unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \
+	&& ktap_test_pass "ipv4" || $failfunc "ipv4"
+unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \
+	&& ktap_test_pass "ipv6" || $failfunc "ipv6"
 
 ktap_finished
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt
new file mode 100644
index 000000000000..38535701656e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking accept.
+
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0...0.200 accept(3, ..., ...) = 4
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 257
+
+  +.1 write(4, ..., 2000) = 2000
+   +0 > P. 1:2001(2000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt
new file mode 100644
index 000000000000..3692ef102381
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking connect.
+
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+
+  +.1...0.200 connect(3, ..., ...) = 0
+
+   +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+  +.1 < S. 0:0(0) ack 1 win 5792 <mss 1460,nop,wscale 2,nop,nop,sackOK>
+   +0 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
new file mode 100644
index 000000000000..914eabab367a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking read.
+--tolerance_usecs=10000
+
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+
+   +0...0.100 read(4, ..., 2000) = 2000
+  +.1 < P. 1:2001(2000) ack 1 win 257
+   +0 > . 1:1(0) ack 2001
+
+  +.1...0.200 read(4, ..., 2000) = 2000
+  +.1 < P. 2001:4001(2000) ack 1 win 257
+   +0 > . 1:1(0) ack 4001
+
+  +.1 < P. 4001:6001(2000) ack 1 win 257
+   +0 > . 1:1(0) ack 6001
+   +0...0.000 read(4, ..., 1000) = 1000
+   +0...0.000 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt
new file mode 100644
index 000000000000..cec5a0725d95
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking write.
+--tolerance_usecs=10000
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10
+`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 50000 <mss 1000,nop,wscale 0>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 50000
+   +0 accept(3, ..., ...) = 4
+
+// Kernel doubles our value -> sk->sk_sndbuf is set to 42000
+   +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF,  [21000], 4) = 0
+   +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF,  [42000], [4]) = 0
+
+// A write of 60000 does not block.
+   +0...0.300 write(4, ..., 61000) = 61000    // this write() blocks
+
+  +.1 < . 1:1(0) ack 10001 win 50000
+
+  +.1 < . 1:1(0) ack 30001 win 50000
+
+// This ACK should wakeup the write(). An ACK of 35001 does not.
+  +.1 < . 1:1(0) ack 36001 win 50000
+
+// Reset to sysctls defaults.
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt
new file mode 100644
index 000000000000..8514d6bdbb6d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test basic connection teardown where local process closes first:
+// the local process calls close() first, so we send a FIN, and receive an ACK.
+// Then we receive a FIN and ACK it.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.01...0.011 connect(3, ..., ...) = 0
+   +0 >  S 0:0(0) <...>
+   +0 < S. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+   +0 >  . 1:1(0) ack 1
+
+   +0 write(3, ..., 1000) = 1000
+   +0 >  P. 1:1001(1000) ack 1
+   +0 < . 1:1(0) ack 1001 win 257
+
+   +0 close(3) = 0
+   +0 >  F. 1001:1001(0) ack 1
+   +0 < . 1:1(0) ack 1002 win 257
+
+   +0 < F. 1:1(0) ack 1002 win 257
+   +0 >  . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt
new file mode 100644
index 000000000000..04103134bd99
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test to make sure no RST is being sent when close()
+// is called on a socket with SYN_SENT state.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+   +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+   +0 > S 0:0(0) <...>
+
+// Application decideds to close the socket in SYN_SENT state
+// Make sure no RST is sent after close().
+   +0 close(3) = 0
+
+// Receive syn-ack to trigger the send side packet examination:
+// If a RESET were sent right after close(), it would have failed with
+// a mismatched timestamp.
+  +.1 < S. 0:0(0) ack 1 win 32000 <mss 1460,nop,wscale 7>
+   +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt
new file mode 100644
index 000000000000..5f3a2914213a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify behavior for the sequence: remote side sends FIN, then we close().
+// Since the remote side (client) closes first, we test our LAST_ACK code path.
+
+`./defaults.sh`
+
+// Initialize a server socket.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+// Client closes first.
+ +.01 < F. 1:1(0) ack 1 win 257
+   +0 > . 1:1(0) ack 2
+
+// App notices that client closed.
+   +0 read(4, ..., 1000) = 0
+
+// Then we close.
+ +.01 close(4) = 0
+   +0 > F. 1:1(0) ack 2
+
+// Client ACKs our FIN.
+ +.01 < . 2:2(0) ack 2 win 257
+
+// Verify that we send RST in response to any incoming segments
+// (because the kernel no longer has any record of this socket).
+ +.01 < . 2:2(0) ack 2 win 257
+   +0 > R 2:2(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt
new file mode 100644
index 000000000000..643baf3267cf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1).
+//
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1  # fully enabled
+`
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+
+// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response
++.002 ... 0.004 connect(4, ..., ...) = 0
+
+   +0 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.002 < SE. 0:0(0) ack 1 win 32767 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+   +0 > . 1:1(0) ack 1
+
+// Write 1 MSS.
++.002 write(4, ..., 1000) = 1000
+// Send 1 MSS with ect0.
+   +0 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt
new file mode 100644
index 000000000000..f95b9b3c9fa1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk. The large chunk itself should be packetized as
+// usual.
+`./defaults.sh
+`
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+   +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+   +0 write(4, ..., 10400) = 10400
+
+// Write another 10040B chunk with no coalescing options.
+   +0 send(4, ..., 10400, MSG_EOR) = 10400
+
+// Write a 2KB chunk. This chunk should not be appended to the packets created
+// the previous chunk.
+   +0 write(4, ..., 2000) = 2000
+
+   +0 > P. 1:10001(10000) ack 1
++.001 < .  1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+   +0 > P. 10001:20801(10800) ack 1
++.001 < .  1:1(0) ack 20801 win 514
+// This 2KB packet should be sent alone.
+   +0 > P. 20801:22801(2000) ack 1
++.001 < .  1:1(0) ack 22801 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt
new file mode 100644
index 000000000000..2ff66075288e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk. Also, when packets are retransmitted, they
+// will not be coalesce into the same skb.
+`./defaults.sh
+`
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+   +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+   +0 write(4, ..., 10400) = 10400
+
+// Write 10 400B chunks with no coalescing options.
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+// This chunk should not be appended to the skbs created for the previous chunk.
+   +0 write(4, ..., 10000) = 10000
+
+   +0 > P. 1:10001(10000) ack 1
++.001 < .  1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+   +0 > P. 10001:10801(800) ack 1
+// The 9 remaining 400B chunks should be sent as individual packets.
+   +0 > P. 10801:11201(400) ack 1
+   +0 > P. 11201:11601(400) ack 1
+   +0 > P. 11601:12001(400) ack 1
+   +0 > P. 12001:12401(400) ack 1
+   +0 > P. 12401:12801(400) ack 1
+   +0 > P. 12801:13201(400) ack 1
+   +0 > P. 13201:13601(400) ack 1
+   +0 > P. 13601:14001(400) ack 1
+   +0 > P. 14001:14401(400) ack 1
+// The last 10KB chunk should be sent separately.
+   +0 > P. 14401:24401(10000) ack 1
+
++.001 < .  1:1(0) ack 10401 win 514
++.001 < .  1:1(0) ack 10801 win 514
++.001 < .  1:1(0) ack 11201 win 514
++.001 < .  1:1(0) ack 11601 win 514
++.001 < .  1:1(0) ack 12001 win 514 <sack 13201:14401,nop,nop>
+// TCP should fill the hole but no coalescing should happen, and all
+// retransmissions should be sent out as individual packets.
+
+// Note : This is timeout based retransmit.
+// Do not put +0 here or flakes will come back.
++.004~+.008 > P. 12001:12401(400) ack 1
+
++.001 < .  1:1(0) ack 12401 win 514 <sack 13201:14401,nop,nop>
+   +0 > P. 12401:12801(400) ack 1
+   +0 > P. 12801:13201(400) ack 1
++.001 < .  1:1(0) ack 12801 win 514 <sack 13201:14401,nop,nop>
++.001 < .  1:1(0) ack 14401 win 514
++.001 < .  1:1(0) ack 24401 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt
new file mode 100644
index 000000000000..77039c5aac39
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk.
+`./defaults.sh
+`
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+   +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+   +0 write(4, ..., 10400) = 10400
+
+// Write a 400B chunk with no coalescing options.
+   +0 send(4, ..., 400, MSG_EOR) = 400
+
+// This chunk should not be appended to the skbs created for the previous chunk.
+   +0 write(4, ..., 10000) = 10000
+
+   +0 > P. 1:10001(10000) ack 1
++.001 < .  1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+   +0 > P. 10001:10801(800) ack 1
+   +0 > P. 10801:20801(10000) ack 1
++.001 < .  1:1(0) ack 10401 win 514
++.001 < .  1:1(0) ack 10801 win 514
++.001 < .  1:1(0) ack 20801 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt
new file mode 100644
index 000000000000..dd5a06250595
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk even though we have 10 back-to-back small
+// writes.
+`./defaults.sh
+`
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+   +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+   +0 write(4, ..., 10400) = 10400
+
+// Write 10 400B chunks with no coalescing options.
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+   +0 send(4, ..., 400, MSG_EOR) = 400
+// This chunk should not be appended to the skbs created for the previous chunk.
+   +0 write(4, ..., 10000) = 10000
+
+   +0 > P. 1:10001(10000) ack 1
++.001 < .  1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+   +0 > P. 10001:10801(800) ack 1
+// The 9 remaining 400B chunks should be sent as individual packets.
+   +0 > P. 10801:11201(400) ack 1
+   +0 > P. 11201:11601(400) ack 1
+   +0 > P. 11601:12001(400) ack 1
+   +0 > P. 12001:12401(400) ack 1
+   +0 > P. 12401:12801(400) ack 1
+   +0 > P. 12801:13201(400) ack 1
+   +0 > P. 13201:13601(400) ack 1
+   +0 > P. 13601:14001(400) ack 1
+   +0 > P. 14001:14401(400) ack 1
+// The last 10KB chunk should be sent separately.
+   +0 > P. 14401:24401(10000) ack 1
+
++.001 < .  1:1(0) ack 10401 win 514
++.001 < .  1:1(0) ack 10801 win 514
++.001 < .  1:1(0) ack 11201 win 514
++.001 < .  1:1(0) ack 11601 win 514
++.001 < .  1:1(0) ack 12001 win 514
++.001 < .  1:1(0) ack 12401 win 514
++.001 < .  1:1(0) ack 12801 win 514
++.001 < .  1:1(0) ack 13201 win 514
++.001 < .  1:1(0) ack 13601 win 514
++.001 < .  1:1(0) ack 14001 win 514
++.001 < .  1:1(0) ack 14401 win 514
++.001 < .  1:1(0) ack 24401 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt
new file mode 100644
index 000000000000..0d3c8077e830
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation.
+// In this variant we test a simple case where in-flight == ssthresh
+// all the way through recovery, so during fast recovery we send one segment
+// for each segment SACKed/ACKed.
+
+// Set up config.
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// RTT 100ms
+  +.1 < . 1:1(0) ack 1 win 320
+   +0 accept(3, ..., ...) = 4
+
+// Send 10 data segments.
+   +0 write(4, ..., 10000) = 10000
+   +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1:1001.
+ +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:4001,nop,nop>
+// Enter fast recovery.
+   +0 > . 1:1001(1000) ack 1
+ +.01 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd
+assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh
+}%
+
+// Write some more, which we will send 1 MSS at a time,
+// as in-flight segments are SACKed or ACKed.
+ +.01 write(4, ..., 7000) = 7000
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:5001,nop,nop>
+   +0 > . 10001:11001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:6001,nop,nop>
+   +0 > . 11001:12001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:7001,nop,nop>
+   +0 > . 12001:13001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:8001,nop,nop>
+   +0 > . 13001:14001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:9001,nop,nop>
+   +0 > . 14001:15001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:10001,nop,nop>
+   +0 > . 15001:16001(1000) ack 1
+
+ +.02 < . 1:1(0) ack 10001 win 320
+   +0 > P. 16001:17001(1000) ack 1
+// Leave fast recovery.
+ +.01 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd
+assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh
+}%
+
+ +.03 < . 1:1(0) ack 12001 win 320
+ +.02 < . 1:1(0) ack 14001 win 320
+ +.02 < . 1:1(0) ack 16001 win 320
+ +.02 < . 1:1(0) ack 17001 win 320
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt
new file mode 100644
index 000000000000..7842a10b6967
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation. The sender sends 20 packets. Packet
+// 1 to 4, and 11 to 16 are dropped.
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+ +.01 < . 1:1(0) ack 1 win 320
+   +0 accept(3, ..., ...) = 4
+
+// Write 20 data segments.
+   +0 write(4, ..., 20000) = 20000
+   +0 > P. 1:10001(10000) ack 1
+
+// Receive first DUPACK, entering PRR part
+ +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop>
+   +0 > . 10001:11001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop>
+   +0 > . 11001:12001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop>
+   +0 > . 1:1001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop>
+   +0 > . 1001:2001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop>
+   +0 > . 2001:3001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop>
+   +0 > . 3001:4001(1000) ack 1
+// Enter PRR CRB
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:11001,nop,nop>
+   +0 > . 12001:13001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:12001,nop,nop>
+   +0 > . 13001:14001(1000) ack 1
+// Enter PRR slow start
+ +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:12001,nop,nop>
+   +0 > P. 14001:16001(2000) ack 1
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:12001,nop,nop>
+   +0 > . 1001:2001(1000) ack 1
+   +0 > . 16001:17001(1000) ack 1
+// inflight reaches ssthresh, goes into packet conservation mode
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:13001,nop,nop>
+   +0 > . 17001:18001(1000) ack 1
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:14001,nop,nop>
+   +0 > . 18001:19001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt
new file mode 100644
index 000000000000..b66d7644c3b6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation. The sender sends 20 packets. Packet
+// 1 to 4 are lost. The sender writes another 10 packets.
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+ +.01 < . 1:1(0) ack 1 win 320
+   +0 accept(3, ..., ...) = 4
+
+// Send 20 data segments.
+   +0 write(4, ..., 10000) = 10000
+   +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1,2,3,4
+ +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop>
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop>
+   +0 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop>
+   +0 > . 1:1001(1000) ack 1
+   +0 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop>
+   +0 > . 1001:2001(1000) ack 1
+   +0 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop>
+   +0 > . 2001:3001(1000) ack 1
+   +0 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop>
+   +0 > . 3001:4001(1000) ack 1
+
+// Receiver ACKs all data.
+ +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:10001,nop,nop>
+   +0 < . 1:1(0) ack 2001 win 320 <sack 4001:10001,nop,nop>
+   +0 < . 1:1(0) ack 3001 win 320 <sack 4001:10001,nop,nop>
+   +0 < . 1:1(0) ack 10001 win 320
+
+// Writes another 10 packets, which the ssthresh*mss amount
+// should be sent right away
+ +.01 write(4, ..., 10000) = 10000
+   +0 > . 10001:17001(7000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt
new file mode 100644
index 000000000000..8e87bfecabb5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation.
+// In this variant we verify that the sender uses SACK info on an ACK
+// below snd_una.
+
+// Set up config.
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// RTT 10ms
+ +.01 < . 1:1(0) ack 1 win 320
+   +0 accept(3, ..., ...) = 4
+
+// Send 10 data segments.
+   +0 write(4, ..., 10000) = 10000
+   +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1:1001,4001:5001,7001:8001.
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+   +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+   +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001 8001:9001,nop,nop>
+   +0 > . 1:1001(1000) ack 1
+
++.012 < . 1:1(0) ack 4001 win 320 <sack 8001:9001,nop,nop>
+   +0 > . 4001:7001(3000) ack 1
+
+   +0 write(4, ..., 10000) = 10000
+
+// The following ACK was reordered - delayed so that it arrives with
+// an ACK field below snd_una. Here we check that the newly-SACKed
+// 2MSS at 5001:7001 cause us to send out 2 more MSS.
++.002 < . 1:1(0) ack 3001 win 320 <sack 5001:7001,nop,nop>
+   +0 > . 7001:8001(1000) ack 1
+   +0 > . 10001:11001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt
new file mode 100644
index 000000000000..96b01eb5b7a4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test RFC 3042 "Limited Transmit": "sending a new data segment in
+// response to each of the first two duplicate acknowledgments that
+// arrive at the sender".
+// This variation tests a receiver that doesn't support SACK.
+
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 320
+   +0 accept(3, ..., ...) = 4
+
+// Write some data, and send the initial congestion window.
+   +0 write(4, ..., 15000) = 15000
+   +0 > P. 1:10001(10000) ack 1
+
+// Limited transmit: on first dupack, send a new data segment.
+ +.11 < . 1:1(0) ack 1 win 320
+   +0 > . 10001:11001(1000) ack 1
+
+// Limited transmit: on second dupack, send a new data segment.
+ +.01 < . 1:1(0) ack 1 win 320
+   +0 > . 11001:12001(1000) ack 1
+
+// It turned out to be reordering, not loss.
+// We have one packet newly acked (1001:3001 were DUP-ACK'd)
+// So we revert state back to Open. Slow start cwnd from 10 to 11
+// and send 11 - 9 = 2 packets
+ +.01 < . 1:1(0) ack 3001 win 320
+   +0 > P. 12001:14001(2000) ack 1
+
+ +.02 < . 1:1(0) ack 5001 win 320
+   +0 > P. 14001:15001(1000) ack 1
+
+// Client gradually ACKs all data.
+ +.02 < . 1:1(0) ack 7001 win 320
+ +.02 < . 1:1(0) ack 9001 win 320
+ +.02 < . 1:1(0) ack 11001 win 320
+ +.02 < . 1:1(0) ack 13001 win 320
+ +.02 < . 1:1(0) ack 15001 win 320
+
+// Clean up.
+ +.17 close(4) = 0
+   +0 > F. 15001:15001(0) ack 1
+  +.1 < F. 1:1(0) ack 15002 win 257
+   +0 > . 15002:15002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt
new file mode 100644
index 000000000000..642da51ec3a4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test RFC 3042 "Limited Transmit": "sending a new data segment in
+// response to each of the first two duplicate acknowledgments that
+// arrive at the sender".
+// This variation tests a receiver that supports SACK.
+
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+  +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 320
+   +0 accept(3, ..., ...) = 4
+
+// Write some data, and send the initial congestion window.
+   +0 write(4, ..., 15000) = 15000
+   +0 > P. 1:10001(10000) ack 1
+
+// Limited transmit: on first dupack, send a new data segment.
+ +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+   +0 > . 10001:11001(1000) ack 1
+
+// Limited transmit: on second dupack, send a new data segment.
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+   +0 > . 11001:12001(1000) ack 1
+
+// It turned out to be reordering, not loss.
+ +.01 < . 1:1(0) ack 3001 win 320
+   +0 > P. 12001:14001(2000) ack 1
+
+ +.02 < . 1:1(0) ack 5001 win 320
+   +0 > P. 14001:15001(1000) ack 1
+
+// Client gradually ACKs all data.
+ +.02 < . 1:1(0) ack 7001 win 320
+ +.02 < . 1:1(0) ack 9001 win 320
+ +.02 < . 1:1(0) ack 11001 win 320
+ +.02 < . 1:1(0) ack 13001 win 320
+ +.02 < . 1:1(0) ack 15001 win 320
+
+// Clean up.
+ +.17 close(4) = 0
+   +0 > F. 15001:15001(0) ack 1
+  +.1 < F. 1:1(0) ack 15002 win 257
+   +0 > . 15002:15002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt
new file mode 100644
index 000000000000..7adae7a9ef4a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// This is a test inspired by an Android client app using SSL. This
+// test verifies using TCP_NODELAY would save application latency
+// (Perhaps even better with TCP_NAGLE).
+//
+`./defaults.sh
+ethtool -K tun0 tso off gso off
+./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+   +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+   +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+   +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < S. 0:0(0) ack 1 win 5792 <mss 974,nop,nop,sackOK,nop,wscale 7>
+   +0 > . 1:1(0) ack 1
+
+// SSL handshake (resumed session)
+   +0 write(4, ..., 517) = 517
+   +0 > P. 1:518(517) ack 1
+  +.1 < . 1:1(0) ack 518 win 229
+
+   +0 < P. 1:144(143) ack 1 win 229
+   +0 > . 518:518(0) ack 144
+   +0 read(4, ..., 1000) = 143
+
+// Application POST header (51B) and body (2002B)
+   +0 write(4, ..., 51) = 51
+   +0 > P. 518:569(51) ack 144
+ +.03 write(4, ..., 2002) = 2002
+   +0 > . 569:1543(974) ack 144
+   +0 > P. 1543:2517(974) ack 144
+// Without disabling Nagle, this packet will not happen until the remote ACK.
+   +0 > P. 2517:2571(54) ack 144
+
+  +.1 < . 1:1(0) ack 2571 win 229
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt
new file mode 100644
index 000000000000..fa9c01813996
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test the MSG_MORE flag will correctly corks the tiny writes
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+// Disable Nagle by default on this socket.
+   +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+// Test the basic case: MSG_MORE overwrites TCP_NODELAY and enables Nagle.
+   +0 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40
+ +.21~+.215 > P. 1:41(40) ack 1
+ +.01 < . 1:1(0) ack 41 win 257
+
+// Test unsetting MSG_MORE releases the packet
+   +0 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100
++.005 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160
+ +.01 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}],
+		  msg_flags=0}, MSG_MORE) = 495
++.008 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5
+   +0 > P. 41:801(760) ack 1
+ +.02 < . 1:1(0) ack 801 win 257
+
+
+// Test >MSS write will unleash MSS packets but hold on the remaining data.
+  +.1 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100
+   +0 > . 801:3801(3000) ack 1
++.003 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50
+
+ +.01 < . 1:1(0) ack 2801 win 257
+// Err... we relase the remaining right after the ACK? note that PUSH is reset
+   +0 > . 3801:3951(150) ack 1
+
+// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0
++.001 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1
++.002 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2
++.003 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3
++.004 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4
+ +.02 < . 1:1(0) ack 3951 win 257
+   +0 > . 3951:3961(10) ack 1
+ +.02 < . 1:1(0) ack 3961 win 257
+
+
+// Test the case a MSG_MORE send followed by a write flushes the data
+   +0 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20
+ +.05 write(4, ..., 20) = 20
+   +0 > P. 3961:4001(40) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt
new file mode 100644
index 000000000000..0ddec5f7dc1a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_CORK and TCP_NODELAY sockopt behavior
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+// Set TCP_CORK sockopt to hold small packets
+   +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0
+
+   +0 write(4, ..., 40) = 40
+ +.05 write(4, ..., 40) = 40
+
+// Unset TCP_CORK should push pending bytes out
+ +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0
+   +0 > P. 1:81(80) ack 1
+ +.01 < . 1:1(0) ack 81 win 257
+
+// Set TCP_CORK sockopt to hold small packets
+   +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0
+
+   +0 write(4, ..., 40) = 40
+ +.05 write(4, ..., 40) = 40
+
+// Set TCP_NODELAY sockopt should push pending bytes out
+   +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+   +0 > P. 81:161(80) ack 1
+ +.01 < . 1:1(0) ack 161 win 257
+
+// Set MSG_MORE to hold small packets
+   +0 send(4, ..., 40, MSG_MORE) = 40
+ +.05 send(4, ..., 40, MSG_MORE) = 40
+
+// Set TCP_NODELAY sockopt should push pending bytes out
+ +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+   +0 > . 161:241(80) ack 1
+ +.01 < . 1:1(0) ack 241 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt
new file mode 100644
index 000000000000..310ef31518da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify that setsockopt calls that force a route refresh do not
+// cause problems matching SACKs with packets in the write queue.
+// This variant tests IP_TOS.
+
+`./defaults.sh`
+
+// Establish a connection.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0
+   +0...0.010 connect(3, ..., ...) = 0
+
+   +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 65535 <mss 1460,nop,wscale 2,nop,nop,sackOK>
+   +0 > . 1:1(0) ack 1
+
+ +.01 write(3, ..., 5840) = 5840
+   +0 > P. 1:5841(5840) ack 1
+ +.01 < . 1:1(0) ack 5841 win 65535
+
+ +.01 write(3, ..., 5840) = 5840
+   +0 > P. 5841:11681(5840) ack 1
+ +.01 < . 1:1(0) ack 11681 win 65535
+
+ +.01 write(3, ..., 14600) = 14600
+   +0 > P. 11681:26281(14600) ack 1
+
+// Try the socket option that we know can force a route refresh.
+   +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0
+// Then revert to avoid routing/mangling/etc implications of that setting.
+   +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0
+
+// Verify that we do not retransmit the SACKed segments.
+ +.01 < . 1:1(0) ack 13141 win 65535 <sack 16061:17521 20441:26281,nop,nop>
+   +0 > . 13141:16061(2920) ack 1
+   +0 > P. 17521:20441(2920) ack 1
+ +.01 < . 1:1(0) ack 26281 win 65535
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt
new file mode 100644
index 000000000000..f185e1ac57ea
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests non-FACK SACK with SACKs coming in the order
+// 2 6 8 3 9, to test what happens when we get a new SACKed range
+// (for packet 3) that is on the right of an existing SACKed range
+// (for packet 2).
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 1024
+   +0 accept(3, ..., ...) = 4
+
+   +0 write(4, ..., 10000) = 10000
+   +0 > P. 1:10001(10000) ack 1
+
+  +.1 < . 1:1(0) ack 1 win 257 <sack 2001:3001,nop,nop>
++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001,nop,nop>
++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001 8001:9001,nop,nop>
+
+// 3 SACKed packets, so we enter Fast Recovery.
+   +0 > . 1:1001(1000) ack 1
+   +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+   +0 %{ assert tcpi_lost == 6, tcpi_lost }%
+
+// SACK for 3001:4001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
++.007 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:9001,nop,nop>
+   +0 > . 1001:2001(1000) ack 1
+   +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+   +0 %{ assert tcpi_reordering == 6, tcpi_reordering }%   // 8001:9001 -> 3001:4001 is 6
+
+// SACK for 9001:10001.
+ +.01 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop>
+   +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+
+// ACK for 1:1001 as packets from t=0.303 arrive.
++.083 < . 1:1(0) ack 1001 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop>
+   +0 %{ assert tcpi_lost == 4,tcpi_lost }%
+
+// ACK for 1:4001 as packets from t=0.310 arrive.
++.017 < . 1:1(0) ack 4001 win 257 <sack 6001:7001 8001:10001,nop,nop>
+   +0 %{ assert tcpi_lost == 3,tcpi_lost }%
+
+// ACK for 1:7001 as packets from t=0.320 arrive.
+ +.01 < . 1:1(0) ack 7001 win 257 <sack 8001:10001,nop,nop>
+
+// ACK for all data as packets from t=0.403 arrive.
+  +.1 < . 1:1(0) ack 10001 win 257
+   +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt
new file mode 100644
index 000000000000..0093b4973934
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests the case where we mark packets 0-4 lost, then
+// get a SACK for 3, and then a SACK for 4.
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 1024
+   +0 accept(3, ..., ...) = 4
+
+   +0 write(4, ..., 10000) = 10000
+   +0 > P. 1:10001(10000) ack 1
+
+// SACK for 7001:8001. Using RACK we delay the fast retransmit.
+  +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop>
+// RACK reordering timer
++.027 > . 1:1001(1000) ack 1
+   +0 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_lost == 7, tcpi_lost  # RACK thinks 1:7001 are lost
+assert tcpi_reordering == 3, tcpi_reordering
+}%
+
+// SACK for 3001:4001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:4001 7001:8001,nop,nop>
+   +0 > . 1001:2001(1000) ack 1
+   +0 %{
+assert tcpi_lost == 6, tcpi_lost              # since 3001:4001 is no longer lost
+assert tcpi_reordering == 5, tcpi_reordering  # 7001:8001 -> 3001:4001
+}%
+
+// SACK for 4001:5001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
+// It uses the RFC3517 algorithm to mark 1:3001 lost
+// because >=3 higher-sequence packets are SACKed.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:8001,nop,nop>
+   +0 > . 2001:3001(1000) ack 1
+   +0 %{
+assert tcpi_lost == 5,tcpi_lost         # SACK/RFC3517 thinks 1:3001 are lost
+}%
+
+// SACK for 8001:9001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:9001,nop,nop>
+
+// SACK for 9001:10001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:10001,nop,nop>
+   +0 > . 5001:6001(1000) ack 1
+
+// To simplify clean-up, say we get an ACK for all data.
+  +.1 < . 1:1(0) ack 10001 win 257
+   +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt
new file mode 100644
index 000000000000..980a832dc81c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests the case where we mark packets 0-4 lost, then
+// get a SACK for 5, and then a SACK for 6.
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 1024
+   +0 accept(3, ..., ...) = 4
+
+   +0 write(4, ..., 10000) = 10000
+   +0 > P. 1:10001(10000) ack 1
+
+// SACK for 7001:8001. Using RACK we delay a fast retransmit.
+  +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop>
++.027 > . 1:1001(1000) ack 1
+   +0 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_lost == 7,tcpi_lost         # RACK thinks 1:7001 are lost
+assert tcpi_reordering == 3, tcpi_reordering
+}%
+
+// SACK for 5001:6001.
+   +0 < . 1:1(0) ack 1 win 257 <sack 5001:6001 7001:8001,nop,nop>
+   +0 > . 1001:2001(1000) ack 1
+   +0 %{
+assert tcpi_lost == 6, tcpi_lost
+assert tcpi_reordering == 3, tcpi_reordering  # 7001:8001 -> 5001:6001 is 3
+}%
+
+// SACK for 6001:7001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
+   +0 < . 1:1(0) ack 1 win 257 <sack 5001:8001,nop,nop>
+   +0 > . 2001:3001(1000) ack 1
+   +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+
+// SACK for 8001:9001.
+   +0 < . 1:1(0) ack 1 win 257 <sack 5001:9001,nop,nop>
+   +0 > . 3001:4001(1000) ack 1
+
+// SACK for 9001:10001.
+   +0 < . 1:1(0) ack 1 win 257 <sack 5001:10001,nop,nop>
+   +0 > . 4001:5001(1000) ack 1
+
+// To simplify clean-up, say we get an ACK for all data.
+  +.1 < . 1:1(0) ack 10001 win 257
+   +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt
new file mode 100644
index 000000000000..6740859a1360
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Simplest possible test of open() and then sendfile().
+// We write some zeroes into a file (since packetdrill expects payloads
+// to be all zeroes) and then open() the file, then use sendfile()
+// and verify that the correct number of zeroes goes out.
+
+`./defaults.sh
+/bin/rm -f /tmp/testfile
+/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none
+`
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 514
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 open("/tmp/testfile", O_RDONLY) = 5
+   +0 sendfile(4, 5, [0], 5) = 5
+   +0 > P. 1:6(5) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt
new file mode 100644
index 000000000000..0cbd43253236
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh`
+
+// Initialize a server socket
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+// Connection should get accepted
+   +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <...>
+   +0 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+
+   +0 pipe([5, 6]) = 0
+   +0 < U. 1:101(100) ack 1 win 257 urg 100
+   +0 splice(4, NULL, 6, NULL, 99, 0) = 99
+   +0 splice(4, NULL, 6, NULL, 1, 0) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt
new file mode 100644
index 000000000000..8940726a3ec2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero
+// buffer length.
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+// Cache warmup: send a Fast Open cookie request
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
++0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop>
++0 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop>
++0 > . 1:1(0) ack 1
++0 close(3) = 0
++0 > F. 1:1(0) ack 1
++0 < F. 1:1(0) ack 2 win 92
++0 > .  2:2(0) ack 2
+
+// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1
++0 close(4) = 0
+
+// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(5, ..., ...) = 0
++0 sendto(5, NULL, 1, 0, ..., ...) = -1
++0 close(5) = 0
+
+// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6
++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(6, ..., ...) = 0
++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1
++0 close(6) = 0
+
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
new file mode 100644
index 000000000000..b2b2cdf27e20
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we correctly skip zero-length IOVs.
+`./defaults.sh`
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+   +0 accept(3, ..., ...) = 4
+   +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+   +0 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}],
+                  msg_flags=0}, 0) = 60
+   +0 > P. 1:61(60) ack 1
+ +.01 < . 1:1(0) ack 61 win 257
+
+   +0 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}],
+                  msg_flags=0}, MSG_ZEROCOPY) = 0
+
+   +0 sendmsg(4, {msg_name(...)=...,
+                  msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}],
+                  msg_flags=0}, MSG_ZEROCOPY) = 60
+   +0 > P. 61:121(60) ack 1
+ +.01 < . 1:1(0) ack 121 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt
new file mode 100644
index 000000000000..59f5903f285c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test kernel behavior with NULL as buffer pointer
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.2 < . 1:1(0) ack 1 win 514
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 write(4, NULL, 1000) = -1 EFAULT (Bad address)
+   +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address)
+   +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address)
+
+   +0 < . 1:1001(1000) ack 1 win 200
+   +0 read(4, NULL, 1000) = -1 EFAULT (Bad address)
+   +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address)
+   +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt
new file mode 100644
index 000000000000..d7fdb43a8e89
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tcpi_last_data_recv for active session
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
++0    socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0    fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
++0    fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
++0    connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
++0    > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.030 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8>
++0    > . 1:1(0) ack 1
+
++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }%
+
++0    < . 1:1001(1000) ack 1 win 300
++0    > . 1:1(0) ack 1001
+
++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt
new file mode 100644
index 000000000000..a9bcd46f6cb6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test rwnd limited time in tcp_info for client side.
+
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+   +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+   +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+   +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+
+// Server advertises 0 receive window.
+ +.01 < S. 0:0(0) ack 1 win 0 <mss 1000,nop,nop,sackOK>
+
+   +0 > . 1:1(0) ack 1
+   +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+   +0 fcntl(3, F_SETFL, O_RDWR) = 0   // set back to blocking
+
+// Make sure that initial rwnd limited time is 0.
+   +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }%
+
+// Receive window limited time starts here.
+   +0 write(3, ..., 1000) = 1000
+
+// Check that rwnd limited time in tcp_info is around 0.1s.
+  +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }%
+
+// Server opens the receive window.
+  +.1 < . 1:1(0) ack 1 win 2000
+
+// Check that rwnd limited time in tcp_info is around 0.2s.
+   +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }%
+
+   +0 > P. 1:1001(1000) ack 1
+
+// Server advertises a very small receive window.
+ +.03 < . 1:1(0) ack 1001 win 10
+
+// Receive window limited time starts again.
+   +0 write(3, ..., 1000) = 1000
+
+// Server opens the receive window again.
+  +.1 < . 1:1(0) ack 1001 win 2000
+// Check that rwnd limited time in tcp_info is around 0.3s
+// and busy time is 0.3 + 0.03 (server opened small window temporarily).
+   +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\
+         assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\
+}%
+
+   +0 > P. 1001:2001(1000) ack 1
+ +.02 < . 1:1(0) ack 2001 win 2000
+   +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt
new file mode 100644
index 000000000000..f0de2acd0f8e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test send-buffer-limited time in tcp_info for client side.
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+   +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+   +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+   +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+   +0 > . 1:1(0) ack 1
+   +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+   +0 fcntl(3, F_SETFL, O_RDWR) = 0   // set back to blocking
+   +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0
+   +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0
+
+ +.09...0.14 write(3, ..., 150000) = 150000
+
+ +.01 < . 1:1(0) ack 10001 win 10000
+
+ +.01 < . 1:1(0) ack 30001 win 10000
+
+// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB
+ +.01 < . 1:1(0) ack 70001 win 10000
+
+ +.02 < . 1:1(0) ack 95001 win 10000
+   +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \
+	 assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \
+	 assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }%
+
+// This ack frees up enough buffer so we are no longer
+// buffer limited (socket flag SOCK_NOSPACE is cleared)
+ +.02 < . 1:1(0) ack 150001 win 10000
+   +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\
+	 assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\
+	 assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt
new file mode 100644
index 000000000000..2087ec0c746a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that tx timestamping sends timestamps only for
+// the last byte of each sendmsg.
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+    0	socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0	fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+   +0	fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+   +0	connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+   +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 20000 <mss 1000,nop,nop,sackOK>
+   +0 > . 1:1(0) ack 1
+   +0	getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+   +0	fcntl(3, F_SETFL, O_RDWR) = 0   // set back to blocking
+
+   +0	setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+		   [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+		    SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+		    SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+   +0	write(3, ..., 11000) = 11000
+   +0	> P. 1:10001(10000) ack 1
+ +.01	< . 1:1(0) ack 10001 win 4000
+   +0	> P. 10001:11001(1000) ack 1
+ +.01	< . 1:1(0) ack 11001 win 4000
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+   +0	%{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the last byte should be received almost immediately
+// once 10001 is acked at t=20ms.
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...)
+// is called after when SYN is acked. So, we expect the last byte of the first
+// chunk to have a timestamp key of 10999 (i.e., 11000 - 1).
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=20000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_SCHED,
+				    ee_data=10999}}
+		    ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the last byte should be received almost immediately
+// once 10001 is acked at t=20ms.
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=20000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_SND,
+				    ee_data=10999}}
+		    ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the last byte should be received at t=30ms.
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=30000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_ACK,
+				    ee_data=10999}}
+		    ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt
new file mode 100644
index 000000000000..876024a31110
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tx timestamping for partial writes (IPv4).
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+    0	socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0	fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+   +0	fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+   +0	connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+   +0	> S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01	< S. 0:0(0) ack 1 win 2000 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7>
+   +0	> . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+   +0	getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+
+   +0	setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0
+   +0	setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+		   [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+		    SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+		    SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// We have a partial write.
+   +0	write(3, ..., 10000) = 2964
+   +0	> . 1:989(988) ack 1 <nop,nop,TS val 110 ecr 700>
+   +0	> P. 989:1977(988) ack 1 <nop,nop,TS val 110 ecr 700>
+ +.01	< . 1:1(0) ack 1977 win 92 <nop,nop,TS val 800 ecr 200>
+   +0	> P. 1977:2965(988) ack 1 <nop,nop,TS val 114 ecr 800>
+ +.01	< . 1:1(0) ack 2965 win 92 <nop,nop,TS val 800 ecr 200>
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+   +0	%{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately
+// after the first ack at t=20ms.
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=20000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_SCHED,
+				    ee_data=2963}}
+		    ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the first chunk should be received almost immediately
+// after the first ack at t=20ms.
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=20000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_SND,
+				    ee_data=2963}}
+		    ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at
+// t=30ms.
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=30000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_ACK,
+				    ee_data=2963}}
+		    ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt
new file mode 100644
index 000000000000..84d94780e6be
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tx timestamping for server-side (IPv4).
+`./defaults.sh
+`
+
+// Initialize connection
+    0	socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0	setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0	bind(3, ..., ...) = 0
+   +0	listen(3, 1) = 0
+
+   +0	< S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+   +0	> S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01	< . 1:1(0) ack 1 win 514
+
+   +0	accept(3, ..., ...) = 4
+   +0	setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING,
+		   [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+		    SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+		    SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// Write two 2KB chunks.
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...)
+// is called after when SYN is acked. So, we expect the last byte of the first
+// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and
+// 3999 (i.e., 4000 - 1) respectively.
+   +0	write(4, ..., 2000) = 2000
+   +0	write(4, ..., 2000) = 2000
+   +0	> P. 1:2001(2000) ack 1
+   +0	> P. 2001:4001(2000) ack 1
+ +.01	< .  1:1(0) ack 2001 win 514
+ +.01	< .  1:1(0) ack 4001 win 514
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+   +0	%{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately
+// after write at t=10ms.
+   +0	recvmsg(4, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=10000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_SCHED,
+				    ee_data=1999}}
+		    ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the first chunk should be received almost immediately
+// after write at t=10ms.
+   +0	recvmsg(4, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=10000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_SND,
+				    ee_data=1999}}
+		    ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately
+// after that at t=10ms.
+   +0	recvmsg(4, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=10000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_SCHED,
+				    ee_data=3999}}
+		    ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the second chunk should be received almost immediately
+// after that at t=10ms.
+   +0	recvmsg(4, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=10000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_SND,
+				    ee_data=3999}}
+		    ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms.
+   +0	recvmsg(4, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=20000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_ACK,
+				    ee_data=1999}}
+		    ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms.
+   +0	recvmsg(4, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=30000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_ACK,
+				    ee_data=3999}}
+		    ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt
new file mode 100644
index 000000000000..e61424a7bd0a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we send FIN packet with correct TSval
+--tcp_ts_tick_usecs=1000
+--tolerance_usecs=7000
+
+`./defaults.sh`
+
+// Create a socket.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+// Establish a connection.
+   +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+   +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+  +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+   +0 accept(3, ..., ...) = 4
+
+   +1 close(4) = 0
+// Check that FIN TSval is updated properly, one second has passed since last sent packet.
+   +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1200 ecr 200>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt
new file mode 100644
index 000000000000..174ce9a1bfc0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we reject TS val updates on a packet with invalid ACK sequence
+
+`./defaults.sh
+`
+
+// Create a socket.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+// Establish a connection.
+  +.1 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+   +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+  +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+   +0 accept(3, ..., ...) = 4
+
+// bad packet with high tsval (its ACK sequence is above our sndnxt)
+   +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100>
+
+
+   +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100>
+   +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt
new file mode 100644
index 000000000000..2e3b3bb7493a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we send RST packet with correct TSval
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh`
+
+// Create a socket.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+// Establish a connection.
+   +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+   +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+  +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+   +0 accept(3, ..., ...) = 4
+
+   +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100>
+   +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201>
+
+   +1 close(4) = 0
+// Check that RST TSval is updated properly, one second has passed since last sent packet.
+   +0 > R. 1:1(0) ack 1001 <nop,nop,TS val 1200 ecr 201>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
new file mode 100644
index 000000000000..183051ba0cae
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+
+   +0 < S 0:0(0) win 0 <mss 1460>
+   +0 > S. 0:0(0) ack 1 <mss 1460>
+
+  +.1 < . 1:1(0) ack 1 win 65530
+   +0 accept(3, ..., ...) = 4
+
+   +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+   +0 write(4, ..., 24) = 24
+   +0 > P. 1:25(24) ack 1
+   +.1 < . 1:1(0) ack 25 win 65530
+   +0 %{ assert tcpi_probes == 0, tcpi_probes; \
+         assert tcpi_backoff == 0, tcpi_backoff }%
+
+// install a qdisc dropping all packets
+   +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0`
+   +0 write(4, ..., 24) = 24
+   // When qdisc is congested we retry every 500ms
+   // (TCP_RESOURCE_PROBE_INTERVAL) and therefore
+   // we retry 6 times before hitting 3s timeout.
+   // First verify that the connection is alive:
++3.250 write(4, ..., 24) = 24
+   // Now verify that shortly after that the socket is dead:
+ +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
+
+   +0 %{ assert tcpi_probes == 6, tcpi_probes; \
+         assert tcpi_backoff == 0, tcpi_backoff }%
+   +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt
new file mode 100644
index 000000000000..2efe02bfba9c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh`
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+  +.1 < . 1:1(0) ack 1 win 32792
+
+
+   +0 accept(3, ..., ...) = 4
+
+// Okay, we received nothing, and decide to close this idle socket.
+// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth
+// trying hard to cleanly close this flow, at the price of keeping
+// a TCP structure in kernel for about 1 minute !
+   +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+   +0 close(4) = 0
+
+   +0 > F. 1:1(0) ack 1
+  +.3~+.400 > F. 1:1(0) ack 1
+  +.3~+.400 > F. 1:1(0) ack 1
+  +.6~+.800 > F. 1:1(0) ack 1
+
+// We finally receive something from the peer, but it is way too late
+// Our socket vanished because TCP_USER_TIMEOUT was really small
+   +0 < . 1:2(1) ack 1 win 32792
+   +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt
new file mode 100644
index 000000000000..8bd60226ccfc
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify that established connections drop a segment without the ACK flag set.
+
+`./defaults.sh`
+
+// Create a socket.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+// Establish a connection.
+   +0 < S 0:0(0) win 20000 <mss 1000,sackOK,nop,nop>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +.01 < . 1:1(0) ack 1 win 20000
+   +0 accept(3, ..., ...) = 4
+
+// Receive a segment with no flags set, verify that it's not enqueued.
+ +.01 < - 1:1001(1000) win 20000
+   +0 ioctl(4, SIOCINQ, [0]) = 0
+
+// Receive a segment with ACK flag set, verify that it is enqueued.
+ +.01 < . 1:1001(1000) ack 1 win 20000
+   +0 ioctl(4, SIOCINQ, [1000]) = 0
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 1a706d03bb6b..9a85f93c33d8 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -44,9 +44,11 @@ struct tls_crypto_info_keys {
 };
 
 static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
-				 struct tls_crypto_info_keys *tls12)
+				 struct tls_crypto_info_keys *tls12,
+				 char key_generation)
 {
-	memset(tls12, 0, sizeof(*tls12));
+	memset(tls12, key_generation, sizeof(*tls12));
+	memset(tls12, 0, sizeof(struct tls_crypto_info));
 
 	switch (cipher_type) {
 	case TLS_CIPHER_CHACHA20_POLY1305:
@@ -275,7 +277,7 @@ TEST_F(tls_basic, recseq_wrap)
 	if (self->notls)
 		SKIP(return, "no TLS support");
 
-	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0);
 	memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq));
 
 	ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
@@ -391,7 +393,7 @@ FIXTURE_SETUP(tls)
 		SKIP(return, "Unsupported cipher in FIPS mode");
 
 	tls_crypto_info_init(variant->tls_version, variant->cipher_type,
-			     &tls12);
+			     &tls12, 0);
 
 	ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
 
@@ -1175,7 +1177,7 @@ TEST_F(tls, bidir)
 		struct tls_crypto_info_keys tls12;
 
 		tls_crypto_info_init(variant->tls_version, variant->cipher_type,
-				     &tls12);
+				     &tls12, 0);
 
 		ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
 				 tls12.len);
@@ -1614,7 +1616,7 @@ TEST_F(tls, getsockopt)
 	EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
 
 	/* get the full crypto_info */
-	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect);
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0);
 	len = expect.len;
 	memrnd(&get, sizeof(get));
 	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
@@ -1668,6 +1670,464 @@ TEST_F(tls, recv_efault)
 		EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0);
 }
 
+#define TLS_RECORD_TYPE_HANDSHAKE      0x16
+/* key_update, length 1, update_not_requested */
+static const char key_update_msg[] = "\x18\x00\x00\x01\x00";
+static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd)
+{
+	size_t len = sizeof(key_update_msg);
+
+	EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE,
+				(char *)key_update_msg, len, 0),
+		  len);
+}
+
+static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags)
+{
+	char buf[100];
+
+	EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags),
+		  sizeof(key_update_msg));
+	EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0);
+}
+
+/* set the key to 0 then 1 for RX, immediately to 1 for TX */
+TEST_F(tls_basic, rekey_rx)
+{
+	struct tls_crypto_info_keys tls12_0, tls12_1;
+	char const *test_str = "test_message";
+	int send_len = strlen(test_str) + 1;
+	char buf[20];
+	int ret;
+
+	if (self->notls)
+		return;
+
+	tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+			     &tls12_0, 0);
+	tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+			     &tls12_1, 1);
+
+	ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len);
+	EXPECT_EQ(ret, 0);
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+/* set the key to 0 then 1 for TX, immediately to 1 for RX */
+TEST_F(tls_basic, rekey_tx)
+{
+	struct tls_crypto_info_keys tls12_0, tls12_1;
+	char const *test_str = "test_message";
+	int send_len = strlen(test_str) + 1;
+	char buf[20];
+	int ret;
+
+	if (self->notls)
+		return;
+
+	tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+			     &tls12_0, 0);
+	tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+			     &tls12_1, 1);
+
+	ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len);
+	EXPECT_EQ(ret, 0);
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, rekey)
+{
+	char const *test_str_1 = "test_message_before_rekey";
+	char const *test_str_2 = "test_message_after_rekey";
+	struct tls_crypto_info_keys tls12;
+	int send_len;
+	char buf[100];
+
+	if (variant->tls_version != TLS_1_3_VERSION)
+		return;
+
+	/* initial send/recv */
+	send_len = strlen(test_str_1) + 1;
+	EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+	EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+	/* update TX key */
+	tls_send_keyupdate(_metadata, self->fd);
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+	/* send after rekey */
+	send_len = strlen(test_str_2) + 1;
+	EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len);
+
+	/* can't receive the KeyUpdate without a control message */
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+	/* get KeyUpdate */
+	tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+	/* recv blocking -> -EKEYEXPIRED */
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EKEYEXPIRED);
+
+	/* recv non-blocking -> -EKEYEXPIRED */
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+	EXPECT_EQ(errno, EKEYEXPIRED);
+
+	/* update RX key */
+	EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+	/* recv after rekey */
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0);
+}
+
+TEST_F(tls, rekey_fail)
+{
+	char const *test_str_1 = "test_message_before_rekey";
+	char const *test_str_2 = "test_message_after_rekey";
+	struct tls_crypto_info_keys tls12;
+	int send_len;
+	char buf[100];
+
+	/* initial send/recv */
+	send_len = strlen(test_str_1) + 1;
+	EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+	EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+	/* update TX key */
+	tls_send_keyupdate(_metadata, self->fd);
+
+	if (variant->tls_version != TLS_1_3_VERSION) {
+		/* just check that rekey is not supported and return */
+		tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+		EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+		EXPECT_EQ(errno, EBUSY);
+		return;
+	}
+
+	/* successful update */
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+	/* invalid update: change of version */
+	tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* invalid update (RX socket): change of version */
+	tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* invalid update: change of cipher */
+	if (variant->cipher_type == TLS_CIPHER_AES_GCM_256)
+		tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1);
+	else
+		tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* send after rekey, the invalid updates shouldn't have an effect */
+	send_len = strlen(test_str_2) + 1;
+	EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len);
+
+	/* can't receive the KeyUpdate without a control message */
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+	/* get KeyUpdate */
+	tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+	/* recv blocking -> -EKEYEXPIRED */
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1);
+	EXPECT_EQ(errno, EKEYEXPIRED);
+
+	/* recv non-blocking -> -EKEYEXPIRED */
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+	EXPECT_EQ(errno, EKEYEXPIRED);
+
+	/* update RX key */
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+	/* recv after rekey */
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0);
+}
+
+TEST_F(tls, rekey_peek)
+{
+	char const *test_str_1 = "test_message_before_rekey";
+	struct tls_crypto_info_keys tls12;
+	int send_len;
+	char buf[100];
+
+	if (variant->tls_version != TLS_1_3_VERSION)
+		return;
+
+	send_len = strlen(test_str_1) + 1;
+	EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+
+	/* update TX key */
+	tls_send_keyupdate(_metadata, self->fd);
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+	EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+	EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+	/* can't receive the KeyUpdate without a control message */
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+
+	/* peek KeyUpdate */
+	tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK);
+
+	/* get KeyUpdate */
+	tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+	/* update RX key */
+	EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+}
+
+TEST_F(tls, splice_rekey)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN / 2;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	struct tls_crypto_info_keys tls12;
+	int p[2];
+
+	if (variant->tls_version != TLS_1_3_VERSION)
+		return;
+
+	memrnd(mem_send, sizeof(mem_send));
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+
+	/* update TX key */
+	tls_send_keyupdate(_metadata, self->fd);
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+	EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+	EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+
+	/* can't splice the KeyUpdate */
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* peek KeyUpdate */
+	tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK);
+
+	/* get KeyUpdate */
+	tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+	/* can't splice before updating the key */
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1);
+	EXPECT_EQ(errno, EKEYEXPIRED);
+
+	/* update RX key */
+	EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+	EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, rekey_peek_splice)
+{
+	char const *test_str_1 = "test_message_before_rekey";
+	struct tls_crypto_info_keys tls12;
+	int send_len;
+	char buf[100];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	int p[2];
+
+	if (variant->tls_version != TLS_1_3_VERSION)
+		return;
+
+	ASSERT_GE(pipe(p), 0);
+
+	send_len = strlen(test_str_1) + 1;
+	EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+
+	/* update TX key */
+	tls_send_keyupdate(_metadata, self->fd);
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+	EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+	EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+	EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+	EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0);
+}
+
+TEST_F(tls, rekey_getsockopt)
+{
+	struct tls_crypto_info_keys tls12;
+	struct tls_crypto_info_keys tls12_get;
+	socklen_t len;
+
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0);
+
+	len = tls12.len;
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0);
+	EXPECT_EQ(len, tls12.len);
+	EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+	len = tls12.len;
+	EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0);
+	EXPECT_EQ(len, tls12.len);
+	EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+	if (variant->tls_version != TLS_1_3_VERSION)
+		return;
+
+	tls_send_keyupdate(_metadata, self->fd);
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+	tls_recv_keyupdate(_metadata, self->cfd, 0);
+	EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+	len = tls12.len;
+	EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0);
+	EXPECT_EQ(len, tls12.len);
+	EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+	len = tls12.len;
+	EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0);
+	EXPECT_EQ(len, tls12.len);
+	EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+}
+
+TEST_F(tls, rekey_poll_pending)
+{
+	char const *test_str = "test_message_after_rekey";
+	struct tls_crypto_info_keys tls12;
+	struct pollfd pfd = { };
+	int send_len;
+	int ret;
+
+	if (variant->tls_version != TLS_1_3_VERSION)
+		return;
+
+	/* update TX key */
+	tls_send_keyupdate(_metadata, self->fd);
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+	/* get KeyUpdate */
+	tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+	/* send immediately after rekey */
+	send_len = strlen(test_str) + 1;
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+	/* key hasn't been updated, expect cfd to be non-readable */
+	pfd.fd = self->cfd;
+	pfd.events = POLLIN;
+	EXPECT_EQ(poll(&pfd, 1, 0), 0);
+
+	ret = fork();
+	ASSERT_GE(ret, 0);
+
+	if (ret) {
+		int pid2, status;
+
+		/* wait before installing the new key */
+		sleep(1);
+
+		/* update RX key while poll() is sleeping */
+		EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+		pid2 = wait(&status);
+		EXPECT_EQ(pid2, ret);
+		EXPECT_EQ(status, 0);
+	} else {
+		pfd.fd = self->cfd;
+		pfd.events = POLLIN;
+		EXPECT_EQ(poll(&pfd, 1, 5000), 1);
+
+		exit(!__test_passed(_metadata));
+	}
+}
+
+TEST_F(tls, rekey_poll_delay)
+{
+	char const *test_str = "test_message_after_rekey";
+	struct tls_crypto_info_keys tls12;
+	struct pollfd pfd = { };
+	int send_len;
+	int ret;
+
+	if (variant->tls_version != TLS_1_3_VERSION)
+		return;
+
+	/* update TX key */
+	tls_send_keyupdate(_metadata, self->fd);
+	tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+	EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+	/* get KeyUpdate */
+	tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+	ret = fork();
+	ASSERT_GE(ret, 0);
+
+	if (ret) {
+		int pid2, status;
+
+		/* wait before installing the new key */
+		sleep(1);
+
+		/* update RX key while poll() is sleeping */
+		EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+		sleep(1);
+		send_len = strlen(test_str) + 1;
+		EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+		pid2 = wait(&status);
+		EXPECT_EQ(pid2, ret);
+		EXPECT_EQ(status, 0);
+	} else {
+		pfd.fd = self->cfd;
+		pfd.events = POLLIN;
+		EXPECT_EQ(poll(&pfd, 1, 5000), 1);
+		exit(!__test_passed(_metadata));
+	}
+}
+
 FIXTURE(tls_err)
 {
 	int fd, cfd;
@@ -1696,7 +2156,7 @@ FIXTURE_SETUP(tls_err)
 	int ret;
 
 	tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
-			     &tls12);
+			     &tls12, 0);
 
 	ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
 	ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
@@ -2118,7 +2578,7 @@ TEST(tls_v6ops) {
 	int sfd, ret, fd;
 	socklen_t len, len2;
 
-	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0);
 
 	addr.sin6_family = AF_INET6;
 	addr.sin6_addr = in6addr_any;
@@ -2177,7 +2637,7 @@ TEST(prequeue) {
 	len = sizeof(addr);
 	memrnd(buf, sizeof(buf));
 
-	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12);
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0);
 
 	addr.sin_family = AF_INET;
 	addr.sin_addr.s_addr = htonl(INADDR_ANY);
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 3f2fca02fec5..36ff28af4b19 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -103,6 +103,19 @@ struct testcase testcases_v4[] = {
 		.r_num_mss = 1,
 	},
 	{
+		/* datalen <= MSS < gso_len: will fall back to no GSO */
+		.tlen = CONST_MSS_V4,
+		.gso_len = CONST_MSS_V4 + 1,
+		.r_num_mss = 0,
+		.r_len_last = CONST_MSS_V4,
+	},
+	{
+		/* MSS < datalen < gso_len: fail */
+		.tlen = CONST_MSS_V4 + 1,
+		.gso_len = CONST_MSS_V4 + 2,
+		.tfail = true,
+	},
+	{
 		/* send a single MSS + 1B */
 		.tlen = CONST_MSS_V4 + 1,
 		.gso_len = CONST_MSS_V4,
@@ -206,6 +219,19 @@ struct testcase testcases_v6[] = {
 		.r_num_mss = 1,
 	},
 	{
+		/* datalen <= MSS < gso_len: will fall back to no GSO */
+		.tlen = CONST_MSS_V6,
+		.gso_len = CONST_MSS_V6 + 1,
+		.r_num_mss = 0,
+		.r_len_last = CONST_MSS_V6,
+	},
+	{
+		/* MSS < datalen < gso_len: fail */
+		.tlen = CONST_MSS_V6 + 1,
+		.gso_len = CONST_MSS_V6 + 2,
+		.tfail = true
+	},
+	{
 		/* send a single MSS + 1B */
 		.tlen = CONST_MSS_V6 + 1,
 		.gso_len = CONST_MSS_V6,
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 640bc43452fa..88fa1d53ba2b 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -92,6 +92,9 @@ run_udp() {
 	echo "udp"
 	run_in_netns ${args}
 
+	echo "udp sendmmsg"
+	run_in_netns ${args} -m
+
 	echo "udp gso"
 	run_in_netns ${args} -S 0
 
diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh
new file mode 100755
index 000000000000..e7cb8c678bde
--- /dev/null
+++ b/tools/testing/selftests/net/vlan_bridge_binding.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ALL_TESTS="
+	test_binding_on
+	test_binding_off
+	test_binding_toggle_on
+	test_binding_toggle_off
+	test_binding_toggle_on_when_upper_down
+	test_binding_toggle_off_when_upper_down
+	test_binding_toggle_on_when_lower_down
+	test_binding_toggle_off_when_lower_down
+"
+
+setup_prepare()
+{
+	local port
+
+	ip_link_add br up type bridge vlan_filtering 1
+
+	for port in d1 d2 d3; do
+		ip_link_add $port type veth peer name r$port
+		ip_link_set_up $port
+		ip_link_set_up r$port
+		ip_link_set_master $port br
+	done
+
+	bridge_vlan_add vid 11 dev br self
+	bridge_vlan_add vid 11 dev d1 master
+
+	bridge_vlan_add vid 12 dev br self
+	bridge_vlan_add vid 12 dev d2 master
+
+	bridge_vlan_add vid 13 dev br self
+	bridge_vlan_add vid 13 dev d1 master
+	bridge_vlan_add vid 13 dev d2 master
+
+	bridge_vlan_add vid 14 dev br self
+	bridge_vlan_add vid 14 dev d1 master
+	bridge_vlan_add vid 14 dev d2 master
+	bridge_vlan_add vid 14 dev d3 master
+}
+
+operstate_is()
+{
+	local dev=$1; shift
+	local expect=$1; shift
+
+	local operstate=$(ip -j link show $dev | jq -r .[].operstate)
+	if [[ $operstate == UP ]]; then
+		operstate=1
+	elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then
+		operstate=0
+	fi
+	echo -n $operstate
+	[[ $operstate == $expect ]]
+}
+
+check_operstate()
+{
+	local dev=$1; shift
+	local expect=$1; shift
+	local operstate
+
+	operstate=$(busywait 1000 \
+			operstate_is "$dev" "$expect")
+	check_err $? "Got operstate of $operstate, expected $expect"
+}
+
+add_one_vlan()
+{
+	local link=$1; shift
+	local id=$1; shift
+
+	ip_link_add $link.$id link $link type vlan id $id "$@"
+}
+
+add_vlans()
+{
+	add_one_vlan br 11 "$@"
+	add_one_vlan br 12 "$@"
+	add_one_vlan br 13 "$@"
+	add_one_vlan br 14 "$@"
+}
+
+set_vlans()
+{
+	ip link set dev br.11 "$@"
+	ip link set dev br.12 "$@"
+	ip link set dev br.13 "$@"
+	ip link set dev br.14 "$@"
+}
+
+down_netdevs()
+{
+	local dev
+
+	for dev in "$@"; do
+		ip_link_set_down $dev
+	done
+}
+
+check_operstates()
+{
+	local opst_11=$1; shift
+	local opst_12=$1; shift
+	local opst_13=$1; shift
+	local opst_14=$1; shift
+
+	check_operstate br.11 $opst_11
+	check_operstate br.12 $opst_12
+	check_operstate br.13 $opst_13
+	check_operstate br.14 $opst_14
+}
+
+do_test_binding()
+{
+	local inject=$1; shift
+	local what=$1; shift
+	local opsts_d1=$1; shift
+	local opsts_d2=$1; shift
+	local opsts_d12=$1; shift
+	local opsts_d123=$1; shift
+
+	RET=0
+
+	defer_scope_push
+		down_netdevs d1
+		$inject
+		check_operstates $opsts_d1
+	defer_scope_pop
+
+	defer_scope_push
+		down_netdevs d2
+		$inject
+		check_operstates $opsts_d2
+	defer_scope_pop
+
+	defer_scope_push
+		down_netdevs d1 d2
+		$inject
+		check_operstates $opsts_d12
+	defer_scope_pop
+
+	defer_scope_push
+		down_netdevs d1 d2 d3
+		$inject
+		check_operstates $opsts_d123
+	defer_scope_pop
+
+	log_test "Test bridge_binding $what"
+}
+
+do_test_binding_on()
+{
+	local inject=$1; shift
+	local what=$1; shift
+
+	do_test_binding "$inject" "$what"	\
+			"0 1 1 1"		\
+			"1 0 1 1"		\
+			"0 0 0 1"		\
+			"0 0 0 0"
+}
+
+do_test_binding_off()
+{
+	local inject=$1; shift
+	local what=$1; shift
+
+	do_test_binding "$inject" "$what"	\
+			"1 1 1 1"		\
+			"1 1 1 1"		\
+			"1 1 1 1"		\
+			"0 0 0 0"
+}
+
+test_binding_on()
+{
+	add_vlans bridge_binding on
+	set_vlans up
+	do_test_binding_on : "on"
+}
+
+test_binding_off()
+{
+	add_vlans bridge_binding off
+	set_vlans up
+	do_test_binding_off : "off"
+}
+
+test_binding_toggle_on()
+{
+	add_vlans bridge_binding off
+	set_vlans up
+	set_vlans type vlan bridge_binding on
+	do_test_binding_on : "off->on"
+}
+
+test_binding_toggle_off()
+{
+	add_vlans bridge_binding on
+	set_vlans up
+	set_vlans type vlan bridge_binding off
+	do_test_binding_off : "on->off"
+}
+
+dfr_set_binding_on()
+{
+	set_vlans type vlan bridge_binding on
+	defer set_vlans type vlan bridge_binding off
+}
+
+dfr_set_binding_off()
+{
+	set_vlans type vlan bridge_binding off
+	defer set_vlans type vlan bridge_binding on
+}
+
+test_binding_toggle_on_when_lower_down()
+{
+	add_vlans bridge_binding off
+	set_vlans up
+	do_test_binding_on dfr_set_binding_on "off->on when lower down"
+}
+
+test_binding_toggle_off_when_lower_down()
+{
+	add_vlans bridge_binding on
+	set_vlans up
+	do_test_binding_off dfr_set_binding_off "on->off when lower down"
+}
+
+test_binding_toggle_on_when_upper_down()
+{
+	add_vlans bridge_binding off
+	set_vlans type vlan bridge_binding on
+	set_vlans up
+	do_test_binding_on : "off->on when upper down"
+}
+
+test_binding_toggle_off_when_upper_down()
+{
+	add_vlans bridge_binding on
+	set_vlans type vlan bridge_binding off
+	set_vlans up
+	do_test_binding_off : "on->off when upper down"
+}
+
+trap defer_scopes_cleanup EXIT
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
index d43afe243779..12e7cae251be 100644
--- a/tools/testing/selftests/net/ynl.mk
+++ b/tools/testing/selftests/net/ynl.mk
@@ -31,7 +31,8 @@ $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
 	$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
 
 EXTRA_CLEAN += \
-	$(top_srcdir)/tools/net/ynl/lib/__pycache__ \
+	$(top_srcdir)/tools/net/ynl/pyynl/__pycache__ \
+	$(top_srcdir)/tools/net/ynl/pyynl/lib/__pycache__ \
 	$(top_srcdir)/tools/net/ynl/lib/*.[ado] \
 	$(OUTPUT)/.libynl-*.sig \
 	$(OUTPUT)/libynl.a
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index e92e0b885861..58bcbbd029bc 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -43,14 +43,19 @@ cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2))
 # configure default variants for target kernel supported architectures
 XARCH_powerpc    = ppc
 XARCH_mips       = mips32le
+XARCH_riscv      = riscv64
 XARCH            = $(or $(XARCH_$(ARCH)),$(ARCH))
 
 # map from user input variants to their kernel supported architectures
+ARCH_armthumb    = arm
 ARCH_ppc         = powerpc
 ARCH_ppc64       = powerpc
 ARCH_ppc64le     = powerpc
 ARCH_mips32le    = mips
 ARCH_mips32be    = mips
+ARCH_riscv32     = riscv
+ARCH_riscv64     = riscv
+ARCH_s390x       = s390
 ARCH            := $(or $(ARCH_$(XARCH)),$(XARCH))
 
 # kernel image names by architecture
@@ -59,12 +64,16 @@ IMAGE_x86_64     = arch/x86/boot/bzImage
 IMAGE_x86        = arch/x86/boot/bzImage
 IMAGE_arm64      = arch/arm64/boot/Image
 IMAGE_arm        = arch/arm/boot/zImage
+IMAGE_armthumb   = arch/arm/boot/zImage
 IMAGE_mips32le   = vmlinuz
 IMAGE_mips32be   = vmlinuz
 IMAGE_ppc        = vmlinux
 IMAGE_ppc64      = vmlinux
 IMAGE_ppc64le    = arch/powerpc/boot/zImage
 IMAGE_riscv      = arch/riscv/boot/Image
+IMAGE_riscv32    = arch/riscv/boot/Image
+IMAGE_riscv64    = arch/riscv/boot/Image
+IMAGE_s390x      = arch/s390/boot/bzImage
 IMAGE_s390       = arch/s390/boot/bzImage
 IMAGE_loongarch  = arch/loongarch/boot/vmlinuz.efi
 IMAGE            = $(objtree)/$(IMAGE_$(XARCH))
@@ -76,17 +85,20 @@ DEFCONFIG_x86_64     = defconfig
 DEFCONFIG_x86        = defconfig
 DEFCONFIG_arm64      = defconfig
 DEFCONFIG_arm        = multi_v7_defconfig
+DEFCONFIG_armthumb   = multi_v7_defconfig
 DEFCONFIG_mips32le   = malta_defconfig
-DEFCONFIG_mips32be   = malta_defconfig
+DEFCONFIG_mips32be   = malta_defconfig generic/eb.config
 DEFCONFIG_ppc        = pmac32_defconfig
 DEFCONFIG_ppc64      = powernv_be_defconfig
 DEFCONFIG_ppc64le    = powernv_defconfig
 DEFCONFIG_riscv      = defconfig
-DEFCONFIG_s390       = defconfig
+DEFCONFIG_riscv32    = rv32_defconfig
+DEFCONFIG_riscv64    = defconfig
+DEFCONFIG_s390x      = defconfig
+DEFCONFIG_s390       = defconfig compat.config
 DEFCONFIG_loongarch  = defconfig
 DEFCONFIG            = $(DEFCONFIG_$(XARCH))
 
-EXTRACONFIG_mips32be = -d CONFIG_CPU_LITTLE_ENDIAN -e CONFIG_CPU_BIG_ENDIAN
 EXTRACONFIG           = $(EXTRACONFIG_$(XARCH))
 
 # optional tests to run (default = all)
@@ -98,12 +110,16 @@ QEMU_ARCH_x86_64     = x86_64
 QEMU_ARCH_x86        = x86_64
 QEMU_ARCH_arm64      = aarch64
 QEMU_ARCH_arm        = arm
+QEMU_ARCH_armthumb   = arm
 QEMU_ARCH_mips32le   = mipsel  # works with malta_defconfig
 QEMU_ARCH_mips32be  = mips
 QEMU_ARCH_ppc        = ppc
 QEMU_ARCH_ppc64      = ppc64
 QEMU_ARCH_ppc64le    = ppc64
 QEMU_ARCH_riscv      = riscv64
+QEMU_ARCH_riscv32    = riscv32
+QEMU_ARCH_riscv64    = riscv64
+QEMU_ARCH_s390x      = s390x
 QEMU_ARCH_s390       = s390x
 QEMU_ARCH_loongarch  = loongarch64
 QEMU_ARCH            = $(QEMU_ARCH_$(XARCH))
@@ -124,12 +140,16 @@ QEMU_ARGS_x86_64     = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(
 QEMU_ARGS_x86        = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_arm64      = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_arm        = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_armthumb   = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_mips32le   = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_mips32be   = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_ppc        = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_ppc64      = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_ppc64le    = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_riscv      = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv32    = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv64    = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_s390x      = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_s390       = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_loongarch  = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS            = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
@@ -145,15 +165,18 @@ Q=@
 endif
 
 CFLAGS_i386 = $(call cc-option,-m32)
+CFLAGS_arm = -marm
+CFLAGS_armthumb = -mthumb -march=armv6t2
 CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
 CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
 CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
-CFLAGS_s390 = -m64
+CFLAGS_s390x = -m64
+CFLAGS_s390 = -m31
 CFLAGS_mips32le = -EL -mabi=32 -fPIC
 CFLAGS_mips32be = -EB -mabi=32
 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
 CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
-		$(call cc-option,-fno-stack-protector) \
+		$(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \
 		$(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA)
 LDFLAGS :=
 
@@ -209,7 +232,7 @@ all: run
 
 sysroot: sysroot/$(ARCH)/include
 
-sysroot/$(ARCH)/include:
+sysroot/$(ARCH)/include: | defconfig
 	$(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
 	$(QUIET_MKDIR)mkdir -p sysroot
 	$(Q)$(MAKE) -C $(srctree) outputmakefile
@@ -253,16 +276,16 @@ initramfs: nolibc-test
 	$(Q)cp nolibc-test initramfs/init
 
 defconfig:
-	$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare
+	$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG)
 	$(Q)if [ -n "$(EXTRACONFIG)" ]; then \
 		$(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \
 		$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \
 	fi
 
-kernel:
+kernel: | defconfig
 	$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null
 
-kernel-standalone: initramfs
+kernel-standalone: initramfs | defconfig
 	$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null
 
 # run the tests after building the kernel
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
index 5ff4c8a1db2a..a7ca8325863f 100644
--- a/tools/testing/selftests/nolibc/nolibc-test-linkage.c
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
@@ -11,16 +11,16 @@ void *linkage_test_errno_addr(void)
 	return &errno;
 }
 
-int linkage_test_constructor_test_value;
+int linkage_test_constructor_test_value = 0;
 
 __attribute__((constructor))
 static void constructor1(void)
 {
-	linkage_test_constructor_test_value = 2;
+	linkage_test_constructor_test_value |= 1 << 0;
 }
 
 __attribute__((constructor))
 static void constructor2(void)
 {
-	linkage_test_constructor_test_value *= 3;
+	linkage_test_constructor_test_value |= 1 << 1;
 }
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 6fba7025c5e3..5884a891c491 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -43,6 +43,8 @@
 #endif
 #endif
 
+#pragma GCC diagnostic ignored "-Wmissing-prototypes"
+
 #include "nolibc-test-linkage.h"
 
 /* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */
@@ -302,7 +304,10 @@ int expect_syszr(int expr, int llen)
 {
 	int ret = 0;
 
-	if (expr) {
+	if (errno == ENOSYS) {
+		llen += printf(" = ENOSYS");
+		result(llen, SKIPPED);
+	} else if (expr) {
 		ret = 1;
 		llen += printf(" = %d %s ", expr, errorname(errno));
 		result(llen, FAIL);
@@ -342,7 +347,10 @@ int expect_sysne(int expr, int llen, int val)
 {
 	int ret = 0;
 
-	if (expr == val) {
+	if (errno == ENOSYS) {
+		llen += printf(" = ENOSYS");
+		result(llen, SKIPPED);
+	} else if (expr == val) {
 		ret = 1;
 		llen += printf(" = %d %s ", expr, errorname(errno));
 		result(llen, FAIL);
@@ -367,7 +375,9 @@ int expect_syserr2(int expr, int expret, int experr1, int experr2, int llen)
 	int _errno = errno;
 
 	llen += printf(" = %d %s ", expr, errorname(_errno));
-	if (expr != expret || (_errno != experr1 && _errno != experr2)) {
+	if (errno == ENOSYS) {
+		result(llen, SKIPPED);
+	} else if (expr != expret || (_errno != experr1 && _errno != experr2)) {
 		ret = 1;
 		if (experr2 == 0)
 			llen += printf(" != (%d %s) ", expret, errorname(experr1));
@@ -682,14 +692,14 @@ int expect_strtox(int llen, void *func, const char *input, int base, intmax_t ex
 __attribute__((constructor))
 static void constructor1(void)
 {
-	constructor_test_value = 1;
+	constructor_test_value |= 1 << 0;
 }
 
 __attribute__((constructor))
 static void constructor2(int argc, char **argv, char **envp)
 {
 	if (argc && argv && envp)
-		constructor_test_value *= 2;
+		constructor_test_value |= 1 << 1;
 }
 
 int run_startup(int min, int max)
@@ -728,9 +738,9 @@ int run_startup(int min, int max)
 		CASE_TEST(environ_HOME);     EXPECT_PTRNZ(1, getenv("HOME")); break;
 		CASE_TEST(auxv_addr);        EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break;
 		CASE_TEST(auxv_AT_UID);      EXPECT_EQ(1, getauxval(AT_UID), getuid()); break;
-		CASE_TEST(constructor);      EXPECT_EQ(1, constructor_test_value, 2); break;
+		CASE_TEST(constructor);      EXPECT_EQ(is_nolibc, constructor_test_value, 0x3); break;
 		CASE_TEST(linkage_errno);    EXPECT_PTREQ(1, linkage_test_errno_addr(), &errno); break;
-		CASE_TEST(linkage_constr);   EXPECT_EQ(1, linkage_test_constructor_test_value, 6); break;
+		CASE_TEST(linkage_constr);   EXPECT_EQ(1, linkage_test_constructor_test_value, 0x3); break;
 		case __LINE__:
 			return ret; /* must be last */
 		/* note: do not set any defaults so as to permit holes above */
@@ -759,6 +769,44 @@ int test_getdents64(const char *dir)
 	return ret;
 }
 
+static int test_dirent(void)
+{
+	int comm = 0, cmdline = 0;
+	struct dirent dirent, *result;
+	DIR *dir;
+	int ret;
+
+	dir = opendir("/proc/self");
+	if (!dir)
+		return 1;
+
+	while (1) {
+		errno = 0;
+		ret = readdir_r(dir, &dirent, &result);
+		if (ret != 0)
+			return 1;
+		if (!result)
+			break;
+
+		if (strcmp(dirent.d_name, "comm") == 0)
+			comm++;
+		else if (strcmp(dirent.d_name, "cmdline") == 0)
+			cmdline++;
+	}
+
+	if (errno)
+		return 1;
+
+	ret = closedir(dir);
+	if (ret)
+		return 1;
+
+	if (comm != 1 || cmdline != 1)
+		return 1;
+
+	return 0;
+}
+
 int test_getpagesize(void)
 {
 	int x = getpagesize();
@@ -980,6 +1028,22 @@ int test_rlimit(void)
 	return 0;
 }
 
+int test_openat(void)
+{
+	int dev, null;
+
+	dev = openat(AT_FDCWD, "/dev", O_DIRECTORY);
+	if (dev < 0)
+		return -1;
+
+	null = openat(dev, "null", O_RDONLY);
+	close(dev);
+	if (null < 0)
+		return -1;
+
+	close(null);
+	return 0;
+}
 
 /* Run syscall tests between IDs <min> and <max>.
  * Return 0 on success, non-zero on failure.
@@ -1051,6 +1115,7 @@ int run_syscall(int min, int max)
 		CASE_TEST(fork);              EXPECT_SYSZR(1, test_fork()); break;
 		CASE_TEST(getdents64_root);   EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
 		CASE_TEST(getdents64_null);   EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
+		CASE_TEST(directories);       EXPECT_SYSZR(proc, test_dirent()); break;
 		CASE_TEST(gettimeofday_tv);   EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break;
 		CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break;
 		CASE_TEST(getpagesize);       EXPECT_SYSZR(1, test_getpagesize()); break;
@@ -1065,8 +1130,9 @@ int run_syscall(int min, int max)
 		CASE_TEST(mmap_bad);          EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
 		CASE_TEST(munmap_bad);        EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break;
 		CASE_TEST(mmap_munmap_good);  EXPECT_SYSZR(1, test_mmap_munmap()); break;
-		CASE_TEST(open_tty);          EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
-		CASE_TEST(open_blah);         EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
+		CASE_TEST(open_tty);          EXPECT_SYSNE(1, tmp = open("/dev/null", O_RDONLY), -1); if (tmp != -1) close(tmp); break;
+		CASE_TEST(open_blah);         EXPECT_SYSER(1, tmp = open("/proc/self/blah", O_RDONLY), -1, ENOENT); if (tmp != -1) close(tmp); break;
+		CASE_TEST(openat_dir);        EXPECT_SYSZR(1, test_openat()); break;
 		CASE_TEST(pipe);              EXPECT_SYSZR(1, test_pipe()); break;
 		CASE_TEST(poll_null);         EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;
 		CASE_TEST(poll_stdout);       EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break;
@@ -1229,19 +1295,20 @@ int run_stdlib(int min, int max)
 
 static int expect_vfprintf(int llen, int c, const char *expected, const char *fmt, ...)
 {
-	int ret, fd;
+	int ret, pipefd[2];
 	ssize_t w, r;
 	char buf[100];
 	FILE *memfile;
 	va_list args;
 
-	fd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR, 0600);
-	if (fd == -1) {
-		result(llen, SKIPPED);
-		return 0;
+	ret = pipe(pipefd);
+	if (ret == -1) {
+		llen += printf(" pipe() != %s", strerror(errno));
+		result(llen, FAIL);
+		return 1;
 	}
 
-	memfile = fdopen(fd, "w+");
+	memfile = fdopen(pipefd[1], "w");
 	if (!memfile) {
 		result(llen, FAIL);
 		return 1;
@@ -1257,13 +1324,10 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
 		return 1;
 	}
 
-	fflush(memfile);
-	lseek(fd, 0, SEEK_SET);
-
-	r = read(fd, buf, sizeof(buf) - 1);
-
 	fclose(memfile);
 
+	r = read(pipefd[0], buf, sizeof(buf) - 1);
+
 	if (r != w) {
 		llen += printf(" written(%d) != read(%d)", (int)w, (int)r);
 		result(llen, FAIL);
@@ -1278,6 +1342,73 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
 	return ret;
 }
 
+static int test_scanf(void)
+{
+	unsigned long long ull;
+	unsigned long ul;
+	unsigned int u;
+	long long ll;
+	long l;
+	void *p;
+	int i;
+
+	/* return __LINE__ to point to the specific failure */
+
+	/* test EOF */
+	if (sscanf("", "foo") != EOF)
+		return __LINE__;
+
+	/* test simple literal without placeholder */
+	if (sscanf("foo", "foo") != 0)
+		return __LINE__;
+
+	/* test single placeholder */
+	if (sscanf("123", "%d", &i) != 1)
+		return __LINE__;
+
+	if (i != 123)
+		return __LINE__;
+
+	/* test multiple place holders and separators */
+	if (sscanf("a123b456c0x90", "a%db%uc%p", &i, &u, &p) != 3)
+		return __LINE__;
+
+	if (i != 123)
+		return __LINE__;
+
+	if (u != 456)
+		return __LINE__;
+
+	if (p != (void *)0x90)
+		return __LINE__;
+
+	/* test space handling */
+	if (sscanf("a    b1", "a b%d", &i) != 1)
+		return __LINE__;
+
+	if (i != 1)
+		return __LINE__;
+
+	/* test literal percent */
+	if (sscanf("a%1", "a%%%d", &i) != 1)
+		return __LINE__;
+
+	if (i != 1)
+		return __LINE__;
+
+	/* test stdint.h types */
+	if (sscanf("1|2|3|4|5|6",
+		   "%d|%ld|%lld|%u|%lu|%llu",
+		   &i, &l, &ll, &u, &ul, &ull) != 6)
+		return __LINE__;
+
+	if (i != 1 || l != 2 || ll != 3 ||
+	    u != 4 || ul != 5 || ull != 6)
+		return __LINE__;
+
+	return 0;
+}
+
 static int run_vfprintf(int min, int max)
 {
 	int test;
@@ -1299,6 +1430,7 @@ static int run_vfprintf(int min, int max)
 		CASE_TEST(char);         EXPECT_VFPRINTF(1, "c", "%c", 'c'); break;
 		CASE_TEST(hex);          EXPECT_VFPRINTF(1, "f", "%x", 0xf); break;
 		CASE_TEST(pointer);      EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break;
+		CASE_TEST(scanf);        EXPECT_ZR(1, test_scanf()); break;
 		case __LINE__:
 			return ret; /* must be last */
 		/* note: do not set any defaults so as to permit holes above */
@@ -1323,7 +1455,8 @@ static int run_protection(int min __attribute__((unused)),
 			  int max __attribute__((unused)))
 {
 	pid_t pid;
-	int llen = 0, status;
+	int llen = 0, ret;
+	siginfo_t siginfo = {};
 	struct rlimit rlimit = { 0, 0 };
 
 	llen += printf("0 -fstackprotector ");
@@ -1361,10 +1494,11 @@ static int run_protection(int min __attribute__((unused)),
 		return 1;
 
 	default:
-		pid = waitpid(pid, &status, 0);
+		ret = waitid(P_PID, pid, &siginfo, WEXITED);
 
-		if (pid == -1 || !WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) {
-			llen += printf("waitpid()");
+		if (ret != 0 || siginfo.si_signo != SIGCHLD ||
+		    siginfo.si_code != CLD_KILLED || siginfo.si_status != SIGABRT) {
+			llen += printf("waitid()");
 			result(llen, FAIL);
 			return 1;
 		}
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index e7ecda4ae796..0299a0912d40 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -17,7 +17,16 @@ perform_download=0
 test_mode=system
 werror=1
 llvm=
-archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch"
+all_archs=(
+	i386 x86_64
+	arm64 arm armthumb
+	mips32le mips32be
+	ppc ppc64 ppc64le
+	riscv32 riscv64
+	s390x s390
+	loongarch
+)
+archs="${all_archs[@]}"
 
 TEMP=$(getopt -o 'j:d:c:b:a:m:pelh' -n "$0" -- "$@")
 
@@ -94,19 +103,21 @@ fi
 crosstool_arch() {
 	case "$1" in
 	arm64) echo aarch64;;
+	armthumb) echo arm;;
 	ppc) echo powerpc;;
 	ppc64) echo powerpc64;;
 	ppc64le) echo powerpc64;;
 	riscv) echo riscv64;;
 	loongarch) echo loongarch64;;
 	mips*) echo mips;;
+	s390*) echo s390;;
 	*) echo "$1";;
 	esac
 }
 
 crosstool_abi() {
 	case "$1" in
-	arm) echo linux-gnueabi;;
+	arm | armthumb) echo linux-gnueabi;;
 	*) echo linux;;
 	esac
 }
@@ -143,6 +154,13 @@ test_arch() {
 	arch=$1
 	ct_arch=$(crosstool_arch "$arch")
 	ct_abi=$(crosstool_abi "$1")
+
+	if [ ! -d "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/." ]; then
+		echo "No toolchain found in ${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}."
+		echo "Did you install the toolchains or set the correct arch ? Rerun with -h for help."
+		return 1
+	fi
+
 	cross_compile=$(realpath "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/${ct_arch}-${ct_abi}-")
 	build_dir="${build_location}/${arch}"
 	if [ "$werror" -ne 0 ]; then
@@ -150,10 +168,6 @@ test_arch() {
 	fi
 	MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
 
-	mkdir -p "$build_dir"
-	if [ "$test_mode" = "system" ] && [ ! -f "${build_dir}/.config" ]; then
-		swallow_output "${MAKE[@]}" defconfig
-	fi
 	case "$test_mode" in
 		'system')
 			test_target=run
@@ -166,6 +180,13 @@ test_arch() {
 			exit 1
 	esac
 	printf '%-15s' "$arch:"
+	if [ "$arch" = "s390" ] && ([ "$llvm" = "1" ] || [ "$test_mode" = "user" ]); then
+		echo "Unsupported configuration"
+		return
+	fi
+
+	mkdir -p "$build_dir"
+	swallow_output "${MAKE[@]}" defconfig
 	swallow_output "${MAKE[@]}" CFLAGS_EXTRA="$CFLAGS_EXTRA" "$test_target" V=1
 	cp run.out run.out."${arch}"
 	"${MAKE[@]}" report | grep passed
diff --git a/tools/testing/selftests/pci_endpoint/.gitignore b/tools/testing/selftests/pci_endpoint/.gitignore
new file mode 100644
index 000000000000..6a4837a3e034
--- /dev/null
+++ b/tools/testing/selftests/pci_endpoint/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+pci_endpoint_test
diff --git a/tools/testing/selftests/pci_endpoint/Makefile b/tools/testing/selftests/pci_endpoint/Makefile
new file mode 100644
index 000000000000..bf21ebf20b4a
--- /dev/null
+++ b/tools/testing/selftests/pci_endpoint/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O2 -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
+LDFLAGS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = pci_endpoint_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/pci_endpoint/config b/tools/testing/selftests/pci_endpoint/config
new file mode 100644
index 000000000000..7cdcf117db8d
--- /dev/null
+++ b/tools/testing/selftests/pci_endpoint/config
@@ -0,0 +1,4 @@
+CONFIG_PCI_ENDPOINT=y
+CONFIG_PCI_ENDPOINT_CONFIGFS=y
+CONFIG_PCI_EPF_TEST=m
+CONFIG_PCI_ENDPOINT_TEST=m
diff --git a/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
new file mode 100644
index 000000000000..c267b822c108
--- /dev/null
+++ b/tools/testing/selftests/pci_endpoint/pci_endpoint_test.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Kselftest for PCI Endpoint Subsystem
+ *
+ * Copyright (c) 2022 Samsung Electronics Co., Ltd.
+ *             https://www.samsung.com
+ * Author: Aman Gupta <aman1.gupta@samsung.com>
+ *
+ * Copyright (c) 2024, Linaro Ltd.
+ * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include "../../../../include/uapi/linux/pcitest.h"
+
+#include "../kselftest_harness.h"
+
+#define pci_ep_ioctl(cmd, arg)			\
+({						\
+	ret = ioctl(self->fd, cmd, arg);	\
+	ret = ret < 0 ? -errno : 0;		\
+})
+
+static const char *test_device = "/dev/pci-endpoint-test.0";
+static const unsigned long test_size[5] = { 1, 1024, 1025, 1024000, 1024001 };
+
+FIXTURE(pci_ep_bar)
+{
+	int fd;
+};
+
+FIXTURE_SETUP(pci_ep_bar)
+{
+	self->fd = open(test_device, O_RDWR);
+
+	ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device");
+}
+
+FIXTURE_TEARDOWN(pci_ep_bar)
+{
+	close(self->fd);
+}
+
+FIXTURE_VARIANT(pci_ep_bar)
+{
+	int barno;
+};
+
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR0) { .barno = 0 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR1) { .barno = 1 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR2) { .barno = 2 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR3) { .barno = 3 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR4) { .barno = 4 };
+FIXTURE_VARIANT_ADD(pci_ep_bar, BAR5) { .barno = 5 };
+
+TEST_F(pci_ep_bar, BAR_TEST)
+{
+	int ret;
+
+	pci_ep_ioctl(PCITEST_BAR, variant->barno);
+	EXPECT_FALSE(ret) TH_LOG("Test failed for BAR%d", variant->barno);
+}
+
+FIXTURE(pci_ep_basic)
+{
+	int fd;
+};
+
+FIXTURE_SETUP(pci_ep_basic)
+{
+	self->fd = open(test_device, O_RDWR);
+
+	ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device");
+}
+
+FIXTURE_TEARDOWN(pci_ep_basic)
+{
+	close(self->fd);
+}
+
+TEST_F(pci_ep_basic, CONSECUTIVE_BAR_TEST)
+{
+	int ret;
+
+	pci_ep_ioctl(PCITEST_BARS, 0);
+	EXPECT_FALSE(ret) TH_LOG("Consecutive BAR test failed");
+}
+
+TEST_F(pci_ep_basic, LEGACY_IRQ_TEST)
+{
+	int ret;
+
+	pci_ep_ioctl(PCITEST_SET_IRQTYPE, 0);
+	ASSERT_EQ(0, ret) TH_LOG("Can't set Legacy IRQ type");
+
+	pci_ep_ioctl(PCITEST_LEGACY_IRQ, 0);
+	EXPECT_FALSE(ret) TH_LOG("Test failed for Legacy IRQ");
+}
+
+TEST_F(pci_ep_basic, MSI_TEST)
+{
+	int ret, i;
+
+	pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1);
+	ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type");
+
+	for (i = 1; i <= 32; i++) {
+		pci_ep_ioctl(PCITEST_MSI, i);
+		EXPECT_FALSE(ret) TH_LOG("Test failed for MSI%d", i);
+	}
+}
+
+TEST_F(pci_ep_basic, MSIX_TEST)
+{
+	int ret, i;
+
+	pci_ep_ioctl(PCITEST_SET_IRQTYPE, 2);
+	ASSERT_EQ(0, ret) TH_LOG("Can't set MSI-X IRQ type");
+
+	for (i = 1; i <= 2048; i++) {
+		pci_ep_ioctl(PCITEST_MSIX, i);
+		EXPECT_FALSE(ret) TH_LOG("Test failed for MSI-X%d", i);
+	}
+}
+
+FIXTURE(pci_ep_data_transfer)
+{
+	int fd;
+};
+
+FIXTURE_SETUP(pci_ep_data_transfer)
+{
+	self->fd = open(test_device, O_RDWR);
+
+	ASSERT_NE(-1, self->fd) TH_LOG("Can't open PCI Endpoint Test device");
+}
+
+FIXTURE_TEARDOWN(pci_ep_data_transfer)
+{
+	close(self->fd);
+}
+
+FIXTURE_VARIANT(pci_ep_data_transfer)
+{
+	bool use_dma;
+};
+
+FIXTURE_VARIANT_ADD(pci_ep_data_transfer, memcpy)
+{
+	.use_dma = false,
+};
+
+FIXTURE_VARIANT_ADD(pci_ep_data_transfer, dma)
+{
+	.use_dma = true,
+};
+
+TEST_F(pci_ep_data_transfer, READ_TEST)
+{
+	struct pci_endpoint_test_xfer_param param = {};
+	int ret, i;
+
+	if (variant->use_dma)
+		param.flags = PCITEST_FLAGS_USE_DMA;
+
+	pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1);
+	ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type");
+
+	for (i = 0; i < ARRAY_SIZE(test_size); i++) {
+		param.size = test_size[i];
+		pci_ep_ioctl(PCITEST_READ, &param);
+		EXPECT_FALSE(ret) TH_LOG("Test failed for size (%ld)",
+					 test_size[i]);
+	}
+}
+
+TEST_F(pci_ep_data_transfer, WRITE_TEST)
+{
+	struct pci_endpoint_test_xfer_param param = {};
+	int ret, i;
+
+	if (variant->use_dma)
+		param.flags = PCITEST_FLAGS_USE_DMA;
+
+	pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1);
+	ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type");
+
+	for (i = 0; i < ARRAY_SIZE(test_size); i++) {
+		param.size = test_size[i];
+		pci_ep_ioctl(PCITEST_WRITE, &param);
+		EXPECT_FALSE(ret) TH_LOG("Test failed for size (%ld)",
+					 test_size[i]);
+	}
+}
+
+TEST_F(pci_ep_data_transfer, COPY_TEST)
+{
+	struct pci_endpoint_test_xfer_param param = {};
+	int ret, i;
+
+	if (variant->use_dma)
+		param.flags = PCITEST_FLAGS_USE_DMA;
+
+	pci_ep_ioctl(PCITEST_SET_IRQTYPE, 1);
+	ASSERT_EQ(0, ret) TH_LOG("Can't set MSI IRQ type");
+
+	for (i = 0; i < ARRAY_SIZE(test_size); i++) {
+		param.size = test_size[i];
+		pci_ep_ioctl(PCITEST_COPY, &param);
+		EXPECT_FALSE(ret) TH_LOG("Test failed for size (%ld)",
+					 test_size[i]);
+	}
+}
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index bf92481f925c..0406a065deb4 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -8,3 +8,5 @@ pidfd_getfd_test
 pidfd_setns_test
 pidfd_file_handle_test
 pidfd_bind_mount
+pidfd_info_test
+pidfd_exec_helper
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 301343a11b62..fcbefc0d77f6 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -3,7 +3,9 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall
 
 TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
 	pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \
-	pidfd_file_handle_test pidfd_bind_mount
+	pidfd_file_handle_test pidfd_bind_mount pidfd_info_test
+
+TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 0b96ac4b8ce5..cec22aa11cdf 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <syscall.h>
+#include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
@@ -50,6 +51,107 @@
 #define PIDFD_NONBLOCK O_NONBLOCK
 #endif
 
+#ifndef PIDFD_SELF_THREAD
+#define PIDFD_SELF_THREAD		-10000 /* Current thread. */
+#endif
+
+#ifndef PIDFD_SELF_THREAD_GROUP
+#define PIDFD_SELF_THREAD_GROUP		-20000 /* Current thread group leader. */
+#endif
+
+#ifndef PIDFD_SELF
+#define PIDFD_SELF		PIDFD_SELF_THREAD
+#endif
+
+#ifndef PIDFD_SELF_PROCESS
+#define PIDFD_SELF_PROCESS	PIDFD_SELF_THREAD_GROUP
+#endif
+
+#ifndef PIDFS_IOCTL_MAGIC
+#define PIDFS_IOCTL_MAGIC 0xFF
+#endif
+
+#ifndef PIDFD_GET_CGROUP_NAMESPACE
+#define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
+#endif
+
+#ifndef PIDFD_GET_IPC_NAMESPACE
+#define PIDFD_GET_IPC_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 2)
+#endif
+
+#ifndef PIDFD_GET_MNT_NAMESPACE
+#define PIDFD_GET_MNT_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 3)
+#endif
+
+#ifndef PIDFD_GET_NET_NAMESPACE
+#define PIDFD_GET_NET_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 4)
+#endif
+
+#ifndef PIDFD_GET_PID_NAMESPACE
+#define PIDFD_GET_PID_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 5)
+#endif
+
+#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
+#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE  _IO(PIDFS_IOCTL_MAGIC, 6)
+#endif
+
+#ifndef PIDFD_GET_TIME_NAMESPACE
+#define PIDFD_GET_TIME_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 7)
+#endif
+
+#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
+#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
+#endif
+
+#ifndef PIDFD_GET_USER_NAMESPACE
+#define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
+#endif
+
+#ifndef PIDFD_GET_UTS_NAMESPACE
+#define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
+#endif
+
+#ifndef PIDFD_GET_INFO
+#define PIDFD_GET_INFO			      _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
+#endif
+
+#ifndef PIDFD_INFO_PID
+#define PIDFD_INFO_PID			(1UL << 0) /* Always returned, even if not requested */
+#endif
+
+#ifndef PIDFD_INFO_CREDS
+#define PIDFD_INFO_CREDS		(1UL << 1) /* Always returned, even if not requested */
+#endif
+
+#ifndef PIDFD_INFO_CGROUPID
+#define PIDFD_INFO_CGROUPID		(1UL << 2) /* Always returned if available, even if not requested */
+#endif
+
+#ifndef PIDFD_INFO_EXIT
+#define PIDFD_INFO_EXIT			(1UL << 3) /* Always returned if available, even if not requested */
+#endif
+
+#ifndef PIDFD_THREAD
+#define PIDFD_THREAD O_EXCL
+#endif
+
+struct pidfd_info {
+	__u64 mask;
+	__u64 cgroupid;
+	__u32 pid;
+	__u32 tgid;
+	__u32 ppid;
+	__u32 ruid;
+	__u32 rgid;
+	__u32 euid;
+	__u32 egid;
+	__u32 suid;
+	__u32 sgid;
+	__u32 fsuid;
+	__u32 fsgid;
+	__s32 exit_code;
+};
+
 /*
  * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
  * That means, when it wraps around any pid < 300 will be skipped.
@@ -152,4 +254,11 @@ static inline ssize_t write_nointr(int fd, const void *buf, size_t count)
 	return ret;
 }
 
+static inline int sys_execveat(int dirfd, const char *pathname,
+			       char *const argv[], char *const envp[],
+			       int flags)
+{
+        return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
 #endif /* __PIDFD_H */
diff --git a/tools/testing/selftests/pidfd/pidfd_exec_helper.c b/tools/testing/selftests/pidfd/pidfd_exec_helper.c
new file mode 100644
index 000000000000..5516808c95f2
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_exec_helper.c
@@ -0,0 +1,12 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[])
+{
+	if (pause())
+		_exit(EXIT_FAILURE);
+
+	_exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index f062a986e382..f718aac75068 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -13,6 +13,7 @@
 #include <syscall.h>
 #include <sys/wait.h>
 #include <sys/mman.h>
+#include <sys/mount.h>
 
 #include "pidfd.h"
 #include "../kselftest.h"
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
new file mode 100644
index 000000000000..1758a1b0457b
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+#include <sys/stat.h>
+
+#include "pidfd.h"
+#include "../kselftest_harness.h"
+
+FIXTURE(pidfd_info)
+{
+	pid_t child_pid1;
+	int child_pidfd1;
+
+	pid_t child_pid2;
+	int child_pidfd2;
+
+	pid_t child_pid3;
+	int child_pidfd3;
+
+	pid_t child_pid4;
+	int child_pidfd4;
+};
+
+FIXTURE_SETUP(pidfd_info)
+{
+	int ret;
+	int ipc_sockets[2];
+	char c;
+
+	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	EXPECT_EQ(ret, 0);
+
+	self->child_pid1 = create_child(&self->child_pidfd1, 0);
+	EXPECT_GE(self->child_pid1, 0);
+
+	if (self->child_pid1 == 0) {
+		close(ipc_sockets[0]);
+
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+			_exit(EXIT_FAILURE);
+
+		close(ipc_sockets[1]);
+
+		pause();
+		_exit(EXIT_SUCCESS);
+	}
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	/* SIGKILL but don't reap. */
+	EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0);
+
+	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	EXPECT_EQ(ret, 0);
+
+	self->child_pid2 = create_child(&self->child_pidfd2, 0);
+	EXPECT_GE(self->child_pid2, 0);
+
+	if (self->child_pid2 == 0) {
+		close(ipc_sockets[0]);
+
+		if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+			_exit(EXIT_FAILURE);
+
+		close(ipc_sockets[1]);
+
+		pause();
+		_exit(EXIT_SUCCESS);
+	}
+
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	/* SIGKILL and reap. */
+	EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0);
+	EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0);
+
+	self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID);
+	EXPECT_GE(self->child_pid3, 0);
+
+	if (self->child_pid3 == 0)
+		_exit(EXIT_SUCCESS);
+
+	self->child_pid4 = create_child(&self->child_pidfd4, CLONE_NEWUSER | CLONE_NEWPID);
+	EXPECT_GE(self->child_pid4, 0);
+
+	if (self->child_pid4 == 0)
+		_exit(EXIT_SUCCESS);
+
+	EXPECT_EQ(sys_waitid(P_PID, self->child_pid4, NULL, WEXITED), 0);
+}
+
+FIXTURE_TEARDOWN(pidfd_info)
+{
+	sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0);
+	if (self->child_pidfd1 >= 0)
+		EXPECT_EQ(0, close(self->child_pidfd1));
+
+	sys_waitid(P_PID, self->child_pid1, NULL, WEXITED);
+
+	sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0);
+	if (self->child_pidfd2 >= 0)
+		EXPECT_EQ(0, close(self->child_pidfd2));
+
+	sys_waitid(P_PID, self->child_pid2, NULL, WEXITED);
+	sys_waitid(P_PID, self->child_pid3, NULL, WEXITED);
+	sys_waitid(P_PID, self->child_pid4, NULL, WEXITED);
+}
+
+TEST_F(pidfd_info, sigkill_exit)
+{
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_CGROUPID,
+	};
+
+	/* Process has exited but not been reaped so this must work. */
+	ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0);
+
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0);
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+	/* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+}
+
+TEST_F(pidfd_info, sigkill_reaped)
+{
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_CGROUPID,
+	};
+
+	/* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */
+	ASSERT_NE(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
+	ASSERT_EQ(errno, ESRCH);
+
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+	ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+	ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
+}
+
+TEST_F(pidfd_info, success_exit)
+{
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_CGROUPID,
+	};
+
+	/* Process has exited but not been reaped so this must work. */
+	ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0);
+
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0);
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+	/* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+}
+
+TEST_F(pidfd_info, success_reaped)
+{
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_CGROUPID,
+	};
+
+	/* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */
+	ASSERT_NE(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
+	ASSERT_EQ(errno, ESRCH);
+
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+	ASSERT_TRUE(WIFEXITED(info.exit_code));
+	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+}
+
+TEST_F(pidfd_info, success_reaped_poll)
+{
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+	};
+	struct pollfd fds = {};
+	int nevents;
+
+	fds.events = POLLIN;
+	fds.fd = self->child_pidfd2;
+
+	nevents = poll(&fds, 1, -1);
+	ASSERT_EQ(nevents, 1);
+	ASSERT_TRUE(!!(fds.revents & POLLIN));
+	ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+	ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+	ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+	ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
+}
+
+static void *pidfd_info_pause_thread(void *arg)
+{
+	pid_t pid_thread = gettid();
+	int ipc_socket = *(int *)arg;
+
+	/* Inform the grand-parent what the tid of this thread is. */
+	if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+		return NULL;
+
+	close(ipc_socket);
+
+	/* Sleep untill we're killed. */
+	pause();
+	return NULL;
+}
+
+TEST_F(pidfd_info, thread_group)
+{
+	pid_t pid_leader, pid_poller, pid_thread;
+	pthread_t thread;
+	int nevents, pidfd_leader, pidfd_thread, pidfd_leader_thread, ret;
+	int ipc_sockets[2];
+	struct pollfd fds = {};
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+	}, info2;
+
+	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	EXPECT_EQ(ret, 0);
+
+	pid_leader = create_child(&pidfd_leader, 0);
+	EXPECT_GE(pid_leader, 0);
+
+	if (pid_leader == 0) {
+		close(ipc_sockets[0]);
+
+		/* The thread will outlive the thread-group leader. */
+		if (pthread_create(&thread, NULL, pidfd_info_pause_thread, &ipc_sockets[1]))
+			syscall(__NR_exit, EXIT_FAILURE);
+
+		/* Make the thread-group leader exit prematurely. */
+		syscall(__NR_exit, EXIT_SUCCESS);
+	}
+
+	/*
+	 * Opening a PIDFD_THREAD aka thread-specific pidfd based on a
+	 * thread-group leader must succeed.
+	 */
+	pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+	ASSERT_GE(pidfd_leader_thread, 0);
+
+	pid_poller = fork();
+	ASSERT_GE(pid_poller, 0);
+	if (pid_poller == 0) {
+		/*
+		 * We can't poll and wait for the old thread-group
+		 * leader to exit using a thread-specific pidfd. The
+		 * thread-group leader exited prematurely and
+		 * notification is delayed until all subthreads have
+		 * exited.
+		 */
+		fds.events = POLLIN;
+		fds.fd = pidfd_leader_thread;
+		nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+		if (nevents != 0)
+			_exit(EXIT_FAILURE);
+		if (fds.revents & POLLIN)
+			_exit(EXIT_FAILURE);
+		if (fds.revents & POLLHUP)
+			_exit(EXIT_FAILURE);
+		_exit(EXIT_SUCCESS);
+	}
+
+	/* Retrieve the tid of the thread. */
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	/* Opening a thread as a thread-group leader must fail. */
+	pidfd_thread = sys_pidfd_open(pid_thread, 0);
+	ASSERT_LT(pidfd_thread, 0);
+
+	/* Opening a thread as a PIDFD_THREAD must succeed. */
+	pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+	ASSERT_GE(pidfd_thread, 0);
+
+	ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+	/*
+	 * Note that pidfd_leader is a thread-group pidfd, so polling on it
+	 * would only notify us once all thread in the thread-group have
+	 * exited. So we can't poll before we have taken down the whole
+	 * thread-group.
+	 */
+
+	/* Get PIDFD_GET_INFO using the thread-group leader pidfd. */
+	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+	/* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+	ASSERT_EQ(info.pid, pid_leader);
+
+	/*
+	 * Now retrieve the same info using the thread specific pidfd
+	 * for the thread-group leader.
+	 */
+	info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0);
+	ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_CREDS));
+	/* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+	ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_EXIT));
+	ASSERT_EQ(info2.pid, pid_leader);
+
+	/* Now try the thread-specific pidfd. */
+	ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+	/* The thread hasn't exited, so no PIDFD_INFO_EXIT information yet. */
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+	ASSERT_EQ(info.pid, pid_thread);
+
+	/*
+	 * Take down the whole thread-group. The thread-group leader
+	 * exited successfully but the thread will now be SIGKILLed.
+	 * This must be reflected in the recorded exit information.
+	 */
+	EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+	EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+	fds.events = POLLIN;
+	fds.fd = pidfd_leader;
+	nevents = poll(&fds, 1, -1);
+	ASSERT_EQ(nevents, 1);
+	ASSERT_TRUE(!!(fds.revents & POLLIN));
+	/* The thread-group leader has been reaped. */
+	ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+	/*
+	 * Retrieve exit information for the thread-group leader via the
+	 * thread-group leader pidfd.
+	 */
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+	/* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
+	ASSERT_TRUE(WIFEXITED(info.exit_code));
+	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+	/*
+	 * Retrieve exit information for the thread-group leader via the
+	 * thread-specific pidfd.
+	 */
+	info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0);
+	ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT));
+
+	/* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
+	ASSERT_TRUE(WIFEXITED(info2.exit_code));
+	ASSERT_EQ(WEXITSTATUS(info2.exit_code), 0);
+
+	/* Retrieve exit information for the thread. */
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+	/* The thread got SIGKILLed. */
+	ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+	ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
+
+	EXPECT_EQ(close(pidfd_leader), 0);
+	EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+static void *pidfd_info_thread_exec(void *arg)
+{
+	pid_t pid_thread = gettid();
+	int ipc_socket = *(int *)arg;
+
+	/* Inform the grand-parent what the tid of this thread is. */
+	if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+		return NULL;
+
+	if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+		return NULL;
+
+	close(ipc_socket);
+
+	sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0);
+	return NULL;
+}
+
+TEST_F(pidfd_info, thread_group_exec)
+{
+	pid_t pid_leader, pid_poller, pid_thread;
+	pthread_t thread;
+	int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
+	int ipc_sockets[2];
+	struct pollfd fds = {};
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+	};
+
+	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	EXPECT_EQ(ret, 0);
+
+	pid_leader = create_child(&pidfd_leader, 0);
+	EXPECT_GE(pid_leader, 0);
+
+	if (pid_leader == 0) {
+		close(ipc_sockets[0]);
+
+		/* The thread will outlive the thread-group leader. */
+		if (pthread_create(&thread, NULL, pidfd_info_thread_exec, &ipc_sockets[1]))
+			syscall(__NR_exit, EXIT_FAILURE);
+
+		/* Make the thread-group leader exit prematurely. */
+		syscall(__NR_exit, EXIT_SUCCESS);
+	}
+
+	/* Open a thread-specific pidfd for the thread-group leader. */
+	pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+	ASSERT_GE(pidfd_leader_thread, 0);
+
+	pid_poller = fork();
+	ASSERT_GE(pid_poller, 0);
+	if (pid_poller == 0) {
+		/*
+		 * We can't poll and wait for the old thread-group
+		 * leader to exit using a thread-specific pidfd. The
+		 * thread-group leader exited prematurely and
+		 * notification is delayed until all subthreads have
+		 * exited.
+		 *
+		 * When the thread has execed it will taken over the old
+		 * thread-group leaders struct pid. Calling poll after
+		 * the thread execed will thus block again because a new
+		 * thread-group has started.
+		 */
+		fds.events = POLLIN;
+		fds.fd = pidfd_leader_thread;
+		nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+		if (nevents != 0)
+			_exit(EXIT_FAILURE);
+		if (fds.revents & POLLIN)
+			_exit(EXIT_FAILURE);
+		if (fds.revents & POLLHUP)
+			_exit(EXIT_FAILURE);
+		_exit(EXIT_SUCCESS);
+	}
+
+	/* Retrieve the tid of the thread. */
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+
+	/* Opening a thread as a PIDFD_THREAD must succeed. */
+	pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+	ASSERT_GE(pidfd_thread, 0);
+
+	/* Now that we've opened a thread-specific pidfd the thread can exec. */
+	ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+	ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+	/* Wait until the kernel has SIGKILLed the thread. */
+	fds.events = POLLHUP;
+	fds.fd = pidfd_thread;
+	nevents = poll(&fds, 1, -1);
+	ASSERT_EQ(nevents, 1);
+	/* The thread has been reaped. */
+	ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+	/* Retrieve thread-specific exit info from pidfd. */
+	ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+	/*
+	 * While the kernel will have SIGKILLed the whole thread-group
+	 * during exec it will cause the individual threads to exit
+	 * cleanly.
+	 */
+	ASSERT_TRUE(WIFEXITED(info.exit_code));
+	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+	/*
+	 * The thread-group leader is still alive, the thread has taken
+	 * over its struct pid and thus its pid number.
+	 */
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+	ASSERT_EQ(info.pid, pid_leader);
+
+	/* Take down the thread-group leader. */
+	EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+
+	/*
+	 * Afte the exec we're dealing with an empty thread-group so now
+	 * we must see an exit notification on the thread-specific pidfd
+	 * for the thread-group leader as there's no subthread that can
+	 * revive the struct pid.
+	 */
+	fds.events = POLLIN;
+	fds.fd = pidfd_leader_thread;
+	nevents = poll(&fds, 1, -1);
+	ASSERT_EQ(nevents, 1);
+	ASSERT_TRUE(!!(fds.revents & POLLIN));
+	ASSERT_FALSE(!!(fds.revents & POLLHUP));
+
+	EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+	/* Retrieve exit information for the thread-group leader. */
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+	EXPECT_EQ(close(pidfd_leader), 0);
+	EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+static void *pidfd_info_thread_exec_sane(void *arg)
+{
+	pid_t pid_thread = gettid();
+	int ipc_socket = *(int *)arg;
+
+	/* Inform the grand-parent what the tid of this thread is. */
+	if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+		return NULL;
+
+	if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+		return NULL;
+
+	close(ipc_socket);
+
+	sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0);
+	return NULL;
+}
+
+TEST_F(pidfd_info, thread_group_exec_thread)
+{
+	pid_t pid_leader, pid_poller, pid_thread;
+	pthread_t thread;
+	int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
+	int ipc_sockets[2];
+	struct pollfd fds = {};
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+	};
+
+	ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+	EXPECT_EQ(ret, 0);
+
+	pid_leader = create_child(&pidfd_leader, 0);
+	EXPECT_GE(pid_leader, 0);
+
+	if (pid_leader == 0) {
+		close(ipc_sockets[0]);
+
+		/* The thread will outlive the thread-group leader. */
+		if (pthread_create(&thread, NULL, pidfd_info_thread_exec_sane, &ipc_sockets[1]))
+			syscall(__NR_exit, EXIT_FAILURE);
+
+		/*
+		 * Pause the thread-group leader. It will be killed once
+		 * the subthread execs.
+		 */
+		pause();
+		syscall(__NR_exit, EXIT_SUCCESS);
+	}
+
+	/* Retrieve the tid of the thread. */
+	EXPECT_EQ(close(ipc_sockets[1]), 0);
+	ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+
+	/* Opening a thread as a PIDFD_THREAD must succeed. */
+	pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+	ASSERT_GE(pidfd_thread, 0);
+
+	/* Open a thread-specific pidfd for the thread-group leader. */
+	pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+	ASSERT_GE(pidfd_leader_thread, 0);
+
+	pid_poller = fork();
+	ASSERT_GE(pid_poller, 0);
+	if (pid_poller == 0) {
+		/*
+		 * The subthread will now exec. The struct pid of the old
+		 * thread-group leader will be assumed by the subthread which
+		 * becomes the new thread-group leader. So no exit notification
+		 * must be generated. Wait for 5 seconds and call it a success
+		 * if no notification has been received.
+		 */
+		fds.events = POLLIN;
+		fds.fd = pidfd_leader_thread;
+		nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+		if (nevents != 0)
+			_exit(EXIT_FAILURE);
+		if (fds.revents & POLLIN)
+			_exit(EXIT_FAILURE);
+		if (fds.revents & POLLHUP)
+			_exit(EXIT_FAILURE);
+		_exit(EXIT_SUCCESS);
+	}
+
+	/* Now that we've opened a thread-specific pidfd the thread can exec. */
+	ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+	EXPECT_EQ(close(ipc_sockets[0]), 0);
+	ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+	/* Wait until the kernel has SIGKILLed the thread. */
+	fds.events = POLLHUP;
+	fds.fd = pidfd_thread;
+	nevents = poll(&fds, 1, -1);
+	ASSERT_EQ(nevents, 1);
+	/* The thread has been reaped. */
+	ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+	/* Retrieve thread-specific exit info from pidfd. */
+	ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+	/*
+	 * While the kernel will have SIGKILLed the whole thread-group
+	 * during exec it will cause the individual threads to exit
+	 * cleanly.
+	 */
+	ASSERT_TRUE(WIFEXITED(info.exit_code));
+	ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+	/*
+	 * The thread-group leader is still alive, the thread has taken
+	 * over its struct pid and thus its pid number.
+	 */
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+	ASSERT_EQ(info.pid, pid_leader);
+
+	/* Take down the thread-group leader. */
+	EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+
+	/*
+	 * Afte the exec we're dealing with an empty thread-group so now
+	 * we must see an exit notification on the thread-specific pidfd
+	 * for the thread-group leader as there's no subthread that can
+	 * revive the struct pid.
+	 */
+	fds.events = POLLIN;
+	fds.fd = pidfd_leader_thread;
+	nevents = poll(&fds, 1, -1);
+	ASSERT_EQ(nevents, 1);
+	ASSERT_TRUE(!!(fds.revents & POLLIN));
+	ASSERT_FALSE(!!(fds.revents & POLLHUP));
+
+	EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+	/* Retrieve exit information for the thread-group leader. */
+	info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+	ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+	ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+	ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+	EXPECT_EQ(close(pidfd_leader), 0);
+	EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index ce413a221bac..cd3de40e4977 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -22,32 +22,6 @@
 #include "pidfd.h"
 #include "../kselftest.h"
 
-#ifndef PIDFS_IOCTL_MAGIC
-#define PIDFS_IOCTL_MAGIC 0xFF
-#endif
-
-#ifndef PIDFD_GET_INFO
-#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
-#define PIDFD_INFO_CGROUPID		(1UL << 0)
-
-struct pidfd_info {
-	__u64 request_mask;
-	__u64 cgroupid;
-	__u32 pid;
-	__u32 tgid;
-	__u32 ppid;
-	__u32 ruid;
-	__u32 rgid;
-	__u32 euid;
-	__u32 egid;
-	__u32 suid;
-	__u32 sgid;
-	__u32 fsuid;
-	__u32 fsgid;
-	__u32 spare0[1];
-};
-#endif
-
 static int safe_int(const char *numstr, int *converted)
 {
 	char *err = NULL;
@@ -148,7 +122,7 @@ out:
 int main(int argc, char **argv)
 {
 	struct pidfd_info info = {
-		.request_mask = PIDFD_INFO_CGROUPID,
+		.mask = PIDFD_INFO_CGROUPID,
 	};
 	int pidfd = -1, ret = 1;
 	pid_t pid;
@@ -227,7 +201,7 @@ int main(int argc, char **argv)
 			       getegid(), info.sgid);
 		goto on_error;
 	}
-	if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) {
+	if ((info.mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) {
 		ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n");
 		goto on_error;
 	}
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 222f8131283b..e6a079b3d5e2 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -16,55 +16,10 @@
 #include <unistd.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
-#include <linux/ioctl.h>
 
 #include "pidfd.h"
 #include "../kselftest_harness.h"
 
-#ifndef PIDFS_IOCTL_MAGIC
-#define PIDFS_IOCTL_MAGIC 0xFF
-#endif
-
-#ifndef PIDFD_GET_CGROUP_NAMESPACE
-#define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
-#endif
-
-#ifndef PIDFD_GET_IPC_NAMESPACE
-#define PIDFD_GET_IPC_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 2)
-#endif
-
-#ifndef PIDFD_GET_MNT_NAMESPACE
-#define PIDFD_GET_MNT_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 3)
-#endif
-
-#ifndef PIDFD_GET_NET_NAMESPACE
-#define PIDFD_GET_NET_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 4)
-#endif
-
-#ifndef PIDFD_GET_PID_NAMESPACE
-#define PIDFD_GET_PID_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 5)
-#endif
-
-#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
-#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE  _IO(PIDFS_IOCTL_MAGIC, 6)
-#endif
-
-#ifndef PIDFD_GET_TIME_NAMESPACE
-#define PIDFD_GET_TIME_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 7)
-#endif
-
-#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
-#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
-#endif
-
-#ifndef PIDFD_GET_USER_NAMESPACE
-#define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
-#endif
-
-#ifndef PIDFD_GET_UTS_NAMESPACE
-#define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
-#endif
-
 enum {
 	PIDFD_NS_USER,
 	PIDFD_NS_MNT,
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index 9faa686f90e4..fcd85cad9f18 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -42,12 +42,41 @@ static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
 #endif
 }
 
-static int signal_received;
+static pthread_t signal_received;
 
 static void set_signal_received_on_sigusr1(int sig)
 {
 	if (sig == SIGUSR1)
-		signal_received = 1;
+		signal_received = pthread_self();
+}
+
+static int send_signal(int pidfd)
+{
+	int ret = 0;
+
+	if (sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0) < 0) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	if (signal_received != pthread_self()) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+exit:
+	signal_received = 0;
+	return ret;
+}
+
+static void *send_signal_worker(void *arg)
+{
+	int pidfd = (int)(intptr_t)arg;
+	int ret;
+
+	/* We forward any errors for the caller to handle. */
+	ret = send_signal(pidfd);
+	return (void *)(intptr_t)ret;
 }
 
 /*
@@ -56,8 +85,11 @@ static void set_signal_received_on_sigusr1(int sig)
  */
 static int test_pidfd_send_signal_simple_success(void)
 {
-	int pidfd, ret;
+	int pidfd;
 	const char *test_name = "pidfd_send_signal send SIGUSR1";
+	pthread_t thread;
+	void *thread_res;
+	int err;
 
 	if (!have_pidfd_send_signal) {
 		ksft_test_result_skip(
@@ -66,25 +98,45 @@ static int test_pidfd_send_signal_simple_success(void)
 		return 0;
 	}
 
+	signal(SIGUSR1, set_signal_received_on_sigusr1);
+
+	/* Try sending a signal to ourselves via /proc/self. */
 	pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
 	if (pidfd < 0)
 		ksft_exit_fail_msg(
 			"%s test: Failed to open process file descriptor\n",
 			test_name);
+	err = send_signal(pidfd);
+	if (err)
+		ksft_exit_fail_msg(
+			"%s test: Error %d on sending pidfd signal\n",
+			test_name, err);
+	close(pidfd);
 
-	signal(SIGUSR1, set_signal_received_on_sigusr1);
+	/* Now try the same thing only using PIDFD_SELF_THREAD_GROUP. */
+	err = send_signal(PIDFD_SELF_THREAD_GROUP);
+	if (err)
+		ksft_exit_fail_msg(
+			"%s test: Error %d on PIDFD_SELF_THREAD_GROUP signal\n",
+			test_name, err);
 
-	ret = sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0);
-	close(pidfd);
-	if (ret < 0)
-		ksft_exit_fail_msg("%s test: Failed to send signal\n",
+	/*
+	 * Now try the same thing in a thread and assert thread ID is equal to
+	 * worker thread ID.
+	 */
+	if (pthread_create(&thread, NULL, send_signal_worker,
+			   (void *)(intptr_t)PIDFD_SELF_THREAD))
+		ksft_exit_fail_msg("%s test: Failed to create thread\n",
 				   test_name);
-
-	if (signal_received != 1)
-		ksft_exit_fail_msg("%s test: Failed to receive signal\n",
+	if (pthread_join(thread, &thread_res))
+		ksft_exit_fail_msg("%s test: Failed to join thread\n",
 				   test_name);
+	err = (int)(intptr_t)thread_res;
+	if (err)
+		ksft_exit_fail_msg(
+			"%s test: Error %d on PIDFD_SELF_THREAD signal\n",
+			test_name, err);
 
-	signal_received = 0;
 	ksft_test_result_pass("%s test: Sent signal\n", test_name);
 	return 0;
 }
@@ -497,7 +549,7 @@ static int child_poll_leader_exit_test(void *args)
 	pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL);
 
 	/*
-	 * glibc exit calls exit_group syscall, so explicity call exit only
+	 * glibc exit calls exit_group syscall, so explicitly call exit only
 	 * so that only the group leader exits, leaving the threads alone.
 	 */
 	*child_exit_secs = time(NULL);
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
index 3a0129467de6..d6deb6ffa1b9 100644
--- a/tools/testing/selftests/powerpc/include/pkeys.h
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -24,6 +24,9 @@
 #undef PKEY_DISABLE_EXECUTE
 #define PKEY_DISABLE_EXECUTE	0x4
 
+#undef PKEY_UNRESTRICTED
+#define PKEY_UNRESTRICTED	0x0
+
 /* Older versions of libc do not define this */
 #ifndef SEGV_PKUERR
 #define SEGV_PKUERR	4
@@ -93,7 +96,7 @@ int pkeys_unsupported(void)
 	SKIP_IF(!hash_mmu);
 
 	/* Check if the system call is supported */
-	pkey = sys_pkey_alloc(0, 0);
+	pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 	SKIP_IF(pkey < 0);
 	sys_pkey_free(pkey);
 
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
index 0af4f02669a1..29b91b7456eb 100644
--- a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -72,7 +72,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
 
 		switch (fault_type) {
 		case PKEY_DISABLE_ACCESS:
-			pkey_set_rights(fault_pkey, 0);
+			pkey_set_rights(fault_pkey, PKEY_UNRESTRICTED);
 			break;
 		case PKEY_DISABLE_EXECUTE:
 			/*
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
index 2db76e56d4cb..e89a164c686b 100644
--- a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -83,7 +83,7 @@ static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
 	    mprotect(pgstart, pgsize, PROT_EXEC))
 		_exit(1);
 	else
-		pkey_set_rights(pkey, 0);
+		pkey_set_rights(pkey, PKEY_UNRESTRICTED);
 
 	fault_count++;
 }
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index f061434af452..7ff53caeb4aa 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -95,16 +95,16 @@ static int child(struct shared_info *info)
 	/* Get some pkeys so that we can change their bits in the AMR. */
 	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
 	if (pkey1 < 0) {
-		pkey1 = sys_pkey_alloc(0, 0);
+		pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 		FAIL_IF(pkey1 < 0);
 
 		disable_execute = false;
 	}
 
-	pkey2 = sys_pkey_alloc(0, 0);
+	pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 	FAIL_IF(pkey2 < 0);
 
-	pkey3 = sys_pkey_alloc(0, 0);
+	pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 	FAIL_IF(pkey3 < 0);
 
 	info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index fc633014424f..10f63042cf91 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -57,16 +57,16 @@ static int child(struct shared_info *info)
 	/* Get some pkeys so that we can change their bits in the AMR. */
 	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
 	if (pkey1 < 0) {
-		pkey1 = sys_pkey_alloc(0, 0);
+		pkey1 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 		CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);
 
 		disable_execute = false;
 	}
 
-	pkey2 = sys_pkey_alloc(0, 0);
+	pkey2 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 	CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);
 
-	pkey3 = sys_pkey_alloc(0, 0);
+	pkey3 = sys_pkey_alloc(0, PKEY_UNRESTRICTED);
 	CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);
 
 	info->amr1 |= 3ul << pkeyshift(pkey1);
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 134cdef5a6e0..48a8052d5dae 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -181,10 +181,11 @@ done
 
 # Function to check for presence of a file on the specified system.
 # Complain if the system cannot be reached, and retry after a wait.
-# Currently just waits forever if a machine disappears.
+# Currently just waits 15 minutes if a machine disappears.
 #
 # Usage: checkremotefile system pathname
 checkremotefile () {
+	local nsshfails=0
 	local ret
 	local sleeptime=60
 
@@ -195,6 +196,11 @@ checkremotefile () {
 		if test "$ret" -eq 255
 		then
 			echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
+			nsshfails=$((nsshfails+1))
+			if ((nsshfails > 15))
+			then
+				return 255
+			fi
 		elif test "$ret" -eq 0
 		then
 			return 0
@@ -268,12 +274,23 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log"
 for i in $systems
 do
 	echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log"
-	while checkremotefile "$i" "$resdir/$ds/remote.run"
+	while :
 	do
+		checkremotefile "$i" "$resdir/$ds/remote.run"
+		ret=$?
+		if test "$ret" -eq 1
+		then
+			echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
+			( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+			break;
+		fi
+		if test "$ret" -eq 255
+		then
+			echo System $i persistent ssh failure, lost results `date` | tee -a "$oldrun/remote-log"
+			break;
+		fi
 		sleep 30
 	done
-	echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
-	( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
 done
 
 ( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
index 2e63ef009d59..2db12c5cad9c 100755
--- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
+++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
@@ -49,7 +49,7 @@ do
 		do
 			err=
 			val=$((d*1000+t*10+c))
-			tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --bootargs "rcutorture.test_srcu_lockdep=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
+			tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
 			ret=$?
 			mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val"
 			if test "$d" -ne 0 && test "$ret" -eq 0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index 2db39f298d18..fb61703690cb 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -2,3 +2,4 @@ rcutorture.torture_type=srcud
 rcupdate.rcu_self_test=1
 rcutorture.fwd_progress=3
 srcutree.big_cpu_lim=5
+rcutorture.reader_flavor=0x8
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 8e50bfd4b710..90318591dae2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -5,3 +5,4 @@ rcutree.gp_cleanup_delay=3
 rcutree.kthread_prio=2
 threadirqs
 rcutree.use_softirq=0
+rcutorture.preempt_duration=10
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
index c419cac233ee..54f5c9053474 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -2,3 +2,9 @@ rcutree.gp_preinit_delay=3
 rcutree.gp_init_delay=3
 rcutree.gp_cleanup_delay=3
 rcupdate.rcu_self_test=1
+
+# This part is for synchronize_rcu() testing
+rcutorture.nfakewriters=-1
+rcutorture.gp_sync=1
+rcupdate.rcu_normal=1
+rcutree.rcu_normal_wake_from_gp=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index d30922d8c883..352393bc5c56 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -1,7 +1,8 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=16
-CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_NONE=n
 CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT_LAZY=y
 CONFIG_PREEMPT=n
 CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_TREE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 759ee51d3ddc..420632b030dc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -1,6 +1,7 @@
 CONFIG_SMP=y
 CONFIG_NR_CPUS=74
-CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_LAZY=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
 CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index f408bd6bfc3d..984534cfbf1b 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -8,5 +8,6 @@ TEST_GEN_PROGS := resctrl_tests
 LOCAL_HDRS += $(wildcard *.h)
 
 include ../lib.mk
+CFLAGS += -I$(top_srcdir)/tools/include
 
 $(OUTPUT)/resctrl_tests: $(wildcard *.c)
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index 3bbf3042fb06..d09e693dc739 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -169,8 +169,8 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param
 		return ret;
 
 	ret = check_results(&param, span, n);
-	if (ret && (get_vendor() == ARCH_INTEL))
-		ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+	if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support())
+		ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n");
 
 	return ret;
 }
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index 536d9089d2f6..c7e9adc0368f 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -201,6 +201,8 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param
 		return ret;
 
 	ret = check_results();
+	if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support())
+		ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n");
 
 	return ret;
 }
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index 315b2ef3b3bc..84d8bc250539 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -160,8 +160,8 @@ static int mbm_run_test(const struct resctrl_test *test, const struct user_param
 		return ret;
 
 	ret = check_results(param.fill_buf ? param.fill_buf->buf_size : 0);
-	if (ret && (get_vendor() == ARCH_INTEL))
-		ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+	if (ret && (get_vendor() == ARCH_INTEL) && !snc_kernel_support())
+		ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled on the system.\n");
 
 	return ret;
 }
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index dab1953fc7a0..cd3adfc14969 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -11,6 +11,7 @@
 #include <signal.h>
 #include <dirent.h>
 #include <stdbool.h>
+#include <ctype.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 #include <sys/mount.h>
@@ -21,6 +22,7 @@
 #include <sys/eventfd.h>
 #include <asm/unistd.h>
 #include <linux/perf_event.h>
+#include <linux/compiler.h>
 #include "../kselftest.h"
 
 #define MB			(1024 * 1024)
@@ -156,8 +158,11 @@ struct perf_event_read {
  */
 extern volatile int *value_sink;
 
+extern int snc_unreliable;
+
 extern char llc_occup_path[1024];
 
+int snc_nodes_per_l3_cache(void);
 int get_vendor(void);
 bool check_resctrlfs_support(void);
 int filter_dmesg(void);
@@ -198,6 +203,7 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
 int signal_handler_register(const struct resctrl_test *test);
 void signal_handler_unregister(void);
 unsigned int count_bits(unsigned long n);
+int snc_kernel_support(void);
 
 void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config);
 void perf_event_initialize_read_format(struct perf_event_read *pe_read);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 3335af815b21..5154ffd821c4 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -118,7 +118,7 @@ static bool test_vendor_specific_check(const struct resctrl_test *test)
 
 static void run_single_test(const struct resctrl_test *test, const struct user_params *uparams)
 {
-	int ret;
+	int ret, snc_mode;
 
 	if (test->disabled)
 		return;
@@ -128,8 +128,15 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p
 		return;
 	}
 
+	snc_mode = snc_nodes_per_l3_cache();
+
 	ksft_print_msg("Starting %s test ...\n", test->name);
 
+	if (snc_mode == 1 && snc_unreliable && get_vendor() == ARCH_INTEL) {
+		ksft_test_result_skip("SNC detection unreliable due to offline CPUs. Test results may not be accurate if SNC enabled.\n");
+		return;
+	}
+
 	if (test_prepare(test)) {
 		ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
 		return;
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index d38d6dd90be4..195f04c4d158 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -13,6 +13,8 @@
 
 #include "resctrl.h"
 
+int snc_unreliable;
+
 static int find_resctrl_mount(char *buffer)
 {
 	FILE *mounts;
@@ -157,6 +159,98 @@ int get_domain_id(const char *resource, int cpu_no, int *domain_id)
 }
 
 /*
+ * Count number of CPUs in a /sys bitmap
+ */
+static unsigned int count_sys_bitmap_bits(char *name)
+{
+	FILE *fp = fopen(name, "r");
+	int count = 0, c;
+
+	if (!fp)
+		return 0;
+
+	while ((c = fgetc(fp)) != EOF) {
+		if (!isxdigit(c))
+			continue;
+		switch (c) {
+		case 'f':
+			count++;
+			fallthrough;
+		case '7': case 'b': case 'd': case 'e':
+			count++;
+			fallthrough;
+		case '3': case '5': case '6': case '9': case 'a': case 'c':
+			count++;
+			fallthrough;
+		case '1': case '2': case '4': case '8':
+			count++;
+			break;
+		}
+	}
+	fclose(fp);
+
+	return count;
+}
+
+static bool cpus_offline_empty(void)
+{
+	char offline_cpus_str[64];
+	FILE *fp;
+
+	fp = fopen("/sys/devices/system/cpu/offline", "r");
+	if (!fp) {
+		ksft_perror("Could not open /sys/devices/system/cpu/offline");
+		return 0;
+	}
+
+	if (fscanf(fp, "%63s", offline_cpus_str) < 0) {
+		if (!errno) {
+			fclose(fp);
+			return 1;
+		}
+		ksft_perror("Could not read /sys/devices/system/cpu/offline");
+	}
+
+	fclose(fp);
+
+	return 0;
+}
+
+/*
+ * Detect SNC by comparing #CPUs in node0 with #CPUs sharing LLC with CPU0.
+ * If any CPUs are offline declare the detection as unreliable.
+ */
+int snc_nodes_per_l3_cache(void)
+{
+	int node_cpus, cache_cpus;
+	static int snc_mode;
+
+	if (!snc_mode) {
+		snc_mode = 1;
+		if (!cpus_offline_empty()) {
+			ksft_print_msg("Runtime SNC detection unreliable due to offline CPUs.\n");
+			ksft_print_msg("Setting SNC mode to disabled.\n");
+			snc_unreliable = 1;
+			return snc_mode;
+		}
+		node_cpus = count_sys_bitmap_bits("/sys/devices/system/node/node0/cpumap");
+		cache_cpus = count_sys_bitmap_bits("/sys/devices/system/cpu/cpu0/cache/index3/shared_cpu_map");
+
+		if (!node_cpus || !cache_cpus) {
+			ksft_print_msg("Could not determine Sub-NUMA Cluster mode.\n");
+			snc_unreliable = 1;
+			return snc_mode;
+		}
+		snc_mode = cache_cpus / node_cpus;
+
+		if (snc_mode > 1)
+			ksft_print_msg("SNC-%d mode discovered.\n", snc_mode);
+	}
+
+	return snc_mode;
+}
+
+/*
  * get_cache_size - Get cache size for a specified CPU
  * @cpu_no:	CPU number
  * @cache_type:	Cache level L2/L3
@@ -211,6 +305,17 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
 			break;
 	}
 
+	/*
+	 * The amount of cache represented by each bit in the masks
+	 * in the schemata file is reduced by a factor equal to SNC
+	 * nodes per L3 cache.
+	 * E.g. on a SNC-2 system with a 100MB L3 cache a test that
+	 * allocates memory from its local SNC node (default behavior
+	 * without using libnuma) will only see 50 MB llc_occupancy
+	 * with a fully populated L3 mask in the schemata file.
+	 */
+	if (cache_num == 3)
+		*cache_size /= snc_nodes_per_l3_cache();
 	return 0;
 }
 
@@ -852,3 +957,35 @@ unsigned int count_bits(unsigned long n)
 
 	return count;
 }
+
+/**
+ * snc_kernel_support - Check for existence of mon_sub_L3_00 file that indicates
+ * SNC resctrl support on the kernel side.
+ *
+ * Return: 0 if not supported, 1 if SNC is disabled or SNC discovery is
+ * unreliable or SNC is both enabled and supported.
+ */
+int snc_kernel_support(void)
+{
+	char node_path[PATH_MAX];
+	struct stat statbuf;
+	int ret;
+
+	ret = snc_nodes_per_l3_cache();
+	/*
+	 * If SNC is disabled then its kernel support isn't important. If SNC
+	 * got disabled because the discovery process was unreliable the
+	 * snc_unreliable variable was set. It can be used to verify the SNC
+	 * discovery reliability elsewhere in the selftest.
+	 */
+	if (ret == 1)
+		return ret;
+
+	snprintf(node_path, sizeof(node_path), "%s/%s", RESCTRL_PATH,
+		 "mon_data/mon_L3_00/mon_sub_L3_00");
+
+	if (!stat(node_path, &statbuf))
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
index d10a847130fb..a58f520f2f41 100644
--- a/tools/testing/selftests/ring-buffer/map_test.c
+++ b/tools/testing/selftests/ring-buffer/map_test.c
@@ -233,12 +233,18 @@ TEST_F(map, data_mmap)
 	ASSERT_NE(data, MAP_FAILED);
 	munmap(data, data_len);
 
-	/* Overflow the available subbufs by 1 */
+	/* Offset within ring-buffer bounds, mapping size overflow */
 	meta_len += desc->meta->subbuf_size * 2;
 	data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
 		    desc->cpu_fd, meta_len);
 	ASSERT_EQ(data, MAP_FAILED);
 
+	/* Offset outside ring-buffer bounds */
+	data_len = desc->meta->subbuf_size * desc->meta->nr_subbufs;
+	data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
+		    desc->cpu_fd, data_len + (desc->meta->subbuf_size * 2));
+	ASSERT_EQ(data, MAP_FAILED);
+
 	/* Verify meta-page padding */
 	if (desc->meta->meta_page_size > getpagesize()) {
 		data_len = desc->meta->meta_page_size;
diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore
index 9ae7964491d5..7d9c87cd0649 100644
--- a/tools/testing/selftests/riscv/vector/.gitignore
+++ b/tools/testing/selftests/riscv/vector/.gitignore
@@ -1,3 +1,4 @@
 vstate_exec_nolibc
 vstate_prctl
-v_initval_nolibc
+v_initval
+v_exec_initval_nolibc
diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile
index bfff0ff4f3be..6f7497f4e7b3 100644
--- a/tools/testing/selftests/riscv/vector/Makefile
+++ b/tools/testing/selftests/riscv/vector/Makefile
@@ -2,18 +2,27 @@
 # Copyright (C) 2021 ARM Limited
 # Originally tools/testing/arm64/abi/Makefile
 
-TEST_GEN_PROGS := vstate_prctl v_initval_nolibc
-TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc
+TEST_GEN_PROGS := v_initval vstate_prctl
+TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc v_exec_initval_nolibc
 
 include ../../lib.mk
 
-$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S
+$(OUTPUT)/sys_hwprobe.o: ../hwprobe/sys_hwprobe.S
+	$(CC) -static -c -o$@ $(CFLAGS) $^
+
+$(OUTPUT)/v_helpers.o: v_helpers.c
+	$(CC) -static -c -o$@ $(CFLAGS) $^
+
+$(OUTPUT)/vstate_prctl: vstate_prctl.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
 	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
 
 $(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c
 	$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
 		-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
 
-$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c
+$(OUTPUT)/v_initval: v_initval.c $(OUTPUT)/sys_hwprobe.o $(OUTPUT)/v_helpers.o
+	$(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/v_exec_initval_nolibc: v_exec_initval_nolibc.c
 	$(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
 		-Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
new file mode 100644
index 000000000000..35c0812e32de
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Get values of vector registers as soon as the program starts to test if
+ * is properly cleaning the values before starting a new program. Vector
+ * registers are caller saved, so no function calls may happen before reading
+ * the values. To further ensure consistency, this file is compiled without
+ * libc and without auto-vectorization.
+ *
+ * To be "clean" all values must be either all ones or all zeroes.
+ */
+
+#define __stringify_1(x...)	#x
+#define __stringify(x...)	__stringify_1(x)
+
+int main(int argc, char **argv)
+{
+	char prev_value = 0, value;
+	unsigned long vl;
+	int first = 1;
+
+	if (argc > 2 && strcmp(argv[2], "x"))
+		asm volatile (
+			// 0 | zimm[10:0] | rs1 | 1 1 1 | rd |1010111| vsetvli
+			// vsetvli	t4, x0, e8, m1, d1
+			".4byte		0b00000000000000000111111011010111\n\t"
+			"mv		%[vl], t4\n\t"
+			: [vl] "=r" (vl) : : "t4"
+		);
+	else
+		asm volatile (
+			".option push\n\t"
+			".option arch, +v\n\t"
+			"vsetvli	%[vl], x0, e8, m1, ta, ma\n\t"
+			".option pop\n\t"
+			: [vl] "=r" (vl)
+		);
+
+#define CHECK_VECTOR_REGISTER(register) ({					\
+	for (int i = 0; i < vl; i++) {						\
+		asm volatile (							\
+			".option push\n\t"					\
+			".option arch, +v\n\t"					\
+			"vmv.x.s %0, " __stringify(register) "\n\t"		\
+			"vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \
+			".option pop\n\t"					\
+			: "=r" (value));					\
+		if (first) {							\
+			first = 0;						\
+		} else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \
+			printf("Register " __stringify(register)		\
+				" values not clean! value: %u\n", value);	\
+			exit(-1);						\
+		}								\
+		prev_value = value;						\
+	}									\
+})
+
+	CHECK_VECTOR_REGISTER(v0);
+	CHECK_VECTOR_REGISTER(v1);
+	CHECK_VECTOR_REGISTER(v2);
+	CHECK_VECTOR_REGISTER(v3);
+	CHECK_VECTOR_REGISTER(v4);
+	CHECK_VECTOR_REGISTER(v5);
+	CHECK_VECTOR_REGISTER(v6);
+	CHECK_VECTOR_REGISTER(v7);
+	CHECK_VECTOR_REGISTER(v8);
+	CHECK_VECTOR_REGISTER(v9);
+	CHECK_VECTOR_REGISTER(v10);
+	CHECK_VECTOR_REGISTER(v11);
+	CHECK_VECTOR_REGISTER(v12);
+	CHECK_VECTOR_REGISTER(v13);
+	CHECK_VECTOR_REGISTER(v14);
+	CHECK_VECTOR_REGISTER(v15);
+	CHECK_VECTOR_REGISTER(v16);
+	CHECK_VECTOR_REGISTER(v17);
+	CHECK_VECTOR_REGISTER(v18);
+	CHECK_VECTOR_REGISTER(v19);
+	CHECK_VECTOR_REGISTER(v20);
+	CHECK_VECTOR_REGISTER(v21);
+	CHECK_VECTOR_REGISTER(v22);
+	CHECK_VECTOR_REGISTER(v23);
+	CHECK_VECTOR_REGISTER(v24);
+	CHECK_VECTOR_REGISTER(v25);
+	CHECK_VECTOR_REGISTER(v26);
+	CHECK_VECTOR_REGISTER(v27);
+	CHECK_VECTOR_REGISTER(v28);
+	CHECK_VECTOR_REGISTER(v29);
+	CHECK_VECTOR_REGISTER(v30);
+	CHECK_VECTOR_REGISTER(v31);
+
+#undef CHECK_VECTOR_REGISTER
+
+	return 0;
+}
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.c b/tools/testing/selftests/riscv/vector/v_helpers.c
new file mode 100644
index 000000000000..01a8799dcb78
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_helpers.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../hwprobe/hwprobe.h"
+#include <asm/vendor/thead.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+bool is_xtheadvector_supported(void)
+{
+	struct riscv_hwprobe pair;
+
+	pair.key = RISCV_HWPROBE_KEY_VENDOR_EXT_THEAD_0;
+	riscv_hwprobe(&pair, 1, 0, NULL, 0);
+	return pair.value & RISCV_HWPROBE_VENDOR_EXT_XTHEADVECTOR;
+}
+
+bool is_vector_supported(void)
+{
+	struct riscv_hwprobe pair;
+
+	pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+	riscv_hwprobe(&pair, 1, 0, NULL, 0);
+	return pair.value & RISCV_HWPROBE_EXT_ZVE32X;
+}
+
+int launch_test(char *next_program, int test_inherit, int xtheadvector)
+{
+	char *exec_argv[4], *exec_envp[1];
+	int rc, pid, status;
+
+	pid = fork();
+	if (pid < 0) {
+		printf("fork failed %d", pid);
+		return -1;
+	}
+
+	if (!pid) {
+		exec_argv[0] = next_program;
+		exec_argv[1] = test_inherit != 0 ? "x" : NULL;
+		exec_argv[2] = xtheadvector != 0 ? "x" : NULL;
+		exec_argv[3] = NULL;
+		exec_envp[0] = NULL;
+		/* launch the program again to check inherit */
+		rc = execve(next_program, exec_argv, exec_envp);
+		if (rc) {
+			perror("execve");
+			printf("child execve failed %d\n", rc);
+			exit(-1);
+		}
+	}
+
+	rc = waitpid(-1, &status, 0);
+	if (rc < 0) {
+		printf("waitpid failed\n");
+		return -3;
+	}
+
+	if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) ||
+	    WIFSIGNALED(status)) {
+		printf("child exited abnormally\n");
+		return -4;
+	}
+
+	return WEXITSTATUS(status);
+}
diff --git a/tools/testing/selftests/riscv/vector/v_helpers.h b/tools/testing/selftests/riscv/vector/v_helpers.h
new file mode 100644
index 000000000000..763cddfe26da
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_helpers.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <stdbool.h>
+
+bool is_xtheadvector_supported(void);
+
+bool is_vector_supported(void);
+
+int launch_test(char *next_program, int test_inherit, int xtheadvector);
diff --git a/tools/testing/selftests/riscv/vector/v_initval.c b/tools/testing/selftests/riscv/vector/v_initval.c
new file mode 100644
index 000000000000..be9e1d18ad29
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_initval.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../../kselftest_harness.h"
+#include "v_helpers.h"
+
+#define NEXT_PROGRAM "./v_exec_initval_nolibc"
+
+TEST(v_initval)
+{
+	int xtheadvector = 0;
+
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
+
+	ASSERT_EQ(0, launch_test(NEXT_PROGRAM, 0, xtheadvector));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
deleted file mode 100644
index 6174ffe016dc..000000000000
--- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
+++ /dev/null
@@ -1,72 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include "../../kselftest.h"
-#define MAX_VSIZE	(8192 * 32)
-
-void dump(char *ptr, int size)
-{
-	int i = 0;
-
-	for (i = 0; i < size; i++) {
-		if (i != 0) {
-			if (i % 16 == 0)
-				printf("\n");
-			else if (i % 8 == 0)
-				printf("  ");
-		}
-		printf("%02x ", ptr[i]);
-	}
-	printf("\n");
-}
-
-int main(void)
-{
-	int i;
-	unsigned long vl;
-	char *datap, *tmp;
-
-	ksft_set_plan(1);
-
-	datap = malloc(MAX_VSIZE);
-	if (!datap) {
-		ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE);
-		exit(-1);
-	}
-
-	tmp = datap;
-	asm volatile (
-		".option push\n\t"
-		".option arch, +v\n\t"
-		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
-		"vse8.v		v0, (%2)\n\t"
-		"add		%1, %2, %0\n\t"
-		"vse8.v		v8, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vse8.v		v16, (%1)\n\t"
-		"add		%1, %1, %0\n\t"
-		"vse8.v		v24, (%1)\n\t"
-		".option pop\n\t"
-		: "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory");
-
-	ksft_print_msg("vl = %lu\n", vl);
-
-	if (datap[0] != 0x00 && datap[0] != 0xff) {
-		ksft_test_result_fail("v-regesters are not properly initialized\n");
-		dump(datap, vl * 4);
-		exit(-1);
-	}
-
-	for (i = 1; i < vl * 4; i++) {
-		if (datap[i] != datap[0]) {
-			ksft_test_result_fail("detect stale values on v-regesters\n");
-			dump(datap, vl * 4);
-			exit(-2);
-		}
-	}
-
-	free(datap);
-
-	ksft_test_result_pass("tests for v_initval_nolibc pass\n");
-	ksft_exit_pass();
-	return 0;
-}
diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
index 1f9969bed235..7b7d6f21acb4 100644
--- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
@@ -6,13 +6,16 @@
 
 int main(int argc, char **argv)
 {
-	int rc, pid, status, test_inherit = 0;
+	int rc, pid, status, test_inherit = 0, xtheadvector = 0;
 	long ctrl, ctrl_c;
 	char *exec_argv[2], *exec_envp[2];
 
-	if (argc > 1)
+	if (argc > 1 && strcmp(argv[1], "x"))
 		test_inherit = 1;
 
+	if (argc > 2 && strcmp(argv[2], "x"))
+		xtheadvector = 1;
+
 	ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL);
 	if (ctrl < 0) {
 		puts("PR_RISCV_V_GET_CONTROL is not supported\n");
@@ -53,11 +56,14 @@ int main(int argc, char **argv)
 				puts("child's vstate_ctrl not equal to parent's\n");
 				exit(-1);
 			}
-			asm volatile (".option push\n\t"
-				      ".option arch, +v\n\t"
-				      "vsetvli x0, x0, e32, m8, ta, ma\n\t"
-				      ".option pop\n\t"
-				      );
+			if (xtheadvector)
+				asm volatile (".4byte	0x00007ed7");
+			else
+				asm volatile (".option push\n\t"
+					".option arch, +v\n\t"
+					"vsetvli x0, x0, e32, m8, ta, ma\n\t"
+					".option pop\n\t"
+					);
 			exit(ctrl);
 		}
 	}
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
index 40b3bffcbb40..62fbb17a0556 100644
--- a/tools/testing/selftests/riscv/vector/vstate_prctl.c
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -3,181 +3,244 @@
 #include <unistd.h>
 #include <errno.h>
 #include <sys/wait.h>
+#include <sys/types.h>
+#include <stdlib.h>
 
-#include "../hwprobe/hwprobe.h"
-#include "../../kselftest.h"
+#include "../../kselftest_harness.h"
+#include "v_helpers.h"
 
 #define NEXT_PROGRAM "./vstate_exec_nolibc"
-static int launch_test(int test_inherit)
-{
-	char *exec_argv[3], *exec_envp[1];
-	int rc, pid, status;
-
-	pid = fork();
-	if (pid < 0) {
-		ksft_test_result_fail("fork failed %d", pid);
-		return -1;
-	}
 
-	if (!pid) {
-		exec_argv[0] = NEXT_PROGRAM;
-		exec_argv[1] = test_inherit != 0 ? "x" : NULL;
-		exec_argv[2] = NULL;
-		exec_envp[0] = NULL;
-		/* launch the program again to check inherit */
-		rc = execve(NEXT_PROGRAM, exec_argv, exec_envp);
-		if (rc) {
-			perror("execve");
-			ksft_test_result_fail("child execve failed %d\n", rc);
-			exit(-1);
-		}
-	}
-
-	rc = waitpid(-1, &status, 0);
-	if (rc < 0) {
-		ksft_test_result_fail("waitpid failed\n");
-		return -3;
-	}
-
-	if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) ||
-	    WIFSIGNALED(status)) {
-		ksft_test_result_fail("child exited abnormally\n");
-		return -4;
-	}
-
-	return WEXITSTATUS(status);
-}
-
-int test_and_compare_child(long provided, long expected, int inherit)
+int test_and_compare_child(long provided, long expected, int inherit, int xtheadvector)
 {
 	int rc;
 
 	rc = prctl(PR_RISCV_V_SET_CONTROL, provided);
 	if (rc != 0) {
-		ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n",
-				      provided, rc);
+		printf("prctl with provided arg %lx failed with code %d\n",
+		       provided, rc);
 		return -1;
 	}
-	rc = launch_test(inherit);
+	rc = launch_test(NEXT_PROGRAM, inherit, xtheadvector);
 	if (rc != expected) {
-		ksft_test_result_fail("Test failed, check %d != %ld\n", rc,
-				      expected);
+		printf("Test failed, check %d != %ld\n", rc, expected);
 		return -2;
 	}
 	return 0;
 }
 
-#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT	0
-#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT	2
+#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0
+#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2
 
-int main(void)
+TEST(get_control_no_v)
 {
-	struct riscv_hwprobe pair;
-	long flag, expected;
 	long rc;
 
-	ksft_set_plan(1);
+	if (is_vector_supported() || is_xtheadvector_supported())
+		SKIP(return, "Test expects vector to be not supported");
 
-	pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
-	rc = riscv_hwprobe(&pair, 1, 0, NULL, 0);
-	if (rc < 0) {
-		ksft_test_result_fail("hwprobe() failed with %ld\n", rc);
-		return -1;
-	}
+	rc = prctl(PR_RISCV_V_GET_CONTROL);
+	EXPECT_EQ(-1, rc)
+	TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X");
+	EXPECT_EQ(EINVAL, errno)
+	TH_LOG("GET_CONTROL should fail on kernel/hw without ZVE32X");
+}
 
-	if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) {
-		ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n");
-		return -2;
-	}
+TEST(set_control_no_v)
+{
+	long rc;
 
-	if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) {
-		rc = prctl(PR_RISCV_V_GET_CONTROL);
-		if (rc != -1 || errno != EINVAL) {
-			ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n");
-			return -3;
-		}
-
-		rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
-		if (rc != -1 || errno != EINVAL) {
-			ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n");
-			return -4;
-		}
-
-		ksft_test_result_skip("Vector not supported\n");
-		return 0;
-	}
+	if (is_vector_supported() || is_xtheadvector_supported())
+		SKIP(return, "Test expects vector to be not supported");
+
+	rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
+	EXPECT_EQ(-1, rc)
+	TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X");
+	EXPECT_EQ(EINVAL, errno)
+	TH_LOG("SET_CONTROL should fail on kernel/hw without ZVE32X");
+}
+
+TEST(vstate_on_current)
+{
+	long flag;
+	long rc;
+
+	if (!is_vector_supported() && !is_xtheadvector_supported())
+		SKIP(return, "Vector not supported");
 
 	flag = PR_RISCV_V_VSTATE_CTRL_ON;
 	rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
-	if (rc != 0) {
-		ksft_test_result_fail("Enabling V for current should always success\n");
-		return -5;
-	}
+	EXPECT_EQ(0, rc) TH_LOG("Enabling V for current should always succeed");
+}
+
+TEST(vstate_off_eperm)
+{
+	long flag;
+	long rc;
+
+	if (!is_vector_supported() && !is_xtheadvector_supported())
+		SKIP(return, "Vector not supported");
 
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF;
 	rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
-	if (rc != -1 || errno != EPERM) {
-		ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n",
-				      errno);
-		return -5;
+	EXPECT_EQ(EPERM, errno)
+	TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno);
+	EXPECT_EQ(-1, rc)
+	TH_LOG("Disabling V in current thread with V enabled must fail with EPERM(%d)", errno);
+}
+
+TEST(vstate_on_no_nesting)
+{
+	long flag;
+	int xtheadvector = 0;
+
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
 	}
 
 	/* Turn on next's vector explicitly and test */
 	flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
-	if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0))
-		return -6;
+
+	EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0, xtheadvector));
+}
+
+TEST(vstate_off_nesting)
+{
+	long flag;
+	int xtheadvector = 0;
+
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
 
 	/* Turn off next's vector explicitly and test */
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
-	if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0))
-		return -7;
+
+	EXPECT_EQ(0, test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 1, xtheadvector));
+}
+
+TEST(vstate_on_inherit_no_nesting)
+{
+	long flag, expected;
+	int xtheadvector = 0;
+
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
+
+	/* Turn on next's vector explicitly and test no inherit */
+	flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+	expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
+
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector));
+}
+
+TEST(vstate_on_inherit)
+{
+	long flag, expected;
+	int xtheadvector = 0;
+
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
 
 	/* Turn on next's vector explicitly and test inherit */
 	flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
 	expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
-	if (test_and_compare_child(flag, expected, 0))
-		return -8;
 
-	if (test_and_compare_child(flag, expected, 1))
-		return -9;
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector));
+}
+
+TEST(vstate_off_inherit_no_nesting)
+{
+	long flag, expected;
+	int xtheadvector = 0;
+
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
+	/* Turn off next's vector explicitly and test no inherit */
+	flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+	expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
+
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 0, xtheadvector));
+}
+
+TEST(vstate_off_inherit)
+{
+	long flag, expected;
+	int xtheadvector = 0;
+
+	if (!is_vector_supported()) {
+		if (is_xtheadvector_supported())
+			xtheadvector = 1;
+		else
+			SKIP(return, "Vector not supported");
+	}
 
 	/* Turn off next's vector explicitly and test inherit */
 	flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
 	flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
 	expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
-	if (test_and_compare_child(flag, expected, 0))
-		return -10;
 
-	if (test_and_compare_child(flag, expected, 1))
-		return -11;
+	EXPECT_EQ(0, test_and_compare_child(flag, expected, 1, xtheadvector));
+}
+
+/* arguments should fail with EINVAL */
+TEST(inval_set_control_1)
+{
+	int rc;
+
+	if (!is_vector_supported() && !is_xtheadvector_supported())
+		SKIP(return, "Vector not supported");
 
-	/* arguments should fail with EINVAL */
 	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0);
-	if (rc != -1 || errno != EINVAL) {
-		ksft_test_result_fail("Undefined control argument should return EINVAL\n");
-		return -12;
-	}
+	EXPECT_EQ(-1, rc);
+	EXPECT_EQ(EINVAL, errno);
+}
+
+/* arguments should fail with EINVAL */
+TEST(inval_set_control_2)
+{
+	int rc;
+
+	if (!is_vector_supported() && !is_xtheadvector_supported())
+		SKIP(return, "Vector not supported");
 
 	rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3);
-	if (rc != -1 || errno != EINVAL) {
-		ksft_test_result_fail("Undefined control argument should return EINVAL\n");
-		return -12;
-	}
+	EXPECT_EQ(-1, rc);
+	EXPECT_EQ(EINVAL, errno);
+}
 
-	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
-	if (rc != -1 || errno != EINVAL) {
-		ksft_test_result_fail("Undefined control argument should return EINVAL\n");
-		return -12;
-	}
+/* arguments should fail with EINVAL */
+TEST(inval_set_control_3)
+{
+	int rc;
 
-	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
-	if (rc != -1 || errno != EINVAL) {
-		ksft_test_result_fail("Undefined control argument should return EINVAL\n");
-		return -12;
-	}
+	if (!is_vector_supported() && !is_xtheadvector_supported())
+		SKIP(return, "Vector not supported");
 
-	ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n");
-	ksft_exit_pass();
-	return 0;
+	rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
+	EXPECT_EQ(-1, rc);
+	EXPECT_EQ(EINVAL, errno);
 }
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index 16496de5f6ce..0fda241fa62b 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -9,3 +9,4 @@ param_test_compare_twice
 param_test_mm_cid
 param_test_mm_cid_benchmark
 param_test_mm_cid_compare_twice
+syscall_errors_test
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 5a3432fceb58..0d0a5fae5954 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -16,11 +16,12 @@ OVERRIDE_TARGETS = 1
 
 TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
 		param_test_benchmark param_test_compare_twice param_test_mm_cid \
-		param_test_mm_cid_benchmark param_test_mm_cid_compare_twice
+		param_test_mm_cid_benchmark param_test_mm_cid_compare_twice \
+		syscall_errors_test
 
 TEST_GEN_PROGS_EXTENDED = librseq.so
 
-TEST_PROGS = run_param_test.sh
+TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh
 
 TEST_FILES := settings
 
@@ -54,3 +55,7 @@ $(OUTPUT)/param_test_mm_cid_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
 $(OUTPUT)/param_test_mm_cid_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
 					rseq.h rseq-*.h
 	$(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/syscall_errors_test: syscall_errors_test.c $(TEST_GEN_PROGS_EXTENDED) \
+					rseq.h rseq-*.h
+	$(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index 2f37961240ca..05d03e679e06 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -226,8 +226,32 @@ unsigned int yield_mod_cnt, nr_abort;
 	"addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"	\
 	"bnez " INJECT_ASM_REG ", 222b\n\t"			\
 	"333:\n\t"
+#elif defined(__or1k__)
 
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
 
+#define INJECT_ASM_REG	"r31"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n)					\
+	"l.lwz   " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t"	\
+	"l.sfeqi " INJECT_ASM_REG ", 0\n\t"			\
+	"l.bf 333f\n\t"						\
+	" l.nop\n\t"						\
+	"222:\n\t"						\
+	"l.addi  " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t"	\
+	"l.sfeqi " INJECT_ASM_REG ", 0\n\t"			\
+	"l.bf 222f\n\t"						\
+	" l.nop\n\t"						\
+	"333:\n\t"
 #else
 #error unsupported target
 #endif
diff --git a/tools/testing/selftests/rseq/rseq-or1k-bits.h b/tools/testing/selftests/rseq/rseq-or1k-bits.h
new file mode 100644
index 000000000000..15d0e8200cd1
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-or1k-bits.h
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "rseq-bits-template.h"
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+	(defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv,
+				int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_FINAL_STORE(v, newv, 3)
+				  RSEQ_INJECT_ASM(5)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+				    [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
+				    [v]			"m" (*v),
+				    [expect]		"r" (expect),
+				    [newv]		"r" (newv)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_R_LOAD(v)
+				  RSEQ_ASM_OP_R_STORE(load)
+				  RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+				  RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+				  RSEQ_INJECT_ASM(5)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+				    [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
+				    [v]			"m" (*v),
+				    [expectnot]		"r" (expectnot),
+				    [load]		"m" (*load),
+				    [voffp]		"Ir" (voffp)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+				  RSEQ_ASM_OP_R_LOAD(v)
+				  RSEQ_ASM_OP_R_ADD(count)
+				  RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+				  RSEQ_INJECT_ASM(4)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+				    [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
+				    [v]			"m" (*v),
+				    [count]		"r" (count)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+				  RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+				  RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]")
+#endif
+				  RSEQ_ASM_OP_FINAL_STORE(v, newv, 3)
+				  RSEQ_INJECT_ASM(6)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+				    [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
+				    [v]			"m" (*v),
+				    [expect]		"r" (expect),
+				    [v2]		"m" (*v2),
+				    [expect2]		"r" (expect2),
+				    [newv]		"r" (newv)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2, error3
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ *   pval = *(ptr+off)
+ *  *pval += inc;
+ */
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, intptr_t inc,
+				int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+				  RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3)
+				  RSEQ_INJECT_ASM(4)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+				    [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
+				    [ptr]		"r" (ptr),
+				    [off]		"r" (off),
+				    [inc]		"r" (inc)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+	(defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+	(defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_STORE(v2, newv2)
+				  RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+				  RSEQ_ASM_OP_FINAL_STORE_RELEASE(v, newv, 3)
+#else
+				  RSEQ_ASM_OP_FINAL_STORE(v, newv, 3)
+#endif
+				  RSEQ_INJECT_ASM(6)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+				    [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
+				    [expect]		"r" (expect),
+				    [v]			"m" (*v),
+				    [newv]		"r" (newv),
+				    [v2]		"m" (*v2),
+				    [newv2]		"r" (newv2)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+	__asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+				  RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+				  RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+				  RSEQ_INJECT_ASM(3)
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+				  RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+				  RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+				  RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+				  RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+				  RSEQ_ASM_OP_FINAL_STORE_RELEASE(v, newv, 3)
+#else
+				  RSEQ_ASM_OP_FINAL_STORE(v, newv, 3)
+#endif
+				  RSEQ_INJECT_ASM(6)
+				  RSEQ_ASM_DEFINE_ABORT(4, abort)
+				  : /* gcc asm goto does not allow outputs */
+				  : [cpu_id]		"r" (cpu),
+				    [current_cpu_id]	"m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+				    [rseq_cs]		"m" (rseq_get_abi()->rseq_cs.arch.ptr),
+				    [expect]		"r" (expect),
+				    [v]			"m" (*v),
+				    [newv]		"r" (newv),
+				    [dst]		"r" (dst),
+				    [src]		"r" (src),
+				    [len]		"r" (len)
+				    RSEQ_INJECT_INPUT
+				  : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2,
+				    RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4
+				    RSEQ_INJECT_CLOBBER
+				  : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+				    , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+	(defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#include "rseq-bits-reset.h"
diff --git a/tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h b/tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h
new file mode 100644
index 000000000000..cda740f7aff3
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-or1k-thread-pointer.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+#ifndef _RSEQ_OR1K_THREAD_POINTER
+#define _RSEQ_OR1K_THREAD_POINTER
+
+static inline void *rseq_thread_pointer(void)
+{
+	void *__thread_register;
+
+	__asm__ ("l.or %0, r10, r0" : "=r" (__thread_register));
+	return __thread_register;
+}
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq-or1k.h b/tools/testing/selftests/rseq/rseq-or1k.h
new file mode 100644
index 000000000000..9e78eebdf79a
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-or1k.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+/*
+ * Select the instruction "l.nop 0x35" as the RSEQ_SIG.
+ */
+#define RSEQ_SIG   0x15000035
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("l.msync" ::: "memory")
+#define rseq_smp_rmb()	rseq_smp_mb()
+#define rseq_smp_wmb()	rseq_smp_mb()
+#define RSEQ_ASM_TMP_REG_1	"r31"
+#define RSEQ_ASM_TMP_REG_2	"r29"
+#define RSEQ_ASM_TMP_REG_3	"r27"
+#define RSEQ_ASM_TMP_REG_4	"r25"
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p));	\
+	rseq_smp_mb();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_mb();							\
+	RSEQ_WRITE_ONCE(*(p), v);					\
+} while (0)
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip,	\
+				post_commit_offset, abort_ip)		\
+	".pushsection	__rseq_cs, \"aw\"\n"				\
+	".balign	32\n"						\
+	__rseq_str(label) ":\n"						\
+	".long " __rseq_str(version) ", " __rseq_str(flags) "\n"	\
+	".long 0x0, " __rseq_str(start_ip) ", "				\
+		"0x0, " __rseq_str(post_commit_offset) ", "		\
+		"0x0, " __rseq_str(abort_ip) "\n"			\
+	".popsection\n\t"						\
+	".pushsection __rseq_cs_ptr_array, \"aw\"\n"			\
+	".long 0x0, " __rseq_str(label) "b\n"				\
+	".popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		 \
+				((post_commit_ip) - (start_ip)), abort_ip)
+
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip)			\
+	".pushsection __rseq_exit_point_array, \"aw\"\n"		\
+	".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(exit_ip) "\n"	\
+	".popsection\n"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+	RSEQ_INJECT_ASM(1)						\
+	"l.movhi " RSEQ_ASM_TMP_REG_1 ", hi(" __rseq_str(cs_label) ")\n"\
+	"l.ori   " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1		\
+		", lo(" __rseq_str(cs_label) ")\n"\
+	"l.sw  %[" __rseq_str(rseq_cs) "], " RSEQ_ASM_TMP_REG_1 "\n"	\
+	__rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label)			\
+	"l.j 222f\n"							\
+	" l.nop\n"							\
+	".balign	4\n"						\
+	".long "	__rseq_str(RSEQ_SIG) "\n"			\
+	__rseq_str(label) ":\n"						\
+	"l.j %l[" __rseq_str(abort_label) "]\n"				\
+	" l.nop\n"							\
+	"222:\n"
+
+#define RSEQ_ASM_OP_STORE(var, value)					\
+	"l.sw %[" __rseq_str(var) "], %[" __rseq_str(value) "]\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label)				\
+	"l.lwz  " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"	\
+	"l.sfne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "]\n"	\
+	"l.bf   " __rseq_str(label) "\n"				\
+	" l.nop\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label)				\
+	"l.lwz  " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"	\
+	"l.sfeq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "]\n"	\
+	"l.bf   " __rseq_str(label) "\n"				\
+	" l.nop\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+	RSEQ_INJECT_ASM(2)						\
+	RSEQ_ASM_OP_CMPEQ(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var)						\
+	"l.lwz " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var)					\
+	"l.sw %[" __rseq_str(var) "], " RSEQ_ASM_TMP_REG_1 "\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset)					\
+	"l.lwz " RSEQ_ASM_TMP_REG_1 ", "				\
+		"%[" __rseq_str(offset) "](" RSEQ_ASM_TMP_REG_1 ")\n"
+
+#define RSEQ_ASM_OP_R_ADD(count)					\
+	"l.add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1		\
+		", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(var, value, post_commit_label)		\
+	RSEQ_ASM_OP_STORE(var, value)					\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(var, value, post_commit_label)	\
+	"l.msync\n"							\
+	RSEQ_ASM_OP_STORE(var, value)					\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label)		\
+	"l.sw %[" __rseq_str(var) "], " RSEQ_ASM_TMP_REG_1 "\n"		\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)				\
+	"l.sfeq	%[" __rseq_str(len) "], r0\n"				\
+	"l.bf 333f\n"							\
+	" l.nop\n"							\
+	"l.ori  " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "], 0\n"	\
+	"l.ori  " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "], 0\n"	\
+	"l.ori  " RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "], 0\n"	\
+	"222:\n"							\
+	"l.lbz  " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n"	\
+	"l.sb   0(" RSEQ_ASM_TMP_REG_3 "), " RSEQ_ASM_TMP_REG_4 "\n"	\
+	"l.addi " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n"	\
+	"l.addi " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n"	\
+	"l.addi " RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n"	\
+	"l.sfne " RSEQ_ASM_TMP_REG_1 ", r0\n"				\
+	"l.bf 222b\n"							\
+	" l.nop\n"							\
+	"333:\n"
+
+#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label)	\
+	"l.ori  " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "], 0\n"	\
+	RSEQ_ASM_OP_R_ADD(off)						\
+	"l.lwz  " RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n"	\
+	RSEQ_ASM_OP_R_ADD(inc)						\
+	__rseq_str(post_commit_label) ":\n"
+
+/* Per-cpu-id indexing. */
+
+#define RSEQ_TEMPLATE_CPU_ID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_CPU_ID
+
+/* Per-mm-cid indexing. */
+
+#define RSEQ_TEMPLATE_MM_CID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_MM_CID
+
+/* APIs which are not based on cpu ids. */
+
+#define RSEQ_TEMPLATE_CPU_ID_NONE
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-or1k-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+#undef RSEQ_TEMPLATE_CPU_ID_NONE
diff --git a/tools/testing/selftests/rseq/rseq-riscv-bits.h b/tools/testing/selftests/rseq/rseq-riscv-bits.h
index de31a0143139..f02f411d550d 100644
--- a/tools/testing/selftests/rseq/rseq-riscv-bits.h
+++ b/tools/testing/selftests/rseq/rseq-riscv-bits.h
@@ -243,7 +243,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i
 #ifdef RSEQ_COMPARE_TWICE
 				  RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
 #endif
-				  RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3)
+				  RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3)
 				  RSEQ_INJECT_ASM(4)
 				  RSEQ_ASM_DEFINE_ABORT(4, abort)
 				  : /* gcc asm goto does not allow outputs */
@@ -251,8 +251,8 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i
 				    [current_cpu_id]		"m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
 				    [rseq_cs]			"m" (rseq_get_abi()->rseq_cs.arch.ptr),
 				    [ptr]			"r" (ptr),
-				    [off]			"er" (off),
-				    [inc]			"er" (inc)
+				    [off]			"r" (off),
+				    [inc]			"r" (inc)
 				    RSEQ_INJECT_INPUT
 				  : "memory", RSEQ_ASM_TMP_REG_1
 				    RSEQ_INJECT_CLOBBER
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
index 37e598d0a365..67d544aaa9a3 100644
--- a/tools/testing/selftests/rseq/rseq-riscv.h
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -158,7 +158,7 @@ do {									\
 	"bnez	" RSEQ_ASM_TMP_REG_1 ", 222b\n"				\
 	"333:\n"
 
-#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label)		\
+#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label)	\
 	"mv	" RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n"	\
 	RSEQ_ASM_OP_R_ADD(off)						\
 	REG_L	  RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n"	\
diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h
index 977c25d758b2..3d5019307a1b 100644
--- a/tools/testing/selftests/rseq/rseq-thread-pointer.h
+++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h
@@ -12,6 +12,8 @@
 #include "rseq-x86-thread-pointer.h"
 #elif defined(__PPC__)
 #include "rseq-ppc-thread-pointer.h"
+#elif defined(__or1k__)
+#include "rseq-or1k-thread-pointer.h"
 #else
 #include "rseq-generic-thread-pointer.h"
 #endif
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 5b9772cdf265..663a9cef1952 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -61,7 +61,6 @@ unsigned int rseq_size = -1U;
 unsigned int rseq_flags;
 
 static int rseq_ownership;
-static int rseq_reg_success;	/* At least one rseq registration has succeded. */
 
 /* Allocate a large area for the TLS. */
 #define RSEQ_THREAD_AREA_ALLOC_SIZE	1024
@@ -72,9 +71,20 @@ static int rseq_reg_success;	/* At least one rseq registration has succeded. */
 /* Original struct rseq allocation size is 32 bytes. */
 #define ORIG_RSEQ_ALLOC_SIZE		32
 
+/*
+ * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an
+ * rseq registration that is larger than the current rseq ABI.
+ */
+union rseq_tls {
+	struct rseq_abi abi;
+	char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE];
+};
+
 static
-__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
-	.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = {
+	.abi = {
+		.cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+	},
 };
 
 static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
@@ -88,7 +98,7 @@ static int sys_getcpu(unsigned *cpu, unsigned *node)
 	return syscall(__NR_getcpu, cpu, node, NULL);
 }
 
-int rseq_available(void)
+bool rseq_available(void)
 {
 	int rc;
 
@@ -97,9 +107,9 @@ int rseq_available(void)
 		abort();
 	switch (errno) {
 	case ENOSYS:
-		return 0;
+		return false;
 	case EINVAL:
-		return 1;
+		return true;
 	default:
 		abort();
 	}
@@ -150,16 +160,29 @@ int rseq_register_current_thread(void)
 		/* Treat libc's ownership as a successful registration. */
 		return 0;
 	}
-	rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
+	rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
 	if (rc) {
-		if (RSEQ_READ_ONCE(rseq_reg_success)) {
+		/*
+		 * After at least one thread has registered successfully
+		 * (rseq_size > 0), the registration of other threads should
+		 * never fail.
+		 */
+		if (RSEQ_READ_ONCE(rseq_size) > 0) {
 			/* Incoherent success/failure within process. */
 			abort();
 		}
 		return -1;
 	}
 	assert(rseq_current_cpu_raw() >= 0);
-	RSEQ_WRITE_ONCE(rseq_reg_success, 1);
+
+	/*
+	 * The first thread to register sets the rseq_size to mimic the libc
+	 * behavior.
+	 */
+	if (RSEQ_READ_ONCE(rseq_size) == 0) {
+		RSEQ_WRITE_ONCE(rseq_size, get_rseq_kernel_feature_size());
+	}
+
 	return 0;
 }
 
@@ -171,7 +194,7 @@ int rseq_unregister_current_thread(void)
 		/* Treat libc's ownership as a successful unregistration. */
 		return 0;
 	}
-	rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+	rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
 	if (rc)
 		return -1;
 	return 0;
@@ -235,12 +258,18 @@ void rseq_init(void)
 		return;
 	}
 	rseq_ownership = 1;
-	if (!rseq_available()) {
-		rseq_size = 0;
-		return;
-	}
-	rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
+
+	/* Calculate the offset of the rseq area from the thread pointer. */
+	rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer();
+
+	/* rseq flags are deprecated, always set to 0. */
 	rseq_flags = 0;
+
+	/*
+	 * Set the size to 0 until at least one thread registers to mimic the
+	 * libc behavior.
+	 */
+	rseq_size = 0;
 }
 
 static __attribute__((destructor))
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 4e217b620e0c..f51a5fdb0444 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -60,7 +60,14 @@
 extern ptrdiff_t rseq_offset;
 
 /*
- * Size of the registered rseq area. 0 if the registration was
+ * The rseq ABI is composed of extensible feature fields. The extensions
+ * are done by appending additional fields at the end of the structure.
+ * The rseq_size defines the size of the active feature set which can be
+ * used by the application for the current rseq registration. Features
+ * starting at offset >= rseq_size are inactive and should not be used.
+ *
+ * The rseq_size is the intersection between the available allocation
+ * size for the rseq area and the feature size supported by the kernel.
  * unsuccessful.
  */
 extern unsigned int rseq_size;
@@ -122,6 +129,8 @@ static inline struct rseq_abi *rseq_get_abi(void)
 #include <rseq-s390.h>
 #elif defined(__riscv)
 #include <rseq-riscv.h>
+#elif defined(__or1k__)
+#include <rseq-or1k.h>
 #else
 #error unsupported target
 #endif
@@ -151,6 +160,11 @@ int32_t rseq_fallback_current_cpu(void);
 int32_t rseq_fallback_current_node(void);
 
 /*
+ * Returns true if rseq is supported.
+ */
+bool rseq_available(void);
+
+/*
  * Values returned can be either the current CPU number, -1 (rseq is
  * uninitialized), or -2 (rseq initialization has failed).
  */
diff --git a/tools/testing/selftests/rseq/run_syscall_errors_test.sh b/tools/testing/selftests/rseq/run_syscall_errors_test.sh
new file mode 100755
index 000000000000..9272246b39f2
--- /dev/null
+++ b/tools/testing/selftests/rseq/run_syscall_errors_test.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com>
+
+GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ./syscall_errors_test
diff --git a/tools/testing/selftests/rseq/syscall_errors_test.c b/tools/testing/selftests/rseq/syscall_errors_test.c
new file mode 100644
index 000000000000..a5d9e1f8a2dc
--- /dev/null
+++ b/tools/testing/selftests/rseq/syscall_errors_test.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <syscall.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "rseq.h"
+
+static int sys_rseq(void *rseq_abi, uint32_t rseq_len,
+		    int flags, uint32_t sig)
+{
+	return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
+
+/*
+ * Check the value of errno on some expected failures of the rseq syscall.
+ */
+
+int main(void)
+{
+	struct rseq_abi *global_rseq = rseq_get_abi();
+	int ret;
+	int errno_copy;
+
+	if (!rseq_available()) {
+		fprintf(stderr, "rseq syscall unavailable");
+		goto error;
+	}
+
+	/* The current thread is NOT registered. */
+
+	/* EINVAL */
+	errno = 0;
+	ret = sys_rseq(global_rseq, 32, -1, RSEQ_SIG);
+	errno_copy = errno;
+	fprintf(stderr, "Registration with invalid flag fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+	if (ret == 0 || errno_copy != EINVAL)
+		goto error;
+
+	errno = 0;
+	ret = sys_rseq((char *) global_rseq + 1, 32, 0, RSEQ_SIG);
+	errno_copy = errno;
+	fprintf(stderr, "Registration with unaligned rseq_abi fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+	if (ret == 0 || errno_copy != EINVAL)
+		goto error;
+
+	errno = 0;
+	ret = sys_rseq(global_rseq, 31, 0, RSEQ_SIG);
+	errno_copy = errno;
+	fprintf(stderr, "Registration with invalid size fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+	if (ret == 0 || errno_copy != EINVAL)
+		goto error;
+
+
+#if defined(__LP64__) && (!defined(__s390__) && !defined(__s390x__))
+	/*
+	 * We haven't found a reliable way to find an invalid address when
+	 * running a 32bit userspace on a 64bit kernel, so only run this test
+	 * on 64bit builds for the moment.
+	 *
+	 * Also exclude architectures that select
+	 * CONFIG_ALTERNATE_USER_ADDRESS_SPACE where the kernel and userspace
+	 * have their own address space and this failure can't happen.
+	 */
+
+	/* EFAULT */
+	errno = 0;
+	ret = sys_rseq((void *) -4096UL, 32, 0, RSEQ_SIG);
+	errno_copy = errno;
+	fprintf(stderr, "Registration with invalid address fails with errno set to EFAULT (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+	if (ret == 0 || errno_copy != EFAULT)
+		goto error;
+#endif
+
+	errno = 0;
+	ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG);
+	errno_copy = errno;
+	fprintf(stderr, "Registration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+	if (ret != 0 && errno != 0)
+		goto error;
+
+	/* The current thread is registered. */
+
+	/* EBUSY */
+	errno = 0;
+	ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG);
+	errno_copy = errno;
+	fprintf(stderr, "Double registration fails with errno set to EBUSY (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+	if (ret == 0 || errno_copy != EBUSY)
+		goto error;
+
+	/* EPERM */
+	errno = 0;
+	ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG + 1);
+	errno_copy = errno;
+	fprintf(stderr, "Unregistration with wrong RSEQ_SIG fails with errno to EPERM (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+	if (ret == 0 || errno_copy != EPERM)
+		goto error;
+
+	errno = 0;
+	ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+	errno_copy = errno;
+	fprintf(stderr, "Unregistration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+	if (ret != 0)
+		goto error;
+
+	errno = 0;
+	ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+	errno_copy = errno;
+	fprintf(stderr, "Double unregistration fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+	if (ret == 0 || errno_copy != EINVAL)
+		goto error;
+
+	return 0;
+error:
+	return -1;
+}
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
index a28c1416cb89..50e03eefe7ac 100755
--- a/tools/testing/selftests/run_kselftest.sh
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -21,7 +21,7 @@ usage()
 	cat <<EOF
 Usage: $0 [OPTIONS]
   -s | --summary		Print summary with detailed log in output.log (conflict with -p)
-  -p | --per_test_log		Print test log in /tmp with each test name (conflict with -s)
+  -p | --per-test-log		Print test log in /tmp with each test name (conflict with -s)
   -t | --test COLLECTION:TEST	Run TEST from COLLECTION
   -c | --collection COLLECTION	Run all tests from COLLECTION
   -l | --list			List the available collection:test entries
diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config
index e8b09aa7c0c4..1bb8bf6d7fd4 100644
--- a/tools/testing/selftests/sched/config
+++ b/tools/testing/selftests/sched/config
@@ -1 +1 @@
-CONFIG_SCHED_DEBUG=y
+# empty
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 011762224600..f4531327b8e7 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -172,6 +172,7 @@ auto-test-targets :=			\
 	maximal				\
 	maybe_null			\
 	minimal				\
+	numa				\
 	prog_run			\
 	reload_loop			\
 	select_cpu_dfl			\
diff --git a/tools/testing/selftests/sched_ext/config b/tools/testing/selftests/sched_ext/config
index 0de9b4ee249d..aa901b05c8ad 100644
--- a/tools/testing/selftests/sched_ext/config
+++ b/tools/testing/selftests/sched_ext/config
@@ -1,4 +1,3 @@
-CONFIG_SCHED_DEBUG=y
 CONFIG_SCHED_CLASS_EXT=y
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_SCHED=y
diff --git a/tools/testing/selftests/sched_ext/create_dsq.c b/tools/testing/selftests/sched_ext/create_dsq.c
index fa946d9146d4..d67431f57ac6 100644
--- a/tools/testing/selftests/sched_ext/create_dsq.c
+++ b/tools/testing/selftests/sched_ext/create_dsq.c
@@ -14,11 +14,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct create_dsq *skel;
 
-	skel = create_dsq__open_and_load();
-	if (!skel) {
-		SCX_ERR("Failed to open and load skel");
-		return SCX_TEST_FAIL;
-	}
+	skel = create_dsq__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(create_dsq__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c
index e65d22f23f3b..b6d13496b24e 100644
--- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c
+++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c
@@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct ddsp_bogus_dsq_fail *skel;
 
-	skel = ddsp_bogus_dsq_fail__open_and_load();
-	SCX_FAIL_IF(!skel, "Failed to open and load skel");
+	skel = ddsp_bogus_dsq_fail__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(ddsp_bogus_dsq_fail__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c
index abafee587cd6..af9ce4ee8baa 100644
--- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c
+++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c
@@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct ddsp_vtimelocal_fail *skel;
 
-	skel = ddsp_vtimelocal_fail__open_and_load();
-	SCX_FAIL_IF(!skel, "Failed to open and load skel");
+	skel = ddsp_vtimelocal_fail__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(ddsp_vtimelocal_fail__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
index fbda6bf54671..c02b2aa6fc64 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
@@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev)
 	if (!p)
 		return;
 
-	if (p->nr_cpus_allowed == nr_cpus)
+	if (p->nr_cpus_allowed == nr_cpus && !is_migration_disabled(p))
 		target = bpf_get_prandom_u32() % nr_cpus;
 	else
 		target = scx_bpf_task_cpu(p);
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c
index 0ff27e57fe43..e1f2ce4abfe6 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.c
@@ -15,6 +15,7 @@ static enum scx_test_status setup(void **ctx)
 
 	skel = dsp_local_on__open();
 	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
 
 	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
 	SCX_FAIL_IF(dsp_local_on__load(skel), "Failed to load skel");
diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
index 73e679953e27..d3387ae03679 100644
--- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
+++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
@@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct enq_last_no_enq_fails *skel;
 
-	skel = enq_last_no_enq_fails__open_and_load();
-	if (!skel) {
-		SCX_ERR("Failed to open and load skel");
-		return SCX_TEST_FAIL;
-	}
+	skel = enq_last_no_enq_fails__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(enq_last_no_enq_fails__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
index dd1350e5f002..a80e3a3b3698 100644
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
@@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct enq_select_cpu_fails *skel;
 
-	skel = enq_select_cpu_fails__open_and_load();
-	if (!skel) {
-		SCX_ERR("Failed to open and load skel");
-		return SCX_TEST_FAIL;
-	}
+	skel = enq_select_cpu_fails__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(enq_select_cpu_fails__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c
index 31bcd06e21cd..9451782689de 100644
--- a/tools/testing/selftests/sched_ext/exit.c
+++ b/tools/testing/selftests/sched_ext/exit.c
@@ -23,6 +23,7 @@ static enum scx_test_status run(void *ctx)
 		char buf[16];
 
 		skel = exit__open();
+		SCX_ENUM_INIT(skel);
 		skel->rodata->exit_point = tc;
 		exit__load(skel);
 		link = bpf_map__attach_struct_ops(skel->maps.exit_ops);
diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c
index 87bf220b1bce..1c9ceb661c43 100644
--- a/tools/testing/selftests/sched_ext/hotplug.c
+++ b/tools/testing/selftests/sched_ext/hotplug.c
@@ -49,8 +49,10 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined)
 
 	SCX_ASSERT(is_cpu_online());
 
-	skel = hotplug__open_and_load();
-	SCX_ASSERT(skel);
+	skel = hotplug__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(hotplug__load(skel), "Failed to load skel");
 
 	/* Testing the offline -> online path, so go offline before starting */
 	if (onlining)
diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c
index 97d45f1e5597..eddf9e0e26e7 100644
--- a/tools/testing/selftests/sched_ext/init_enable_count.c
+++ b/tools/testing/selftests/sched_ext/init_enable_count.c
@@ -15,22 +15,6 @@
 
 #define SCHED_EXT 7
 
-static struct init_enable_count *
-open_load_prog(bool global)
-{
-	struct init_enable_count *skel;
-
-	skel = init_enable_count__open();
-	SCX_BUG_ON(!skel, "Failed to open skel");
-
-	if (!global)
-		skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL;
-
-	SCX_BUG_ON(init_enable_count__load(skel), "Failed to load skel");
-
-	return skel;
-}
-
 static enum scx_test_status run_test(bool global)
 {
 	struct init_enable_count *skel;
@@ -40,7 +24,14 @@ static enum scx_test_status run_test(bool global)
 	struct sched_param param = {};
 	pid_t pids[num_pre_forks];
 
-	skel = open_load_prog(global);
+	skel = init_enable_count__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+
+	if (!global)
+		skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL;
+
+	SCX_FAIL_IF(init_enable_count__load(skel), "Failed to load skel");
 
 	/*
 	 * Fork a bunch of children before we attach the scheduler so that we
@@ -159,7 +150,7 @@ static enum scx_test_status run(void *ctx)
 
 struct scx_test init_enable_count = {
 	.name = "init_enable_count",
-	.description = "Verify we do the correct amount of counting of init, "
+	.description = "Verify we correctly count the occurrences of init, "
 		       "enable, etc callbacks.",
 	.run = run,
 };
diff --git a/tools/testing/selftests/sched_ext/maximal.c b/tools/testing/selftests/sched_ext/maximal.c
index f38fc973c380..c6be50a9941d 100644
--- a/tools/testing/selftests/sched_ext/maximal.c
+++ b/tools/testing/selftests/sched_ext/maximal.c
@@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct maximal *skel;
 
-	skel = maximal__open_and_load();
-	SCX_FAIL_IF(!skel, "Failed to open and load skel");
+	skel = maximal__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(maximal__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/maybe_null.c b/tools/testing/selftests/sched_ext/maybe_null.c
index 31cfafb0cf65..aacf0c58ca4f 100644
--- a/tools/testing/selftests/sched_ext/maybe_null.c
+++ b/tools/testing/selftests/sched_ext/maybe_null.c
@@ -43,7 +43,7 @@ static enum scx_test_status run(void *ctx)
 
 struct scx_test maybe_null = {
 	.name = "maybe_null",
-	.description = "Verify if PTR_MAYBE_NULL work for .dispatch",
+	.description = "Verify if PTR_MAYBE_NULL works for .dispatch",
 	.run = run,
 };
 REGISTER_SCX_TEST(&maybe_null)
diff --git a/tools/testing/selftests/sched_ext/minimal.c b/tools/testing/selftests/sched_ext/minimal.c
index 6c5db8ebbf8a..89f7261757ff 100644
--- a/tools/testing/selftests/sched_ext/minimal.c
+++ b/tools/testing/selftests/sched_ext/minimal.c
@@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct minimal *skel;
 
-	skel = minimal__open_and_load();
-	if (!skel) {
-		SCX_ERR("Failed to open and load skel");
-		return SCX_TEST_FAIL;
-	}
+	skel = minimal__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(minimal__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/numa.bpf.c b/tools/testing/selftests/sched_ext/numa.bpf.c
new file mode 100644
index 000000000000..a79d86ed54a1
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/numa.bpf.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A scheduler that validates the behavior of the NUMA-aware
+ * functionalities.
+ *
+ * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
+ * are exclusively processed by CPUs within their respective nodes. Idle
+ * CPUs are selected only within the same node, so task migration can only
+ * occurs between CPUs belonging to the same node.
+ *
+ * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei);
+
+const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;
+
+static bool is_cpu_idle(s32 cpu, int node)
+{
+	const struct cpumask *idle_cpumask;
+	bool idle;
+
+	idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
+	idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
+	scx_bpf_put_cpumask(idle_cpumask);
+
+	return idle;
+}
+
+s32 BPF_STRUCT_OPS(numa_select_cpu,
+		   struct task_struct *p, s32 prev_cpu, u64 wake_flags)
+{
+	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
+	s32 cpu;
+
+	/*
+	 * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
+	 * since it already tries to pick an idle CPU within the node
+	 * first, but let's use both functions for better testing coverage.
+	 */
+	cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
+					__COMPAT_SCX_PICK_IDLE_IN_NODE);
+	if (cpu < 0)
+		cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
+						__COMPAT_SCX_PICK_IDLE_IN_NODE);
+
+	if (is_cpu_idle(cpu, node))
+		scx_bpf_error("CPU %d should be marked as busy", cpu);
+
+	if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
+		scx_bpf_error("CPU %d should be in node %d", cpu, node);
+
+	return cpu;
+}
+
+void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
+{
+	int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
+
+	scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags);
+}
+
+void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
+{
+	int node = __COMPAT_scx_bpf_cpu_node(cpu);
+
+	scx_bpf_dsq_move_to_local(node);
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
+{
+	int node, err;
+
+	bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
+		err = scx_bpf_create_dsq(node, node);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
+{
+	UEI_RECORD(uei, ei);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops numa_ops = {
+	.select_cpu		= (void *)numa_select_cpu,
+	.enqueue		= (void *)numa_enqueue,
+	.dispatch		= (void *)numa_dispatch,
+	.init			= (void *)numa_init,
+	.exit			= (void *)numa_exit,
+	.name			= "numa",
+};
diff --git a/tools/testing/selftests/sched_ext/numa.c b/tools/testing/selftests/sched_ext/numa.c
new file mode 100644
index 000000000000..b060c3b65c82
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/numa.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "numa.bpf.skel.h"
+#include "scx_test.h"
+
+static enum scx_test_status setup(void **ctx)
+{
+	struct numa *skel;
+
+	skel = numa__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	skel->rodata->__COMPAT_SCX_PICK_IDLE_IN_NODE = SCX_PICK_IDLE_IN_NODE;
+	skel->struct_ops.numa_ops->flags = SCX_OPS_BUILTIN_IDLE_PER_NODE;
+	SCX_FAIL_IF(numa__load(skel), "Failed to load skel");
+
+	*ctx = skel;
+
+	return SCX_TEST_PASS;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+	struct numa *skel = ctx;
+	struct bpf_link *link;
+
+	link = bpf_map__attach_struct_ops(skel->maps.numa_ops);
+	SCX_FAIL_IF(!link, "Failed to attach scheduler");
+
+	/* Just sleeping is fine, plenty of scheduling events happening */
+	sleep(1);
+
+	SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
+	bpf_link__destroy(link);
+
+	return SCX_TEST_PASS;
+}
+
+static void cleanup(void *ctx)
+{
+	struct numa *skel = ctx;
+
+	numa__destroy(skel);
+}
+
+struct scx_test numa = {
+	.name = "numa",
+	.description = "Verify NUMA-aware functionalities",
+	.setup = setup,
+	.run = run,
+	.cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&numa)
diff --git a/tools/testing/selftests/sched_ext/prog_run.c b/tools/testing/selftests/sched_ext/prog_run.c
index 3cd57ef8daaa..05974820ca69 100644
--- a/tools/testing/selftests/sched_ext/prog_run.c
+++ b/tools/testing/selftests/sched_ext/prog_run.c
@@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct prog_run *skel;
 
-	skel = prog_run__open_and_load();
-	if (!skel) {
-		SCX_ERR("Failed to open and load skel");
-		return SCX_TEST_FAIL;
-	}
+	skel = prog_run__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(prog_run__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/reload_loop.c b/tools/testing/selftests/sched_ext/reload_loop.c
index 5cfba2d6e056..308211d80436 100644
--- a/tools/testing/selftests/sched_ext/reload_loop.c
+++ b/tools/testing/selftests/sched_ext/reload_loop.c
@@ -18,11 +18,10 @@ bool force_exit = false;
 
 static enum scx_test_status setup(void **ctx)
 {
-	skel = maximal__open_and_load();
-	if (!skel) {
-		SCX_ERR("Failed to open and load skel");
-		return SCX_TEST_FAIL;
-	}
+	skel = maximal__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(maximal__load(skel), "Failed to load skel");
 
 	return SCX_TEST_PASS;
 }
diff --git a/tools/testing/selftests/sched_ext/runner.c b/tools/testing/selftests/sched_ext/runner.c
index eab48c7ff309..aa2d7d32dda9 100644
--- a/tools/testing/selftests/sched_ext/runner.c
+++ b/tools/testing/selftests/sched_ext/runner.c
@@ -22,11 +22,12 @@ const char help_fmt[] =
 "\n"
 "  -t TEST       Only run tests whose name includes this string\n"
 "  -s            Include print output for skipped tests\n"
+"  -l            List all available tests\n"
 "  -q            Don't print the test descriptions during run\n"
 "  -h            Display this help and exit\n";
 
 static volatile int exit_req;
-static bool quiet, print_skipped;
+static bool quiet, print_skipped, list;
 
 #define MAX_SCX_TESTS 2048
 
@@ -133,7 +134,7 @@ int main(int argc, char **argv)
 
 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 
-	while ((opt = getopt(argc, argv, "qst:h")) != -1) {
+	while ((opt = getopt(argc, argv, "qslt:h")) != -1) {
 		switch (opt) {
 		case 'q':
 			quiet = true;
@@ -141,6 +142,9 @@ int main(int argc, char **argv)
 		case 's':
 			print_skipped = true;
 			break;
+		case 'l':
+			list = true;
+			break;
 		case 't':
 			filter = optarg;
 			break;
@@ -154,6 +158,13 @@ int main(int argc, char **argv)
 		enum scx_test_status status;
 		struct scx_test *test = &__scx_tests[i];
 
+		if (list) {
+			printf("%s\n", test->name);
+			if (i == (__scx_num_tests - 1))
+				return 0;
+			continue;
+		}
+
 		if (filter && should_skip_test(test, filter)) {
 			/*
 			 * Printing the skipped tests and their preambles can
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.c
index a53a40c2d2f0..5b6e045e1109 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.c
@@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct select_cpu_dfl *skel;
 
-	skel = select_cpu_dfl__open_and_load();
-	SCX_FAIL_IF(!skel, "Failed to open and load skel");
+	skel = select_cpu_dfl__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(select_cpu_dfl__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c
index 1d85bf4bf3a3..9b5d232efb7f 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c
@@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct select_cpu_dfl_nodispatch *skel;
 
-	skel = select_cpu_dfl_nodispatch__open_and_load();
-	SCX_FAIL_IF(!skel, "Failed to open and load skel");
+	skel = select_cpu_dfl_nodispatch__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(select_cpu_dfl_nodispatch__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c
index 0309ca8785b3..80283dbc41b7 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c
@@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct select_cpu_dispatch *skel;
 
-	skel = select_cpu_dispatch__open_and_load();
-	SCX_FAIL_IF(!skel, "Failed to open and load skel");
+	skel = select_cpu_dispatch__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(select_cpu_dispatch__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c
index 47eb6ed7627d..5e72ebbc90a5 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c
@@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct select_cpu_dispatch_bad_dsq *skel;
 
-	skel = select_cpu_dispatch_bad_dsq__open_and_load();
-	SCX_FAIL_IF(!skel, "Failed to open and load skel");
+	skel = select_cpu_dispatch_bad_dsq__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(select_cpu_dispatch_bad_dsq__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c
index 48ff028a3c46..aa85949478bc 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c
@@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct select_cpu_dispatch_dbl_dsp *skel;
 
-	skel = select_cpu_dispatch_dbl_dsp__open_and_load();
-	SCX_FAIL_IF(!skel, "Failed to open and load skel");
+	skel = select_cpu_dispatch_dbl_dsp__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(select_cpu_dispatch_dbl_dsp__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.c
index b4629c2364f5..1e9b5c9bfff1 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_vtime.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.c
@@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx)
 {
 	struct select_cpu_vtime *skel;
 
-	skel = select_cpu_vtime__open_and_load();
-	SCX_FAIL_IF(!skel, "Failed to open and load skel");
+	skel = select_cpu_vtime__open();
+	SCX_FAIL_IF(!skel, "Failed to open");
+	SCX_ENUM_INIT(skel);
+	SCX_FAIL_IF(select_cpu_vtime__load(skel), "Failed to load skel");
+
 	*ctx = skel;
 
 	return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 8c3a73461475..b2f76a52215a 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -47,6 +47,7 @@
 #include <linux/kcmp.h>
 #include <sys/resource.h>
 #include <sys/capability.h>
+#include <linux/perf_event.h>
 
 #include <unistd.h>
 #include <sys/syscall.h>
@@ -68,6 +69,10 @@
 # define PR_SET_PTRACER 0x59616d61
 #endif
 
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
 #ifndef PR_SET_NO_NEW_PRIVS
 #define PR_SET_NO_NEW_PRIVS 38
 #define PR_GET_NO_NEW_PRIVS 39
@@ -150,6 +155,12 @@ struct seccomp_data {
 # endif
 #endif
 
+#ifndef __NR_uretprobe
+# if defined(__x86_64__)
+#  define __NR_uretprobe 335
+# endif
+#endif
+
 #ifndef SECCOMP_SET_MODE_STRICT
 #define SECCOMP_SET_MODE_STRICT 0
 #endif
@@ -4888,6 +4899,200 @@ TEST(tsync_vs_dead_thread_leader)
 	EXPECT_EQ(0, status);
 }
 
+noinline int probed(void)
+{
+	return 1;
+}
+
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+	int err = -1, ret;
+	FILE *f;
+
+	f = fopen(file, "re");
+	if (f) {
+		err = fscanf(f, fmt, &ret);
+		fclose(f);
+	}
+	return err == 1 ? ret : err;
+}
+
+static int determine_uprobe_perf_type(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+	return parse_uint_from_file(file, "config:%d\n");
+}
+
+static ssize_t get_uprobe_offset(const void *addr)
+{
+	size_t start, base, end;
+	bool found = false;
+	char buf[256];
+	FILE *f;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f)
+		return -1;
+
+	while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
+		if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
+			found = true;
+			break;
+		}
+	}
+	fclose(f);
+	return found ? (uintptr_t)addr - start + base : -1;
+}
+
+FIXTURE(URETPROBE) {
+	int fd;
+};
+
+FIXTURE_VARIANT(URETPROBE) {
+	/*
+	 * All of the URETPROBE behaviors can be tested with either
+	 * uretprobe attached or not
+	 */
+	bool attach;
+};
+
+FIXTURE_VARIANT_ADD(URETPROBE, attached) {
+	.attach = true,
+};
+
+FIXTURE_VARIANT_ADD(URETPROBE, not_attached) {
+	.attach = false,
+};
+
+FIXTURE_SETUP(URETPROBE)
+{
+	const size_t attr_sz = sizeof(struct perf_event_attr);
+	struct perf_event_attr attr;
+	ssize_t offset;
+	int type, bit;
+
+#ifndef __NR_uretprobe
+	SKIP(return, "__NR_uretprobe syscall not defined");
+#endif
+
+	if (!variant->attach)
+		return;
+
+	memset(&attr, 0, attr_sz);
+
+	type = determine_uprobe_perf_type();
+	ASSERT_GE(type, 0);
+	bit = determine_uprobe_retprobe_bit();
+	ASSERT_GE(bit, 0);
+	offset = get_uprobe_offset(probed);
+	ASSERT_GE(offset, 0);
+
+	attr.config |= 1 << bit;
+	attr.size = attr_sz;
+	attr.type = type;
+	attr.config1 = ptr_to_u64("/proc/self/exe");
+	attr.config2 = offset;
+
+	self->fd = syscall(__NR_perf_event_open, &attr,
+			   getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */,
+			   PERF_FLAG_FD_CLOEXEC);
+}
+
+FIXTURE_TEARDOWN(URETPROBE)
+{
+	/* we could call close(self->fd), but we'd need extra filter for
+	 * that and since we are calling _exit right away..
+	 */
+}
+
+static int run_probed_with_filter(struct sock_fprog *prog)
+{
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
+	    seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) {
+		return -1;
+	}
+
+	probed();
+	return 0;
+}
+
+TEST_F(URETPROBE, uretprobe_default_allow)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(URETPROBE, uretprobe_default_block)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uretprobe
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1),
+#endif
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
+TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+#ifdef __NR_uretprobe
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0),
+#endif
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	ASSERT_EQ(0, run_probed_with_filter(&prog));
+}
+
 /*
  * TODO:
  * - expand NNP testing
diff --git a/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py
new file mode 100755
index 000000000000..0f44a6199495
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Script that checks that SFQ rejects a limit of 1 at the kernel
+# level. We can't use iproute2's tc because it does not accept a limit
+# of 1.
+
+import sys
+import os
+
+from pyroute2 import IPRoute
+from pyroute2.netlink.exceptions import NetlinkError
+
+ip = IPRoute()
+ifidx = ip.link_lookup(ifname=sys.argv[1])
+
+try:
+    ip.tc('add', 'sfq', ifidx, limit=1)
+    sys.exit(1)
+except NetlinkError:
+    sys.exit(0)
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index d3dd65b05b5f..9044ac054167 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -94,5 +94,37 @@
             "$TC qdisc del dev $DUMMY ingress",
             "$IP addr del 10.10.10.10/24 dev $DUMMY"
         ]
-    }
+    },
+    {
+	"id": "a4b9",
+	"name": "Test class qlen notification",
+	"category": [
+	    "qdisc"
+	],
+	"plugins": {
+	    "requires": "nsPlugin"
+	},
+	"setup": [
+            "$IP link set dev $DUMMY up || true",
+            "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+            "$TC qdisc add dev $DUMMY root handle 1: drr",
+            "$TC filter add dev $DUMMY parent 1: basic classid 1:1",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 drr",
+            "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem",
+            "$TC qdisc add dev $DUMMY parent 2: handle 3: drr",
+            "$TC filter add dev $DUMMY parent 3: basic action drop",
+            "$TC class add dev $DUMMY parent 3: classid 3:1 drr",
+            "$TC class del dev $DUMMY classid 1:1",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 drr"
+        ],
+        "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc ls dev $DUMMY",
+        "matchPattern": "drr 1: root",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY root handle 1: drr",
+            "$IP addr del 10.10.10.10/24 dev $DUMMY"
+        ]
+   }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
index 7126ec3485cb..2b61d8d79bde 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
@@ -61,5 +61,30 @@
         "teardown": [
             "$TC qdisc del dev $DUMMY handle 1: root"
         ]
+    },
+    {
+        "id": "4009",
+        "name": "Reject creation of DRR class with classid TC_H_ROOT",
+        "category": [
+            "qdisc",
+            "drr"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DUMMY root handle ffff: drr",
+            "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1",
+            "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr",
+            "$TC filter add dev $DUMMY parent ffff: prio 1 u32 match u16 0x0000 0xfe00 at 2 flowid ffff:ffff"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent ffff: classid ffff:ffff drr",
+        "expExitCode": "2",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class drr ffff:ffff",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY root"
+        ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
index ae3d286a32b2..6f20d033670d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
@@ -313,6 +313,29 @@
         "matchPattern": "qdisc bfifo 1: root",
         "matchCount": "0",
         "teardown": [
+	]
+    },
+    {
+        "id": "d774",
+        "name": "Check pfifo_head_drop qdisc enqueue behaviour when limit == 0",
+        "category": [
+            "qdisc",
+            "pfifo_head_drop"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+            "$TC qdisc add dev $DUMMY root handle 1: pfifo_head_drop limit 0",
+            "$IP link set dev $DUMMY up || true"
+        ],
+        "cmdUnderTest": "ping -c2 -W0.01 -I $DUMMY 10.10.10.1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+        "matchPattern": "dropped 2",
+        "matchCount": "1",
+        "teardown": [
         ]
     }
 ]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
index 16d51936b385..50e8d72781cb 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -208,5 +208,25 @@
         "teardown": [
             "$TC qdisc del dev $DUMMY handle 1: root"
         ]
+    },
+    {
+        "id": "4d6f",
+        "name": "Check that limit of 1 is rejected",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+        ],
+        "cmdUnderTest": "./scripts/sfq_rejects_limit_1.py $DUMMY",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "sfq",
+        "matchCount": "0",
+        "teardown": [
+        ]
     }
 ]
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index c5264594064c..83faa4e354e3 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -156,8 +156,8 @@ int main(int argc, char **argv)
 	/* Check everything is sane before we start switching asynchronously */
 	if (do_sanity_check) {
 		for (i = 0; i < count; i++) {
-			printf("Validating clocksource %s\n",
-				clocksource_list[i]);
+			ksft_print_msg("Validating clocksource %s\n",
+					clocksource_list[i]);
 			if (change_clocksource(clocksource_list[i])) {
 				status = -1;
 				goto out;
@@ -169,7 +169,7 @@ int main(int argc, char **argv)
 		}
 	}
 
-	printf("Running Asynchronous Switching Tests...\n");
+	ksft_print_msg("Running Asynchronous Switching Tests...\n");
 	pid = fork();
 	if (!pid)
 		return run_tests(runtime);
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 9814b3a1c77d..f0eceb0faf34 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -7,6 +7,7 @@
  * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com>
  */
 #define _GNU_SOURCE
+#include <sys/prctl.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <stdio.h>
@@ -599,14 +600,84 @@ static void check_overrun(int which, const char *name)
 			 "check_overrun %s\n", name);
 }
 
+#include <sys/syscall.h>
+
+static int do_timer_create(int *id)
+{
+	return syscall(__NR_timer_create, CLOCK_MONOTONIC, NULL, id);
+}
+
+static int do_timer_delete(int id)
+{
+	return syscall(__NR_timer_delete, id);
+}
+
+#ifndef PR_TIMER_CREATE_RESTORE_IDS
+# define PR_TIMER_CREATE_RESTORE_IDS		77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF	 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON		 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET	 2
+#endif
+
+static void check_timer_create_exact(void)
+{
+	int id;
+
+	if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_ON, 0, 0, 0)) {
+		switch (errno) {
+		case EINVAL:
+			ksft_test_result_skip("check timer create exact, not supported\n");
+			return;
+		default:
+			ksft_test_result_skip("check timer create exact, errno = %d\n", errno);
+			return;
+		}
+	}
+
+	if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0) != 1)
+		fatal_error(NULL, "prctl(GET) failed\n");
+
+	id = 8;
+	if (do_timer_create(&id) < 0)
+		fatal_error(NULL, "timer_create()");
+
+	if (do_timer_delete(id))
+		fatal_error(NULL, "timer_delete()");
+
+	if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_OFF, 0, 0, 0))
+		fatal_error(NULL, "prctl(OFF)");
+
+	if (prctl(PR_TIMER_CREATE_RESTORE_IDS, PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0) != 0)
+		fatal_error(NULL, "prctl(GET) failed\n");
+
+	if (id != 8) {
+		ksft_test_result_fail("check timer create exact %d != 8\n", id);
+		return;
+	}
+
+	/* Validate that it went back to normal mode and allocates ID 9 */
+	if (do_timer_create(&id) < 0)
+		fatal_error(NULL, "timer_create()");
+
+	if (do_timer_delete(id))
+		fatal_error(NULL, "timer_delete()");
+
+	if (id == 9)
+		ksft_test_result_pass("check timer create exact\n");
+	else
+		ksft_test_result_fail("check timer create exact. Disabling failed.\n");
+}
+
 int main(int argc, char **argv)
 {
 	ksft_print_header();
-	ksft_set_plan(18);
+	ksft_set_plan(19);
 
 	ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
 	ksft_print_msg("based timers if other threads run on the CPU...\n");
 
+	check_timer_create_exact();
+
 	check_itimer(ITIMER_VIRTUAL, "ITIMER_VIRTUAL");
 	check_itimer(ITIMER_PROF, "ITIMER_PROF");
 	check_itimer(ITIMER_REAL, "ITIMER_REAL");
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index 83450145fe65..46c391d7f45d 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -47,7 +47,7 @@ int main(int argc, char **argv)
 
 	pid = fork();
 	if (!pid)
-		return system("./inconsistency-check -c 1 -t 600");
+		return system("./inconsistency-check -t 60");
 
 	ppm = 500;
 	ret = 0;
diff --git a/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c b/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c
index b5c3ddb90942..02ecfe687dc2 100644
--- a/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c
+++ b/tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c
@@ -23,45 +23,56 @@
 #include <sys/mount.h>
 #include <unistd.h>
 
+#include "../kselftest.h"
+
 int main(void)
 {
 	int fd;
 
+	// Setting up kselftest framework
+	ksft_print_header();
+	ksft_set_plan(1);
+
+	// Check if test is run as root
+	if (geteuid()) {
+		ksft_exit_skip("This test needs root to run!\n");
+		return 1;
+	}
+
 	if (unshare(CLONE_NEWNS) == -1) {
 		if (errno == ENOSYS || errno == EPERM) {
-			fprintf(stderr, "error: unshare, errno %d\n", errno);
-			return 4;
+			ksft_exit_skip("unshare() error: unshare, errno %d\n", errno);
+		} else {
+			ksft_exit_fail_msg("unshare() error: unshare, errno %d\n", errno);
 		}
-		fprintf(stderr, "error: unshare, errno %d\n", errno);
-		return 1;
 	}
+
 	if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
-		fprintf(stderr, "error: mount '/', errno %d\n", errno);
-		return 1;
+		ksft_exit_fail_msg("mount() error: Root filesystem private mount: Fail %d\n", errno);
 	}
 
 	/* Our heroes: 1 root inode, 1 O_TMPFILE inode, 1 permanent inode. */
 	if (mount(NULL, "/tmp", "tmpfs", 0, "nr_inodes=3") == -1) {
-		fprintf(stderr, "error: mount tmpfs, errno %d\n", errno);
-		return 1;
+		ksft_exit_fail_msg("mount() error: Mounting tmpfs on /tmp: Fail %d\n", errno);
 	}
 
 	fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_TMPFILE, 0600);
 	if (fd == -1) {
-		fprintf(stderr, "error: open 1, errno %d\n", errno);
-		return 1;
+		ksft_exit_fail_msg("openat() error: Open first temporary file: Fail %d\n", errno);
 	}
+
 	if (linkat(fd, "", AT_FDCWD, "/tmp/1", AT_EMPTY_PATH) == -1) {
-		fprintf(stderr, "error: linkat, errno %d\n", errno);
-		return 1;
+		ksft_exit_fail_msg("linkat() error: Linking the temporary file: Fail %d\n", errno);
+		/* Ensure fd is closed on failure */
+		close(fd);
 	}
 	close(fd);
 
 	fd = openat(AT_FDCWD, "/tmp", O_WRONLY|O_TMPFILE, 0600);
 	if (fd == -1) {
-		fprintf(stderr, "error: open 2, errno %d\n", errno);
-		return 1;
+		ksft_exit_fail_msg("openat() error: Opening the second temporary file: Fail %d\n", errno);
 	}
-
+	ksft_test_result_pass(" ");
+	ksft_exit_pass();
 	return 0;
 }
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 1cf14a8da438..12a0614b9fd4 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -19,13 +19,20 @@ LDLIBS += -lgcc_s
 endif
 
 include ../lib.mk
+
+CFLAGS += $(TOOLS_INCLUDES)
+
+CFLAGS_NOLIBC := -nostdlib -nostdinc -ffreestanding -fno-asynchronous-unwind-tables \
+		 -fno-stack-protector -include $(top_srcdir)/tools/include/nolibc/nolibc.h \
+		 -I$(top_srcdir)/tools/include/nolibc/ $(KHDR_INCLUDES)
+
 $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
 $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
 $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
 $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
 
-$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
-$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c | headers
+$(OUTPUT)/vdso_standalone_test_x86: CFLAGS:=$(CFLAGS_NOLIBC) $(CFLAGS)
 
 $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
 $(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 28f35620c499..3ff00fb624a4 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -19,13 +19,14 @@
 #include <stdint.h>
 #include <string.h>
 #include <limits.h>
-#include <elf.h>
+#include <linux/auxvec.h>
+#include <linux/elf.h>
 
 #include "parse_vdso.h"
 
 /* And here's the code. */
 #ifndef ELF_BITS
-# if ULONG_MAX > 0xffffffffUL
+# if __SIZEOF_LONG__ >= 8
 #  define ELF_BITS 64
 # else
 #  define ELF_BITS 32
@@ -53,6 +54,7 @@ static struct vdso_info
 	/* Symbol table */
 	ELF(Sym) *symtab;
 	const char *symstrings;
+	ELF(Word) *gnu_hash, *gnu_bucket;
 	ELF_HASH_ENTRY *bucket, *chain;
 	ELF_HASH_ENTRY nbucket, nchain;
 
@@ -81,6 +83,16 @@ static unsigned long elf_hash(const char *name)
 	return h;
 }
 
+static uint32_t gnu_hash(const char *name)
+{
+	const unsigned char *s = (void *)name;
+	uint32_t h = 5381;
+
+	for (; *s; s++)
+		h += h * 32 + *s;
+	return h;
+}
+
 void vdso_init_from_sysinfo_ehdr(uintptr_t base)
 {
 	size_t i;
@@ -123,6 +135,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base)
 	 */
 	ELF_HASH_ENTRY *hash = 0;
 	vdso_info.symstrings = 0;
+	vdso_info.gnu_hash = 0;
 	vdso_info.symtab = 0;
 	vdso_info.versym = 0;
 	vdso_info.verdef = 0;
@@ -143,6 +156,11 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base)
 				((uintptr_t)dyn[i].d_un.d_ptr
 				 + vdso_info.load_offset);
 			break;
+		case DT_GNU_HASH:
+			vdso_info.gnu_hash =
+				(ELF(Word) *)((uintptr_t)dyn[i].d_un.d_ptr +
+					      vdso_info.load_offset);
+			break;
 		case DT_VERSYM:
 			vdso_info.versym = (ELF(Versym) *)
 				((uintptr_t)dyn[i].d_un.d_ptr
@@ -155,17 +173,27 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base)
 			break;
 		}
 	}
-	if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
+	if (!vdso_info.symstrings || !vdso_info.symtab ||
+	    (!hash && !vdso_info.gnu_hash))
 		return;  /* Failed */
 
 	if (!vdso_info.verdef)
 		vdso_info.versym = 0;
 
 	/* Parse the hash table header. */
-	vdso_info.nbucket = hash[0];
-	vdso_info.nchain = hash[1];
-	vdso_info.bucket = &hash[2];
-	vdso_info.chain = &hash[vdso_info.nbucket + 2];
+	if (vdso_info.gnu_hash) {
+		vdso_info.nbucket = vdso_info.gnu_hash[0];
+		/* The bucket array is located after the header (4 uint32) and the bloom
+		 * filter (size_t array of gnu_hash[2] elements).
+		 */
+		vdso_info.gnu_bucket = vdso_info.gnu_hash + 4 +
+				       sizeof(size_t) / 4 * vdso_info.gnu_hash[2];
+	} else {
+		vdso_info.nbucket = hash[0];
+		vdso_info.nchain = hash[1];
+		vdso_info.bucket = &hash[2];
+		vdso_info.chain = &hash[vdso_info.nbucket + 2];
+	}
 
 	/* That's all we need. */
 	vdso_info.valid = true;
@@ -209,6 +237,26 @@ static bool vdso_match_version(ELF(Versym) ver,
 		&& !strcmp(name, vdso_info.symstrings + aux->vda_name);
 }
 
+static bool check_sym(ELF(Sym) *sym, ELF(Word) i, const char *name,
+		      const char *version, unsigned long ver_hash)
+{
+	/* Check for a defined global or weak function w/ right name. */
+	if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+		return false;
+	if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+	    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+		return false;
+	if (strcmp(name, vdso_info.symstrings + sym->st_name))
+		return false;
+
+	/* Check symbol version. */
+	if (vdso_info.versym &&
+	    !vdso_match_version(vdso_info.versym[i], version, ver_hash))
+		return false;
+
+	return true;
+}
+
 void *vdso_sym(const char *version, const char *name)
 {
 	unsigned long ver_hash;
@@ -216,44 +264,37 @@ void *vdso_sym(const char *version, const char *name)
 		return 0;
 
 	ver_hash = elf_hash(version);
-	ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
-
-	for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
-		ELF(Sym) *sym = &vdso_info.symtab[chain];
-
-		/* Check for a defined global or weak function w/ right name. */
-		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
-			continue;
-		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
-		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
-			continue;
-		if (sym->st_shndx == SHN_UNDEF)
-			continue;
-		if (strcmp(name, vdso_info.symstrings + sym->st_name))
-			continue;
-
-		/* Check symbol version. */
-		if (vdso_info.versym
-		    && !vdso_match_version(vdso_info.versym[chain],
-					   version, ver_hash))
-			continue;
-
-		return (void *)(vdso_info.load_offset + sym->st_value);
-	}
-
-	return 0;
-}
-
-void vdso_init_from_auxv(void *auxv)
-{
-	ELF(auxv_t) *elf_auxv = auxv;
-	for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
-	{
-		if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
-			vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
-			return;
+	ELF(Word) i;
+
+	if (vdso_info.gnu_hash) {
+		uint32_t h1 = gnu_hash(name), h2, *hashval;
+
+		i = vdso_info.gnu_bucket[h1 % vdso_info.nbucket];
+		if (i == 0)
+			return 0;
+		h1 |= 1;
+		hashval = vdso_info.gnu_bucket + vdso_info.nbucket +
+			  (i - vdso_info.gnu_hash[1]);
+		for (;; i++) {
+			ELF(Sym) *sym = &vdso_info.symtab[i];
+			h2 = *hashval++;
+			if (h1 == (h2 | 1) &&
+			    check_sym(sym, i, name, version, ver_hash))
+				return (void *)(vdso_info.load_offset +
+						sym->st_value);
+			if (h2 & 1)
+				break;
+		}
+	} else {
+		i = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
+		for (; i; i = vdso_info.chain[i]) {
+			ELF(Sym) *sym = &vdso_info.symtab[i];
+			if (sym->st_shndx != SHN_UNDEF &&
+			    check_sym(sym, i, name, version, ver_hash))
+				return (void *)(vdso_info.load_offset +
+						sym->st_value);
 		}
 	}
 
-	vdso_info.valid = false;
+	return 0;
 }
diff --git a/tools/testing/selftests/vDSO/parse_vdso.h b/tools/testing/selftests/vDSO/parse_vdso.h
index de0453067d7c..09d068ed11f9 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.h
+++ b/tools/testing/selftests/vDSO/parse_vdso.h
@@ -26,6 +26,5 @@
  */
 void *vdso_sym(const char *version, const char *name);
 void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-void vdso_init_from_auxv(void *auxv);
 
 #endif
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 644915862af8..9ce795b806f0 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -1,142 +1,58 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * vdso_test.c: Sample code to test parse_vdso.c on x86
- * Copyright (c) 2011-2014 Andy Lutomirski
+ * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and
+ *                           vDSO gettimeofday()
+ * Copyright (c) 2014 Andy Lutomirski
  *
- * You can amuse yourself by compiling with:
- * gcc -std=gnu99 -nostdlib
- *     -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
- *      vdso_standalone_test_x86.c parse_vdso.c
- * to generate a small binary.  On x86_64, you can omit -lgcc_s
- * if you want the binary to be completely standalone.
+ * Compile with:
+ * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c
+ *
+ * Tested on x86, 32-bit and 64-bit.  It may work on other architectures, too.
  */
 
-#include <sys/syscall.h>
+#include <stdio.h>
+#ifndef NOLIBC
+#include <sys/auxv.h>
 #include <sys/time.h>
-#include <unistd.h>
-#include <stdint.h>
-
-#include "parse_vdso.h"
-
-/* We need some libc functions... */
-int strcmp(const char *a, const char *b)
-{
-	/* This implementation is buggy: it never returns -1. */
-	while (*a || *b) {
-		if (*a != *b)
-			return 1;
-		if (*a == 0 || *b == 0)
-			return 1;
-		a++;
-		b++;
-	}
-
-	return 0;
-}
-
-/*
- * The clang build needs this, although gcc does not.
- * Stolen from lib/string.c.
- */
-void *memcpy(void *dest, const void *src, size_t count)
-{
-	char *tmp = dest;
-	const char *s = src;
-
-	while (count--)
-		*tmp++ = *s++;
-	return dest;
-}
-
-/* ...and two syscalls.  This is x86-specific. */
-static inline long x86_syscall3(long nr, long a0, long a1, long a2)
-{
-	long ret;
-#ifdef __x86_64__
-	asm volatile ("syscall" : "=a" (ret) : "a" (nr),
-		      "D" (a0), "S" (a1), "d" (a2) :
-		      "cc", "memory", "rcx",
-		      "r8", "r9", "r10", "r11" );
-#else
-	asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
-		      "b" (a0), "c" (a1), "d" (a2) :
-		      "cc", "memory" );
 #endif
-	return ret;
-}
 
-static inline long linux_write(int fd, const void *data, size_t len)
-{
-	return x86_syscall3(__NR_write, fd, (long)data, (long)len);
-}
+#include "../kselftest.h"
+#include "parse_vdso.h"
+#include "vdso_config.h"
+#include "vdso_call.h"
 
-static inline void linux_exit(int code)
+int main(int argc, char **argv)
 {
-	x86_syscall3(__NR_exit, code, 0, 0);
-}
+	const char *version = versions[VDSO_VERSION];
+	const char **name = (const char **)&names[VDSO_NAMES];
 
-void to_base10(char *lastdig, time_t n)
-{
-	while (n) {
-		*lastdig = (n % 10) + '0';
-		n /= 10;
-		lastdig--;
+	unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+	if (!sysinfo_ehdr) {
+		printf("AT_SYSINFO_EHDR is not present!\n");
+		return KSFT_SKIP;
 	}
-}
-
-void c_main(void **stack)
-{
-	/* Parse the stack */
-	long argc = (long)*stack;
-	stack += argc + 2;
-
-	/* Now we're pointing at the environment.  Skip it. */
-	while(*stack)
-		stack++;
-	stack++;
 
-	/* Now we're pointing at auxv.  Initialize the vDSO parser. */
-	vdso_init_from_auxv((void *)stack);
+	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
 
 	/* Find gettimeofday. */
 	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
-	gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+	gtod_t gtod = (gtod_t)vdso_sym(version, name[0]);
 
-	if (!gtod)
-		linux_exit(1);
+	if (!gtod) {
+		printf("Could not find %s\n", name[0]);
+		return KSFT_SKIP;
+	}
 
 	struct timeval tv;
-	long ret = gtod(&tv, 0);
+	long ret = VDSO_CALL(gtod, 2, &tv, 0);
 
 	if (ret == 0) {
-		char buf[] = "The time is                     .000000\n";
-		to_base10(buf + 31, tv.tv_sec);
-		to_base10(buf + 38, tv.tv_usec);
-		linux_write(1, buf, sizeof(buf) - 1);
+		printf("The time is %lld.%06lld\n",
+		       (long long)tv.tv_sec, (long long)tv.tv_usec);
 	} else {
-		linux_exit(ret);
+		printf("%s failed\n", name[0]);
+		return KSFT_FAIL;
 	}
 
-	linux_exit(0);
+	return 0;
 }
-
-/*
- * This is the real entry point.  It passes the initial stack into
- * the C entry point.
- */
-asm (
-	".text\n"
-	".global _start\n"
-	".type _start,@function\n"
-	"_start:\n\t"
-#ifdef __x86_64__
-	"mov %rsp,%rdi\n\t"
-	"and $-16,%rsp\n\t"
-	"sub $8,%rsp\n\t"
-	"jmp c_main"
-#else
-	"push %esp\n\t"
-	"call c_main\n\t"
-	"int $3"
-#endif
-	);
diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
index e31b18ffae33..9ce795b806f0 100644
--- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
+++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
@@ -10,11 +10,11 @@
  * Tested on x86, 32-bit and 64-bit.  It may work on other architectures, too.
  */
 
-#include <stdint.h>
-#include <elf.h>
 #include <stdio.h>
+#ifndef NOLIBC
 #include <sys/auxv.h>
 #include <sys/time.h>
+#endif
 
 #include "../kselftest.h"
 #include "parse_vdso.h"
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index 139fd9aa8b12..c305d2f613f0 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -27,7 +27,6 @@ CONFIG_DEBUG_KMEMLEAK=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHED_DEBUG=y
 CONFIG_SCHED_INFO=y
 CONFIG_SCHEDSTATS=y
 CONFIG_SCHED_STACK_END_CHECK=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d51249f14e2f..28422c32cc8f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
 TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
-			corrupt_xstate_header amx lam test_shadow_stack
+			corrupt_xstate_header amx lam test_shadow_stack avx
 # Some selftests require 32bit support enabled also on 64bit systems
 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
@@ -132,3 +132,7 @@ $(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
 
 $(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack
 $(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
+
+$(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f
+$(OUTPUT)/amx_64: EXTRA_FILES += xstate.c
+$(OUTPUT)/avx_64: EXTRA_FILES += xstate.c
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index 1fdf35a4d7f6..40769c16de1b 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -3,7 +3,6 @@
 #define _GNU_SOURCE
 #include <err.h>
 #include <errno.h>
-#include <pthread.h>
 #include <setjmp.h>
 #include <stdio.h>
 #include <string.h>
@@ -14,169 +13,27 @@
 #include <sys/auxv.h>
 #include <sys/mman.h>
 #include <sys/shm.h>
-#include <sys/ptrace.h>
 #include <sys/syscall.h>
 #include <sys/wait.h>
-#include <sys/uio.h>
 
-#include "../kselftest.h" /* For __cpuid_count() */
+#include "helpers.h"
+#include "xstate.h"
 
 #ifndef __x86_64__
 # error This test is 64-bit only
 #endif
 
-#define XSAVE_HDR_OFFSET	512
-#define XSAVE_HDR_SIZE		64
-
-struct xsave_buffer {
-	union {
-		struct {
-			char legacy[XSAVE_HDR_OFFSET];
-			char header[XSAVE_HDR_SIZE];
-			char extended[0];
-		};
-		char bytes[0];
-	};
-};
-
-static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
-{
-	uint32_t rfbm_lo = rfbm;
-	uint32_t rfbm_hi = rfbm >> 32;
-
-	asm volatile("xsave (%%rdi)"
-		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
-		     : "memory");
-}
-
-static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
-{
-	uint32_t rfbm_lo = rfbm;
-	uint32_t rfbm_hi = rfbm >> 32;
-
-	asm volatile("xrstor (%%rdi)"
-		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
-}
-
 /* err() exits and will not return */
 #define fatal_error(msg, ...)	err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		fatal_error("sigaction");
-}
-
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		fatal_error("sigaction");
-}
-
-#define XFEATURE_XTILECFG	17
-#define XFEATURE_XTILEDATA	18
 #define XFEATURE_MASK_XTILECFG	(1 << XFEATURE_XTILECFG)
 #define XFEATURE_MASK_XTILEDATA	(1 << XFEATURE_XTILEDATA)
 #define XFEATURE_MASK_XTILE	(XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
 
-#define CPUID_LEAF1_ECX_XSAVE_MASK	(1 << 26)
-#define CPUID_LEAF1_ECX_OSXSAVE_MASK	(1 << 27)
-
-static uint32_t xbuf_size;
-
-static struct {
-	uint32_t xbuf_offset;
-	uint32_t size;
-} xtiledata;
-
-#define CPUID_LEAF_XSTATE		0xd
-#define CPUID_SUBLEAF_XSTATE_USER	0x0
-#define TILE_CPUID			0x1d
-#define TILE_PALETTE_ID			0x1
-
-static void check_cpuid_xtiledata(void)
-{
-	uint32_t eax, ebx, ecx, edx;
-
-	__cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
-		      eax, ebx, ecx, edx);
-
-	/*
-	 * EBX enumerates the size (in bytes) required by the XSAVE
-	 * instruction for an XSAVE area containing all the user state
-	 * components corresponding to bits currently set in XCR0.
-	 *
-	 * Stash that off so it can be used to allocate buffers later.
-	 */
-	xbuf_size = ebx;
-
-	__cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA,
-		      eax, ebx, ecx, edx);
-	/*
-	 * eax: XTILEDATA state component size
-	 * ebx: XTILEDATA state component offset in user buffer
-	 */
-	if (!eax || !ebx)
-		fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
-				eax, ebx);
-
-	xtiledata.size	      = eax;
-	xtiledata.xbuf_offset = ebx;
-}
+struct xstate_info xtiledata;
 
 /* The helpers for managing XSAVE buffer and tile states: */
 
-struct xsave_buffer *alloc_xbuf(void)
-{
-	struct xsave_buffer *xbuf;
-
-	/* XSAVE buffer should be 64B-aligned. */
-	xbuf = aligned_alloc(64, xbuf_size);
-	if (!xbuf)
-		fatal_error("aligned_alloc()");
-	return xbuf;
-}
-
-static inline void clear_xstate_header(struct xsave_buffer *buffer)
-{
-	memset(&buffer->header, 0, sizeof(buffer->header));
-}
-
-static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
-{
-	/* XSTATE_BV is at the beginning of the header: */
-	*(uint64_t *)(&buffer->header) = bv;
-}
-
-static void set_rand_tiledata(struct xsave_buffer *xbuf)
-{
-	int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
-	int data;
-	int i;
-
-	/*
-	 * Ensure that 'data' is never 0.  This ensures that
-	 * the registers are never in their initial configuration
-	 * and thus never tracked as being in the init state.
-	 */
-	data = rand() | 1;
-
-	for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
-		*ptr = data;
-}
-
 struct xsave_buffer *stashed_xsave;
 
 static void init_stashed_xsave(void)
@@ -192,21 +49,6 @@ static void free_stashed_xsave(void)
 	free(stashed_xsave);
 }
 
-/* See 'struct _fpx_sw_bytes' at sigcontext.h */
-#define SW_BYTES_OFFSET		464
-/* N.B. The struct's field name varies so read from the offset. */
-#define SW_BYTES_BV_OFFSET	(SW_BYTES_OFFSET + 8)
-
-static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
-{
-	return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET);
-}
-
-static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
-{
-	return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
-}
-
 /* Work around printf() being unsafe in signals: */
 #define SIGNAL_BUF_LEN 1000
 char signal_message_buffer[SIGNAL_BUF_LEN];
@@ -304,17 +146,10 @@ static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
 {
 	clear_xstate_header(xbuf);
 	set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
-	set_rand_tiledata(xbuf);
+	set_rand_data(&xtiledata, xbuf);
 	return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
 }
 
-/* Return XTILEDATA to its initial configuration. */
-static inline void init_xtiledata(void)
-{
-	clear_xstate_header(stashed_xsave);
-	xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
-}
-
 enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
 
 /* arch_prctl() and sigaltstack() test */
@@ -587,14 +422,6 @@ static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
 	return true;
 }
 
-static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
-{
-	int ret = __validate_tiledata_regs(xbuf);
-
-	if (ret != 0)
-		fatal_error("TILEDATA registers changed");
-}
-
 static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
 {
 	int ret = __validate_tiledata_regs(xbuf);
@@ -651,251 +478,6 @@ static void test_fork(void)
 	_exit(0);
 }
 
-/* Context switching test */
-
-static struct _ctxtswtest_cfg {
-	unsigned int iterations;
-	unsigned int num_threads;
-} ctxtswtest_config;
-
-struct futex_info {
-	pthread_t thread;
-	int nr;
-	pthread_mutex_t mutex;
-	struct futex_info *next;
-};
-
-static void *check_tiledata(void *info)
-{
-	struct futex_info *finfo = (struct futex_info *)info;
-	struct xsave_buffer *xbuf;
-	int i;
-
-	xbuf = alloc_xbuf();
-	if (!xbuf)
-		fatal_error("unable to allocate XSAVE buffer");
-
-	/*
-	 * Load random data into 'xbuf' and then restore
-	 * it to the tile registers themselves.
-	 */
-	load_rand_tiledata(xbuf);
-	for (i = 0; i < ctxtswtest_config.iterations; i++) {
-		pthread_mutex_lock(&finfo->mutex);
-
-		/*
-		 * Ensure the register values have not
-		 * diverged from those recorded in 'xbuf'.
-		 */
-		validate_tiledata_regs_same(xbuf);
-
-		/* Load new, random values into xbuf and registers */
-		load_rand_tiledata(xbuf);
-
-		/*
-		 * The last thread's last unlock will be for
-		 * thread 0's mutex.  However, thread 0 will
-		 * have already exited the loop and the mutex
-		 * will already be unlocked.
-		 *
-		 * Because this is not an ERRORCHECK mutex,
-		 * that inconsistency will be silently ignored.
-		 */
-		pthread_mutex_unlock(&finfo->next->mutex);
-	}
-
-	free(xbuf);
-	/*
-	 * Return this thread's finfo, which is
-	 * a unique value for this thread.
-	 */
-	return finfo;
-}
-
-static int create_threads(int num, struct futex_info *finfo)
-{
-	int i;
-
-	for (i = 0; i < num; i++) {
-		int next_nr;
-
-		finfo[i].nr = i;
-		/*
-		 * Thread 'i' will wait on this mutex to
-		 * be unlocked.  Lock it immediately after
-		 * initialization:
-		 */
-		pthread_mutex_init(&finfo[i].mutex, NULL);
-		pthread_mutex_lock(&finfo[i].mutex);
-
-		next_nr = (i + 1) % num;
-		finfo[i].next = &finfo[next_nr];
-
-		if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
-			fatal_error("pthread_create()");
-	}
-	return 0;
-}
-
-static void affinitize_cpu0(void)
-{
-	cpu_set_t cpuset;
-
-	CPU_ZERO(&cpuset);
-	CPU_SET(0, &cpuset);
-
-	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
-		fatal_error("sched_setaffinity to CPU 0");
-}
-
-static void test_context_switch(void)
-{
-	struct futex_info *finfo;
-	int i;
-
-	/* Affinitize to one CPU to force context switches */
-	affinitize_cpu0();
-
-	req_xtiledata_perm();
-
-	printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
-	       ctxtswtest_config.iterations,
-	       ctxtswtest_config.num_threads);
-
-
-	finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
-	if (!finfo)
-		fatal_error("malloc()");
-
-	create_threads(ctxtswtest_config.num_threads, finfo);
-
-	/*
-	 * This thread wakes up thread 0
-	 * Thread 0 will wake up 1
-	 * Thread 1 will wake up 2
-	 * ...
-	 * the last thread will wake up 0
-	 *
-	 * ... this will repeat for the configured
-	 * number of iterations.
-	 */
-	pthread_mutex_unlock(&finfo[0].mutex);
-
-	/* Wait for all the threads to finish: */
-	for (i = 0; i < ctxtswtest_config.num_threads; i++) {
-		void *thread_retval;
-		int rc;
-
-		rc = pthread_join(finfo[i].thread, &thread_retval);
-
-		if (rc)
-			fatal_error("pthread_join() failed for thread %d err: %d\n",
-					i, rc);
-
-		if (thread_retval != &finfo[i])
-			fatal_error("unexpected thread retval for thread %d: %p\n",
-					i, thread_retval);
-
-	}
-
-	printf("[OK]\tNo incorrect case was found.\n");
-
-	free(finfo);
-}
-
-/* Ptrace test */
-
-/*
- * Make sure the ptracee has the expanded kernel buffer on the first
- * use. Then, initialize the state before performing the state
- * injection from the ptracer.
- */
-static inline void ptracee_firstuse_tiledata(void)
-{
-	load_rand_tiledata(stashed_xsave);
-	init_xtiledata();
-}
-
-/*
- * Ptracer injects the randomized tile data state. It also reads
- * before and after that, which will execute the kernel's state copy
- * functions. So, the tester is advised to double-check any emitted
- * kernel messages.
- */
-static void ptracer_inject_tiledata(pid_t target)
-{
-	struct xsave_buffer *xbuf;
-	struct iovec iov;
-
-	xbuf = alloc_xbuf();
-	if (!xbuf)
-		fatal_error("unable to allocate XSAVE buffer");
-
-	printf("\tRead the init'ed tiledata via ptrace().\n");
-
-	iov.iov_base = xbuf;
-	iov.iov_len = xbuf_size;
-
-	memset(stashed_xsave, 0, xbuf_size);
-
-	if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
-		fatal_error("PTRACE_GETREGSET");
-
-	if (!__compare_tiledata_state(stashed_xsave, xbuf))
-		printf("[OK]\tThe init'ed tiledata was read from ptracee.\n");
-	else
-		printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n");
-
-	printf("\tInject tiledata via ptrace().\n");
-
-	load_rand_tiledata(xbuf);
-
-	memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset],
-	       &xbuf->bytes[xtiledata.xbuf_offset],
-	       xtiledata.size);
-
-	if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
-		fatal_error("PTRACE_SETREGSET");
-
-	if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
-		fatal_error("PTRACE_GETREGSET");
-
-	if (!__compare_tiledata_state(stashed_xsave, xbuf))
-		printf("[OK]\tTiledata was correctly written to ptracee.\n");
-	else
-		printf("[FAIL]\tTiledata was not correctly written to ptracee.\n");
-}
-
-static void test_ptrace(void)
-{
-	pid_t child;
-	int status;
-
-	child = fork();
-	if (child < 0) {
-		err(1, "fork");
-	} else if (!child) {
-		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
-			err(1, "PTRACE_TRACEME");
-
-		ptracee_firstuse_tiledata();
-
-		raise(SIGTRAP);
-		_exit(0);
-	}
-
-	do {
-		wait(&status);
-	} while (WSTOPSIG(status) != SIGTRAP);
-
-	ptracer_inject_tiledata(child);
-
-	ptrace(PTRACE_DETACH, child, NULL, NULL);
-	wait(&status);
-	if (!WIFEXITED(status) || WEXITSTATUS(status))
-		err(1, "ptrace test");
-}
-
 int main(void)
 {
 	unsigned long features;
@@ -907,7 +489,11 @@ int main(void)
 		return KSFT_SKIP;
 	}
 
-	check_cpuid_xtiledata();
+	xtiledata = get_xstate_info(XFEATURE_XTILEDATA);
+	if (!xtiledata.size || !xtiledata.xbuf_offset) {
+		fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
+			    xtiledata.size, xtiledata.xbuf_offset);
+	}
 
 	init_stashed_xsave();
 	sethandler(SIGILL, handle_noperm, 0);
@@ -919,11 +505,11 @@ int main(void)
 
 	test_fork();
 
-	ctxtswtest_config.iterations = 10;
-	ctxtswtest_config.num_threads = 5;
-	test_context_switch();
-
-	test_ptrace();
+	/*
+	 * Perform generic xstate tests for context switching, ptrace,
+	 * and signal.
+	 */
+	test_xstate(XFEATURE_XTILEDATA);
 
 	clearhandler(SIGILL);
 	free_stashed_xsave();
diff --git a/tools/testing/selftests/x86/avx.c b/tools/testing/selftests/x86/avx.c
new file mode 100644
index 000000000000..11d5367c235f
--- /dev/null
+++ b/tools/testing/selftests/x86/avx.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE /* Required for inline xstate helpers */
+#include "xstate.h"
+
+int main(void)
+{
+	test_xstate(XFEATURE_YMM);
+	test_xstate(XFEATURE_OPMASK);
+	test_xstate(XFEATURE_ZMM_Hi256);
+	test_xstate(XFEATURE_Hi16_ZMM);
+}
diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c
index cf9ce8fbb656..93a89a5997ca 100644
--- a/tools/testing/selftests/x86/corrupt_xstate_header.c
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -18,6 +18,7 @@
 #include <sys/wait.h>
 
 #include "../kselftest.h" /* For __cpuid_count() */
+#include "helpers.h"
 
 static inline int xsave_enabled(void)
 {
@@ -29,19 +30,6 @@ static inline int xsave_enabled(void)
 	return ecx & (1U << 27);
 }
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static void sigusr1(int sig, siginfo_t *info, void *uc_void)
 {
 	ucontext_t *uc = uc_void;
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index d1e919b0c1dc..5cb8393737d0 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -24,31 +24,11 @@
 #include <errno.h>
 #include <sys/vm86.h>
 
+#include "helpers.h"
+
 static unsigned long load_addr = 0x10000;
 static int nerrs = 0;
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static sig_atomic_t got_signal;
 
 static void sighandler(int sig, siginfo_t *info, void *ctx_void)
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 50cf32de6313..0a75252d31b6 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -28,6 +28,8 @@
 #include <sys/wait.h>
 #include <setjmp.h>
 
+#include "helpers.h"
+
 #ifndef __x86_64__
 # error This test is 64-bit only
 #endif
@@ -39,28 +41,6 @@ static unsigned short *shared_scratch;
 
 static int nerrs;
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
 {
 	ucontext_t *ctx = (ucontext_t*)ctx_void;
diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h
index 4ef42c4559a9..6deaad035161 100644
--- a/tools/testing/selftests/x86/helpers.h
+++ b/tools/testing/selftests/x86/helpers.h
@@ -2,8 +2,13 @@
 #ifndef __SELFTESTS_X86_HELPERS_H
 #define __SELFTESTS_X86_HELPERS_H
 
+#include <signal.h>
+#include <string.h>
+
 #include <asm/processor-flags.h>
 
+#include "../kselftest.h"
+
 static inline unsigned long get_eflags(void)
 {
 #ifdef __x86_64__
@@ -22,4 +27,27 @@ static inline void set_eflags(unsigned long eflags)
 #endif
 }
 
+static inline void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		ksft_exit_fail_msg("sigaction failed");
+}
+
+static inline void clearhandler(int sig)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		ksft_exit_fail_msg("sigaction failed");
+}
+
 #endif /* __SELFTESTS_X86_HELPERS_H */
diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c
index 57ec5e99edb9..69d5fb7050c2 100644
--- a/tools/testing/selftests/x86/ioperm.c
+++ b/tools/testing/selftests/x86/ioperm.c
@@ -20,30 +20,9 @@
 #include <sched.h>
 #include <sys/io.h>
 
-static int nerrs = 0;
-
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-
-}
+#include "helpers.h"
 
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
+static int nerrs = 0;
 
 static jmp_buf jmpbuf;
 
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
index 7e3e09c1abac..457b6715542b 100644
--- a/tools/testing/selftests/x86/iopl.c
+++ b/tools/testing/selftests/x86/iopl.c
@@ -20,30 +20,9 @@
 #include <sched.h>
 #include <sys/io.h>
 
-static int nerrs = 0;
-
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-
-}
+#include "helpers.h"
 
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
+static int nerrs = 0;
 
 static jmp_buf jmpbuf;
 
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 0ea4f6813930..18d736640ece 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -4,6 +4,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/syscall.h>
+#include <sys/ioctl.h>
 #include <time.h>
 #include <signal.h>
 #include <setjmp.h>
@@ -43,7 +44,15 @@
 #define FUNC_INHERITE           0x20
 #define FUNC_PASID              0x40
 
+/* get_user() pointer test cases */
+#define GET_USER_USER           0
+#define GET_USER_KERNEL_TOP     1
+#define GET_USER_KERNEL_BOT     2
+#define GET_USER_KERNEL         3
+
 #define TEST_MASK               0x7f
+#define L5_SIGN_EXT_MASK        (0xFFUL << 56)
+#define L4_SIGN_EXT_MASK        (0x1FFFFUL << 47)
 
 #define LOW_ADDR                (0x1UL << 30)
 #define HIGH_ADDR               (0x3UL << 48)
@@ -115,23 +124,42 @@ static void segv_handler(int sig)
 	siglongjmp(segv_env, 1);
 }
 
-static inline int cpu_has_lam(void)
+static inline int lam_is_available(void)
 {
 	unsigned int cpuinfo[4];
+	unsigned long bits = 0;
+	int ret;
 
 	__cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
 
-	return (cpuinfo[0] & (1 << 26));
+	/* Check if cpu supports LAM */
+	if (!(cpuinfo[0] & (1 << 26))) {
+		ksft_print_msg("LAM is not supported!\n");
+		return 0;
+	}
+
+	/* Return 0 if CONFIG_ADDRESS_MASKING is not set */
+	ret = syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits);
+	if (ret) {
+		ksft_print_msg("LAM is disabled in the kernel!\n");
+		return 0;
+	}
+
+	return 1;
 }
 
-/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */
-static inline int cpu_has_la57(void)
+static inline int la57_enabled(void)
 {
-	unsigned int cpuinfo[4];
+	int ret;
+	void *p;
+
+	p = mmap((void *)HIGH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
 
-	__cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
+	ret = p == MAP_FAILED ? 0 : 1;
 
-	return (cpuinfo[2] & (1 << 16));
+	munmap(p, PAGE_SIZE);
+	return ret;
 }
 
 /*
@@ -237,7 +265,7 @@ static uint64_t set_metadata(uint64_t src, unsigned long lam)
  * both pointers should point to the same address.
  *
  * @return:
- * 0: value on the pointer with metadate and value on original are same
+ * 0: value on the pointer with metadata and value on original are same
  * 1: not same.
  */
 static int handle_lam_test(void *src, unsigned int lam)
@@ -322,7 +350,7 @@ static int handle_mmap(struct testcases *test)
 		   flags, -1, 0);
 	if (ptr == MAP_FAILED) {
 		if (test->addr == HIGH_ADDR)
-			if (!cpu_has_la57())
+			if (!la57_enabled())
 				return 3; /* unsupport LA57 */
 		return 1;
 	}
@@ -370,6 +398,78 @@ static int handle_syscall(struct testcases *test)
 	return ret;
 }
 
+static int get_user_syscall(struct testcases *test)
+{
+	uint64_t ptr_address, bitmask;
+	int fd, ret = 0;
+	void *ptr;
+
+	if (la57_enabled()) {
+		bitmask = L5_SIGN_EXT_MASK;
+		ptr_address = HIGH_ADDR;
+	} else {
+		bitmask = L4_SIGN_EXT_MASK;
+		ptr_address = LOW_ADDR;
+	}
+
+	ptr = mmap((void *)ptr_address, PAGE_SIZE, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+
+	if (ptr == MAP_FAILED) {
+		perror("failed to map byte to pass into get_user");
+		return 1;
+	}
+
+	if (set_lam(test->lam) != 0) {
+		ret = 2;
+		goto error;
+	}
+
+	fd = memfd_create("lam_ioctl", 0);
+	if (fd == -1) {
+		munmap(ptr, PAGE_SIZE);
+		exit(EXIT_FAILURE);
+	}
+
+	switch (test->later) {
+	case GET_USER_USER:
+		/* Control group - properly tagged user pointer */
+		ptr = (void *)set_metadata((uint64_t)ptr, test->lam);
+		break;
+	case GET_USER_KERNEL_TOP:
+		/* Kernel address with top bit cleared */
+		bitmask &= (bitmask >> 1);
+		ptr = (void *)((uint64_t)ptr | bitmask);
+		break;
+	case GET_USER_KERNEL_BOT:
+		/* Kernel address with bottom sign-extension bit cleared */
+		bitmask &= (bitmask << 1);
+		ptr = (void *)((uint64_t)ptr | bitmask);
+		break;
+	case GET_USER_KERNEL:
+		/* Try to pass a kernel address */
+		ptr = (void *)((uint64_t)ptr | bitmask);
+		break;
+	default:
+		printf("Invalid test case value passed!\n");
+		break;
+	}
+
+	/*
+	 * Use FIOASYNC ioctl because it utilizes get_user() internally and is
+	 * very non-invasive to the system. Pass differently tagged pointers to
+	 * get_user() in order to verify that valid user pointers are going
+	 * through and invalid kernel/non-canonical pointers are not.
+	 */
+	if (ioctl(fd, FIOASYNC, ptr) != 0)
+		ret = 1;
+
+	close(fd);
+error:
+	munmap(ptr, PAGE_SIZE);
+	return ret;
+}
+
 int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
 {
 	return (int)syscall(__NR_io_uring_setup, entries, p);
@@ -596,8 +696,10 @@ int do_uring(unsigned long lam)
 	fi->file_fd = file_fd;
 
 	ring = malloc(sizeof(*ring));
-	if (!ring)
+	if (!ring) {
+		free(fi);
 		return 1;
+	}
 
 	memset(ring, 0, sizeof(struct io_ring));
 
@@ -883,6 +985,33 @@ static struct testcases syscall_cases[] = {
 		.test_func = handle_syscall,
 		.msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
 	},
+	{
+		.later = GET_USER_USER,
+		.lam = LAM_U57_BITS,
+		.test_func = get_user_syscall,
+		.msg = "GET_USER: get_user() and pass a properly tagged user pointer.\n",
+	},
+	{
+		.later = GET_USER_KERNEL_TOP,
+		.expected = 1,
+		.lam = LAM_U57_BITS,
+		.test_func = get_user_syscall,
+		.msg = "GET_USER:[Negative] get_user() with a kernel pointer and the top bit cleared.\n",
+	},
+	{
+		.later = GET_USER_KERNEL_BOT,
+		.expected = 1,
+		.lam = LAM_U57_BITS,
+		.test_func = get_user_syscall,
+		.msg = "GET_USER:[Negative] get_user() with a kernel pointer and the bottom sign-extension bit cleared.\n",
+	},
+	{
+		.later = GET_USER_KERNEL,
+		.expected = 1,
+		.lam = LAM_U57_BITS,
+		.test_func = get_user_syscall,
+		.msg = "GET_USER:[Negative] get_user() and pass a kernel pointer.\n",
+	},
 };
 
 static struct testcases mmap_cases[] = {
@@ -1181,10 +1310,8 @@ int main(int argc, char **argv)
 
 	tests_cnt = 0;
 
-	if (!cpu_has_lam()) {
-		ksft_print_msg("Unsupported LAM feature!\n");
+	if (!lam_is_available())
 		return KSFT_SKIP;
-	}
 
 	while ((c = getopt(argc, argv, "ht:")) != -1) {
 		switch (c) {
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 3a29346e1452..bb99a71380a5 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -26,6 +26,8 @@
 #include <asm/prctl.h>
 #include <sys/prctl.h>
 
+#include "helpers.h"
+
 #define AR_ACCESSED		(1<<8)
 
 #define AR_TYPE_RODATA		(0 * (1<<9))
@@ -506,20 +508,6 @@ static void fix_sa_restorer(int sig)
 }
 #endif
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-
-	fix_sa_restorer(sig);
-}
-
 static jmp_buf jmpbuf;
 
 static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
@@ -549,9 +537,11 @@ static void do_multicpu_tests(void)
 	}
 
 	sethandler(SIGSEGV, sigsegv, 0);
+	fix_sa_restorer(SIGSEGV);
 #ifdef __i386__
 	/* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */
 	sethandler(SIGILL, sigsegv, 0);
+	fix_sa_restorer(SIGILL);
 #endif
 
 	printf("[RUN]\tCross-CPU LDT invalidation\n");
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index cc3de6ff9fba..f22cb6b382f9 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -36,7 +36,7 @@
 #include <setjmp.h>
 #include <sys/prctl.h>
 
-#define X86_EFLAGS_RF (1UL << 16)
+#include "helpers.h"
 
 #if __x86_64__
 # define REG_IP REG_RIP
@@ -94,18 +94,6 @@ static void enable_watchpoint(void)
 	}
 }
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static char const * const signames[] = {
 	[SIGSEGV] = "SIGSEGV",
 	[SIGBUS] = "SIBGUS",
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 12aaa063196e..360ec88d5432 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -15,6 +15,8 @@
 #include <asm/ptrace-abi.h>
 #include <sys/auxv.h>
 
+#include "helpers.h"
+
 /* Bitness-agnostic defines for user_regs_struct fields. */
 #ifdef __x86_64__
 # define user_syscall_nr	orig_rax
@@ -93,18 +95,6 @@ static siginfo_t wait_trap(pid_t chld)
 	return si;
 }
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static void setsigign(int sig, int flags)
 {
 	struct sigaction sa;
@@ -116,16 +106,6 @@ static void setsigign(int sig, int flags)
 		err(1, "sigaction");
 }
 
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 #ifdef __x86_64__
 # define REG_BP REG_RBP
 #else
diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c
index f689af75e979..0ae1b784498c 100644
--- a/tools/testing/selftests/x86/sigaltstack.c
+++ b/tools/testing/selftests/x86/sigaltstack.c
@@ -14,6 +14,8 @@
 #include <sys/resource.h>
 #include <setjmp.h>
 
+#include "helpers.h"
+
 /* sigaltstack()-enforced minimum stack */
 #define ENFORCED_MINSIGSTKSZ	2048
 
@@ -27,30 +29,6 @@ static bool sigalrm_expected;
 
 static unsigned long at_minstack_size;
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static int setup_altstack(void *start, unsigned long size)
 {
 	stack_t ss;
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 0b75b29f794b..26ef562f4232 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -46,6 +46,8 @@
 #include <sys/ptrace.h>
 #include <sys/user.h>
 
+#include "helpers.h"
+
 /* Pull in AR_xyz defines. */
 typedef unsigned int u32;
 typedef unsigned short u16;
@@ -138,28 +140,6 @@ static unsigned short LDT3(int idx)
 	return (idx << 3) | 7;
 }
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static void add_ldt(const struct user_desc *desc, unsigned short *var,
 		    const char *name)
 {
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 9a30f443e928..280d7a22b9c9 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -33,28 +33,6 @@
 
 #include "helpers.h"
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static volatile sig_atomic_t sig_traps, sig_eflags;
 sigjmp_buf jmpbuf;
 
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index 48ab065a76f9..f67a2df335ba 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -17,18 +17,6 @@
 
 #include "helpers.h"
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static sigjmp_buf jmpbuf;
 
 static volatile sig_atomic_t n_errs;
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index a108b80dd082..f9c9814160f0 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -18,18 +18,6 @@
 
 static unsigned int nerrs;
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
 {
 }
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index 991591718bb0..41c42b7b54a6 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -25,6 +25,7 @@
 #include <sys/mman.h>
 
 #include <linux/ptrace.h>
+#include "../kselftest.h"
 
 /* Common system call numbers */
 #define SYS_READ	  0
@@ -313,7 +314,7 @@ static void test_syscall_numbering(void)
 	 * The MSB is supposed to be ignored, so we loop over a few
 	 * to test that out.
 	 */
-	for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+	for (size_t i = 0; i < ARRAY_SIZE(msbs); i++) {
 		int msb = msbs[i];
 		run("Checking system calls with msb = %d (0x%x)\n",
 		    msb, msb);
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
index b30de9aaa6d4..5fb531e3ad7c 100644
--- a/tools/testing/selftests/x86/sysret_rip.c
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -22,6 +22,8 @@
 #include <sys/mman.h>
 #include <assert.h>
 
+#include "helpers.h"
+
 /*
  * These items are in clang_helpers_64.S, in order to avoid clang inline asm
  * limitations:
@@ -31,28 +33,6 @@ extern const char test_page[];
 
 static void const *current_test_page_addr = test_page;
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_handler = SIG_DFL;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 /* State used by our signal handlers. */
 static gregset_t initial_regs;
 
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 6de11b4df458..05e1e6774fba 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -310,19 +310,6 @@ static void test_getcpu(int cpu)
 static jmp_buf jmpbuf;
 static volatile unsigned long segv_err;
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		ksft_exit_fail_msg("sigaction failed\n");
-}
-
 static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
 {
 	ucontext_t *ctx = (ucontext_t *)ctx_void;
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
index 4c311e1af4c7..9cc17588d818 100644
--- a/tools/testing/selftests/x86/unwind_vdso.c
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -43,18 +43,6 @@ int main()
 #include <dlfcn.h>
 #include <unwind.h>
 
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
-		       int flags)
-{
-	struct sigaction sa;
-	memset(&sa, 0, sizeof(sa));
-	sa.sa_sigaction = handler;
-	sa.sa_flags = SA_SIGINFO | flags;
-	sigemptyset(&sa.sa_mask);
-	if (sigaction(sig, &sa, 0))
-		err(1, "sigaction");
-}
-
 static volatile sig_atomic_t nerrs;
 static unsigned long sysinfo;
 static bool got_sysinfo = false;
diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c
new file mode 100644
index 000000000000..23c1d6c964ea
--- /dev/null
+++ b/tools/testing/selftests/x86/xstate.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <elf.h>
+#include <pthread.h>
+#include <stdbool.h>
+
+#include <asm/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+
+#include "helpers.h"
+#include "xstate.h"
+
+/*
+ * The userspace xstate test suite is designed to be generic and operates
+ * with randomized xstate data. However, some states require special handling:
+ *
+ * - PKRU and XTILECFG need specific adjustments, such as modifying
+ *   randomization behavior or using fixed values.
+ * - But, PKRU already has a dedicated test suite in /tools/selftests/mm.
+ * - Legacy states (FP and SSE) are excluded, as they are not considered
+ *   part of extended states (xstates) and their usage is already deeply
+ *   integrated into user-space libraries.
+ */
+#define XFEATURE_MASK_TEST_SUPPORTED	\
+	((1 << XFEATURE_YMM) |		\
+	 (1 << XFEATURE_OPMASK)	|	\
+	 (1 << XFEATURE_ZMM_Hi256) |	\
+	 (1 << XFEATURE_Hi16_ZMM) |	\
+	 (1 << XFEATURE_XTILEDATA))
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+	uint32_t eax, edx;
+
+	asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
+	return eax + ((uint64_t)edx << 32);
+}
+
+static inline uint64_t get_xstatebv(struct xsave_buffer *xbuf)
+{
+	return *(uint64_t *)(&xbuf->header);
+}
+
+static struct xstate_info xstate;
+
+struct futex_info {
+	unsigned int iterations;
+	struct futex_info *next;
+	pthread_mutex_t mutex;
+	pthread_t thread;
+	bool valid;
+	int nr;
+};
+
+static inline void load_rand_xstate(struct xstate_info *xstate, struct xsave_buffer *xbuf)
+{
+	clear_xstate_header(xbuf);
+	set_xstatebv(xbuf, xstate->mask);
+	set_rand_data(xstate, xbuf);
+	xrstor(xbuf, xstate->mask);
+}
+
+static inline void load_init_xstate(struct xstate_info *xstate, struct xsave_buffer *xbuf)
+{
+	clear_xstate_header(xbuf);
+	xrstor(xbuf, xstate->mask);
+}
+
+static inline void copy_xstate(struct xsave_buffer *xbuf_dst, struct xsave_buffer *xbuf_src)
+{
+	memcpy(&xbuf_dst->bytes[xstate.xbuf_offset],
+	       &xbuf_src->bytes[xstate.xbuf_offset],
+	       xstate.size);
+}
+
+static inline bool validate_xstate_same(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2)
+{
+	int ret;
+
+	ret = memcmp(&xbuf1->bytes[xstate.xbuf_offset],
+		     &xbuf2->bytes[xstate.xbuf_offset],
+		     xstate.size);
+	return ret == 0;
+}
+
+static inline bool validate_xregs_same(struct xsave_buffer *xbuf1)
+{
+	struct xsave_buffer *xbuf2;
+	bool ret;
+
+	xbuf2 = alloc_xbuf();
+	if (!xbuf2)
+		ksft_exit_fail_msg("failed to allocate XSAVE buffer\n");
+
+	xsave(xbuf2, xstate.mask);
+	ret = validate_xstate_same(xbuf1, xbuf2);
+
+	free(xbuf2);
+	return ret;
+}
+
+/* Context switching test */
+
+static void *check_xstate(void *info)
+{
+	struct futex_info *finfo = (struct futex_info *)info;
+	struct xsave_buffer *xbuf;
+	int i;
+
+	xbuf = alloc_xbuf();
+	if (!xbuf)
+		ksft_exit_fail_msg("unable to allocate XSAVE buffer\n");
+
+	/*
+	 * Load random data into 'xbuf' and then restore it to the xstate
+	 * registers.
+	 */
+	load_rand_xstate(&xstate, xbuf);
+	finfo->valid = true;
+
+	for (i = 0; i < finfo->iterations; i++) {
+		pthread_mutex_lock(&finfo->mutex);
+
+		/*
+		 * Ensure the register values have not diverged from the
+		 * record. Then reload a new random value.  If it failed
+		 * ever before, skip it.
+		 */
+		if (finfo->valid) {
+			finfo->valid = validate_xregs_same(xbuf);
+			load_rand_xstate(&xstate, xbuf);
+		}
+
+		/*
+		 * The last thread's last unlock will be for thread 0's
+		 * mutex. However, thread 0 will have already exited the
+		 * loop and the mutex will already be unlocked.
+		 *
+		 * Because this is not an ERRORCHECK mutex, that
+		 * inconsistency will be silently ignored.
+		 */
+		pthread_mutex_unlock(&finfo->next->mutex);
+	}
+
+	free(xbuf);
+	return finfo;
+}
+
+static void create_threads(uint32_t num_threads, uint32_t iterations, struct futex_info *finfo)
+{
+	int i;
+
+	for (i = 0; i < num_threads; i++) {
+		int next_nr;
+
+		finfo[i].nr = i;
+		finfo[i].iterations = iterations;
+
+		/*
+		 * Thread 'i' will wait on this mutex to be unlocked.
+		 * Lock it immediately after initialization:
+		 */
+		pthread_mutex_init(&finfo[i].mutex, NULL);
+		pthread_mutex_lock(&finfo[i].mutex);
+
+		next_nr = (i + 1) % num_threads;
+		finfo[i].next = &finfo[next_nr];
+
+		if (pthread_create(&finfo[i].thread, NULL, check_xstate, &finfo[i]))
+			ksft_exit_fail_msg("pthread_create() failed\n");
+	}
+}
+
+static bool checkout_threads(uint32_t num_threads, struct futex_info *finfo)
+{
+	void *thread_retval;
+	bool valid = true;
+	int err, i;
+
+	for (i = 0; i < num_threads; i++) {
+		err = pthread_join(finfo[i].thread, &thread_retval);
+		if (err)
+			ksft_exit_fail_msg("pthread_join() failed for thread %d err: %d\n", i, err);
+
+		if (thread_retval != &finfo[i]) {
+			ksft_exit_fail_msg("unexpected thread retval for thread %d: %p\n",
+					   i, thread_retval);
+		}
+
+		valid &= finfo[i].valid;
+	}
+
+	return valid;
+}
+
+static void affinitize_cpu0(void)
+{
+	cpu_set_t cpuset;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		ksft_exit_fail_msg("sched_setaffinity to CPU 0 failed\n");
+}
+
+static void test_context_switch(uint32_t num_threads, uint32_t iterations)
+{
+	struct futex_info *finfo;
+
+	/* Affinitize to one CPU to force context switches */
+	affinitize_cpu0();
+
+	printf("[RUN]\t%s: check context switches, %d iterations, %d threads.\n",
+	       xstate.name, iterations, num_threads);
+
+	finfo = malloc(sizeof(*finfo) * num_threads);
+	if (!finfo)
+		ksft_exit_fail_msg("unable allocate memory\n");
+
+	create_threads(num_threads, iterations, finfo);
+
+	/*
+	 * This thread wakes up thread 0
+	 * Thread 0 will wake up 1
+	 * Thread 1 will wake up 2
+	 * ...
+	 * The last thread will wake up 0
+	 *
+	 * This will repeat for the configured
+	 * number of iterations.
+	 */
+	pthread_mutex_unlock(&finfo[0].mutex);
+
+	/* Wait for all the threads to finish: */
+	if (checkout_threads(num_threads, finfo))
+		printf("[OK]\tNo incorrect case was found.\n");
+	else
+		printf("[FAIL]\tFailed with context switching test.\n");
+
+	free(finfo);
+}
+
+/*
+ * Ptrace test for the ABI format as described in arch/x86/include/asm/user.h
+ */
+
+/*
+ * Make sure the ptracee has the expanded kernel buffer on the first use.
+ * Then, initialize the state before performing the state injection from
+ * the ptracer. For non-dynamic states, this is benign.
+ */
+static inline void ptracee_touch_xstate(void)
+{
+	struct xsave_buffer *xbuf;
+
+	xbuf = alloc_xbuf();
+
+	load_rand_xstate(&xstate, xbuf);
+	load_init_xstate(&xstate, xbuf);
+
+	free(xbuf);
+}
+
+/*
+ * Ptracer injects the randomized xstate data. It also reads before and
+ * after that, which will execute the kernel's state copy functions.
+ */
+static void ptracer_inject_xstate(pid_t target)
+{
+	uint32_t xbuf_size = get_xbuf_size();
+	struct xsave_buffer *xbuf1, *xbuf2;
+	struct iovec iov;
+
+	/*
+	 * Allocate buffers to keep data while ptracer can write the
+	 * other buffer
+	 */
+	xbuf1 = alloc_xbuf();
+	xbuf2 = alloc_xbuf();
+	if (!xbuf1 || !xbuf2)
+		ksft_exit_fail_msg("unable to allocate XSAVE buffer\n");
+
+	iov.iov_base = xbuf1;
+	iov.iov_len  = xbuf_size;
+
+	if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+		ksft_exit_fail_msg("PTRACE_GETREGSET failed\n");
+
+	printf("[RUN]\t%s: inject xstate via ptrace().\n", xstate.name);
+
+	load_rand_xstate(&xstate, xbuf1);
+	copy_xstate(xbuf2, xbuf1);
+
+	if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+		ksft_exit_fail_msg("PTRACE_SETREGSET failed\n");
+
+	if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+		ksft_exit_fail_msg("PTRACE_GETREGSET failed\n");
+
+	if (*(uint64_t *)get_fpx_sw_bytes(xbuf1) == xgetbv(0))
+		printf("[OK]\t'xfeatures' in SW reserved area was correctly written\n");
+	else
+		printf("[FAIL]\t'xfeatures' in SW reserved area was not correctly written\n");
+
+	if (validate_xstate_same(xbuf2, xbuf1))
+		printf("[OK]\txstate was correctly updated.\n");
+	else
+		printf("[FAIL]\txstate was not correctly updated.\n");
+
+	free(xbuf1);
+	free(xbuf2);
+}
+
+static void test_ptrace(void)
+{
+	pid_t child;
+	int status;
+
+	child = fork();
+	if (child < 0) {
+		ksft_exit_fail_msg("fork() failed\n");
+	} else if (!child) {
+		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
+			ksft_exit_fail_msg("PTRACE_TRACEME failed\n");
+
+		ptracee_touch_xstate();
+
+		raise(SIGTRAP);
+		_exit(0);
+	}
+
+	do {
+		wait(&status);
+	} while (WSTOPSIG(status) != SIGTRAP);
+
+	ptracer_inject_xstate(child);
+
+	ptrace(PTRACE_DETACH, child, NULL, NULL);
+	wait(&status);
+	if (!WIFEXITED(status) || WEXITSTATUS(status))
+		ksft_exit_fail_msg("ptracee exit error\n");
+}
+
+/*
+ * Test signal delivery for the ABI compatibility.
+ * See the ABI format: arch/x86/include/uapi/asm/sigcontext.h
+ */
+
+/*
+ * Avoid using printf() in signal handlers as it is not
+ * async-signal-safe.
+ */
+#define SIGNAL_BUF_LEN 1000
+static char signal_message_buffer[SIGNAL_BUF_LEN];
+static void sig_print(char *msg)
+{
+	int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1;
+
+	strncat(signal_message_buffer, msg, left);
+}
+
+static struct xsave_buffer *stashed_xbuf;
+
+static void validate_sigfpstate(int sig, siginfo_t *si, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t *)ctx_void;
+	void *xbuf = ctx->uc_mcontext.fpregs;
+	struct _fpx_sw_bytes *sw_bytes;
+	uint32_t magic2;
+
+	/* Reset the signal message buffer: */
+	signal_message_buffer[0] = '\0';
+
+	sw_bytes = get_fpx_sw_bytes(xbuf);
+	if (sw_bytes->magic1 == FP_XSTATE_MAGIC1)
+		sig_print("[OK]\t'magic1' is valid\n");
+	else
+		sig_print("[FAIL]\t'magic1' is not valid\n");
+
+	if (get_fpx_sw_bytes_features(xbuf) & xstate.mask)
+		sig_print("[OK]\t'xfeatures' in SW reserved area is valid\n");
+	else
+		sig_print("[FAIL]\t'xfeatures' in SW reserved area is not valid\n");
+
+	if (get_xstatebv(xbuf) & xstate.mask)
+		sig_print("[OK]\t'xfeatures' in XSAVE header is valid\n");
+	else
+		sig_print("[FAIL]\t'xfeatures' in XSAVE header is not valid\n");
+
+	if (validate_xstate_same(stashed_xbuf, xbuf))
+		sig_print("[OK]\txstate delivery was successful\n");
+	else
+		sig_print("[FAIL]\txstate delivery was not successful\n");
+
+	magic2 = *(uint32_t *)(xbuf + sw_bytes->xstate_size);
+	if (magic2 == FP_XSTATE_MAGIC2)
+		sig_print("[OK]\t'magic2' is valid\n");
+	else
+		sig_print("[FAIL]\t'magic2' is not valid\n");
+
+	set_rand_data(&xstate, xbuf);
+	copy_xstate(stashed_xbuf, xbuf);
+}
+
+static void test_signal(void)
+{
+	bool valid_xstate;
+
+	/*
+	 * The signal handler will access this to verify xstate context
+	 * preservation.
+	 */
+	stashed_xbuf = alloc_xbuf();
+	if (!stashed_xbuf)
+		ksft_exit_fail_msg("unable to allocate XSAVE buffer\n");
+
+	printf("[RUN]\t%s: load xstate and raise SIGUSR1\n", xstate.name);
+
+	sethandler(SIGUSR1, validate_sigfpstate, 0);
+
+	load_rand_xstate(&xstate, stashed_xbuf);
+
+	raise(SIGUSR1);
+
+	/*
+	 * Immediately record the test result, deferring printf() to
+	 * prevent unintended state contamination by that.
+	 */
+	valid_xstate = validate_xregs_same(stashed_xbuf);
+	printf("%s", signal_message_buffer);
+
+	printf("[RUN]\t%s: load new xstate from sighandler and check it after sigreturn\n",
+	       xstate.name);
+
+	if (valid_xstate)
+		printf("[OK]\txstate was restored correctly\n");
+	else
+		printf("[FAIL]\txstate restoration failed\n");
+
+	clearhandler(SIGUSR1);
+	free(stashed_xbuf);
+}
+
+void test_xstate(uint32_t feature_num)
+{
+	const unsigned int ctxtsw_num_threads = 5, ctxtsw_iterations = 10;
+	unsigned long features;
+	long rc;
+
+	if (!(XFEATURE_MASK_TEST_SUPPORTED & (1 << feature_num))) {
+		ksft_print_msg("The xstate test does not fully support the component %u, yet.\n",
+			       feature_num);
+		return;
+	}
+
+	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features);
+	if (rc || !(features & (1 << feature_num))) {
+		ksft_print_msg("The kernel does not support feature number: %u\n", feature_num);
+		return;
+	}
+
+	xstate = get_xstate_info(feature_num);
+	if (!xstate.size || !xstate.xbuf_offset) {
+		ksft_exit_fail_msg("invalid state size/offset (%d/%d)\n",
+				   xstate.size, xstate.xbuf_offset);
+	}
+
+	test_context_switch(ctxtsw_num_threads, ctxtsw_iterations);
+	test_ptrace();
+	test_signal();
+}
diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h
new file mode 100644
index 000000000000..42af36ec852f
--- /dev/null
+++ b/tools/testing/selftests/x86/xstate.h
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __SELFTESTS_X86_XSTATE_H
+#define __SELFTESTS_X86_XSTATE_H
+
+#include <stdint.h>
+
+#include "../kselftest.h"
+
+#define XSAVE_HDR_OFFSET	512
+#define XSAVE_HDR_SIZE		64
+
+/*
+ * List of XSAVE features Linux knows about. Copied from
+ * arch/x86/include/asm/fpu/types.h
+ */
+enum xfeature {
+	XFEATURE_FP,
+	XFEATURE_SSE,
+	XFEATURE_YMM,
+	XFEATURE_BNDREGS,
+	XFEATURE_BNDCSR,
+	XFEATURE_OPMASK,
+	XFEATURE_ZMM_Hi256,
+	XFEATURE_Hi16_ZMM,
+	XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
+	XFEATURE_PKRU,
+	XFEATURE_PASID,
+	XFEATURE_CET_USER,
+	XFEATURE_CET_KERNEL_UNUSED,
+	XFEATURE_RSRVD_COMP_13,
+	XFEATURE_RSRVD_COMP_14,
+	XFEATURE_LBR,
+	XFEATURE_RSRVD_COMP_16,
+	XFEATURE_XTILECFG,
+	XFEATURE_XTILEDATA,
+
+	XFEATURE_MAX,
+};
+
+/* Copied from arch/x86/kernel/fpu/xstate.c */
+static const char *xfeature_names[] =
+{
+	"x87 floating point registers",
+	"SSE registers",
+	"AVX registers",
+	"MPX bounds registers",
+	"MPX CSR",
+	"AVX-512 opmask",
+	"AVX-512 Hi256",
+	"AVX-512 ZMM_Hi256",
+	"Processor Trace (unused)",
+	"Protection Keys User registers",
+	"PASID state",
+	"Control-flow User registers",
+	"Control-flow Kernel registers (unused)",
+	"unknown xstate feature",
+	"unknown xstate feature",
+	"unknown xstate feature",
+	"unknown xstate feature",
+	"AMX Tile config",
+	"AMX Tile data",
+	"unknown xstate feature",
+};
+
+struct xsave_buffer {
+	union {
+		struct {
+			char legacy[XSAVE_HDR_OFFSET];
+			char header[XSAVE_HDR_SIZE];
+			char extended[0];
+		};
+		char bytes[0];
+	};
+};
+
+static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+	uint32_t rfbm_hi = rfbm >> 32;
+	uint32_t rfbm_lo = rfbm;
+
+	asm volatile("xsave (%%rdi)"
+		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
+		     : "memory");
+}
+
+static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+	uint32_t rfbm_hi = rfbm >> 32;
+	uint32_t rfbm_lo = rfbm;
+
+	asm volatile("xrstor (%%rdi)"
+		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
+}
+
+#define CPUID_LEAF_XSTATE		0xd
+#define CPUID_SUBLEAF_XSTATE_USER	0x0
+
+static inline uint32_t get_xbuf_size(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+
+	__cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
+		      eax, ebx, ecx, edx);
+
+	/*
+	 * EBX enumerates the size (in bytes) required by the XSAVE
+	 * instruction for an XSAVE area containing all the user state
+	 * components corresponding to bits currently set in XCR0.
+	 */
+	return ebx;
+}
+
+struct xstate_info {
+	const char *name;
+	uint32_t num;
+	uint32_t mask;
+	uint32_t xbuf_offset;
+	uint32_t size;
+};
+
+static inline struct xstate_info get_xstate_info(uint32_t xfeature_num)
+{
+	struct xstate_info xstate = { };
+	uint32_t eax, ebx, ecx, edx;
+
+	if (xfeature_num >= XFEATURE_MAX) {
+		ksft_print_msg("unknown state\n");
+		return xstate;
+	}
+
+	xstate.name = xfeature_names[xfeature_num];
+	xstate.num  = xfeature_num;
+	xstate.mask = 1 << xfeature_num;
+
+	__cpuid_count(CPUID_LEAF_XSTATE, xfeature_num,
+		      eax, ebx, ecx, edx);
+	xstate.size        = eax;
+	xstate.xbuf_offset = ebx;
+	return xstate;
+}
+
+static inline struct xsave_buffer *alloc_xbuf(void)
+{
+	uint32_t xbuf_size = get_xbuf_size();
+
+	/* XSAVE buffer should be 64B-aligned. */
+	return aligned_alloc(64, xbuf_size);
+}
+
+static inline void clear_xstate_header(struct xsave_buffer *xbuf)
+{
+	memset(&xbuf->header, 0, sizeof(xbuf->header));
+}
+
+static inline void set_xstatebv(struct xsave_buffer *xbuf, uint64_t bv)
+{
+	/* XSTATE_BV is at the beginning of the header: */
+	*(uint64_t *)(&xbuf->header) = bv;
+}
+
+/* See 'struct _fpx_sw_bytes' at sigcontext.h */
+#define SW_BYTES_OFFSET		464
+/* N.B. The struct's field name varies so read from the offset. */
+#define SW_BYTES_BV_OFFSET	(SW_BYTES_OFFSET + 8)
+
+static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *xbuf)
+{
+	return xbuf + SW_BYTES_OFFSET;
+}
+
+static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
+{
+	return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
+}
+
+static inline void set_rand_data(struct xstate_info *xstate, struct xsave_buffer *xbuf)
+{
+	int *ptr = (int *)&xbuf->bytes[xstate->xbuf_offset];
+	int data, i;
+
+	/*
+	 * Ensure that 'data' is never 0.  This ensures that
+	 * the registers are never in their initial configuration
+	 * and thus never tracked as being in the init state.
+	 */
+	data = rand() | 1;
+
+	for (i = 0; i < xstate->size / sizeof(int); i++, ptr++)
+		*ptr = data;
+}
+
+/* Testing kernel's context switching and ABI support for the xstate. */
+void test_xstate(uint32_t feature_num);
+
+#endif /* __SELFTESTS_X86_XSTATE_H */
diff --git a/tools/testing/selftests/zram/.gitignore b/tools/testing/selftests/zram/.gitignore
new file mode 100644
index 000000000000..088cd9bad87a
--- /dev/null
+++ b/tools/testing/selftests/zram/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+err.log
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 8fab5e13c7c3..04ab45e27fb8 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -18,6 +18,12 @@ static bool fail_prealloc;
 #define vma_iter_prealloc(vmi, vma)					\
 	(fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL))
 
+#define CONFIG_DEFAULT_MMAP_MIN_ADDR 65536
+
+unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
+unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+
 /*
  * Directly import the VMA implementation here. Our vma_internal.h wrapper
  * provides userland-equivalent functionality for everything vma.c uses.
@@ -47,6 +53,11 @@ struct task_struct *get_current(void)
 	return &__current;
 }
 
+unsigned long rlimit(unsigned int limit)
+{
+	return (unsigned long)-1;
+}
+
 /* Helper function to simply allocate a VMA. */
 static struct vm_area_struct *alloc_vma(struct mm_struct *mm,
 					unsigned long start,
@@ -89,7 +100,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
 	 * begun. Linking to the tree will have caused this to be incremented,
 	 * which means we will get a false positive otherwise.
 	 */
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	return vma;
 }
@@ -214,7 +225,7 @@ static bool vma_write_started(struct vm_area_struct *vma)
 	int seq = vma->vm_lock_seq;
 
 	/* We reset after each check. */
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	/* The vma_start_write() stub simply increments this value. */
 	return seq > -1;
@@ -1563,6 +1574,57 @@ static bool test_expand_only_mode(void)
 	return true;
 }
 
+static bool test_mmap_region_basic(void)
+{
+	struct mm_struct mm = {};
+	unsigned long addr;
+	struct vm_area_struct *vma;
+	VMA_ITERATOR(vmi, &mm, 0);
+
+	current->mm = &mm;
+
+	/* Map at 0x300000, length 0x3000. */
+	addr = __mmap_region(NULL, 0x300000, 0x3000,
+			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+			     0x300, NULL);
+	ASSERT_EQ(addr, 0x300000);
+
+	/* Map at 0x250000, length 0x3000. */
+	addr = __mmap_region(NULL, 0x250000, 0x3000,
+			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+			     0x250, NULL);
+	ASSERT_EQ(addr, 0x250000);
+
+	/* Map at 0x303000, merging to 0x300000 of length 0x6000. */
+	addr = __mmap_region(NULL, 0x303000, 0x3000,
+			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+			     0x303, NULL);
+	ASSERT_EQ(addr, 0x303000);
+
+	/* Map at 0x24d000, merging to 0x250000 of length 0x6000. */
+	addr = __mmap_region(NULL, 0x24d000, 0x3000,
+			     VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE,
+			     0x24d, NULL);
+	ASSERT_EQ(addr, 0x24d000);
+
+	ASSERT_EQ(mm.map_count, 2);
+
+	for_each_vma(vmi, vma) {
+		if (vma->vm_start == 0x300000) {
+			ASSERT_EQ(vma->vm_end, 0x306000);
+			ASSERT_EQ(vma->vm_pgoff, 0x300);
+		} else if (vma->vm_start == 0x24d000) {
+			ASSERT_EQ(vma->vm_end, 0x253000);
+			ASSERT_EQ(vma->vm_pgoff, 0x24d);
+		} else {
+			ASSERT_FALSE(true);
+		}
+	}
+
+	cleanup_mm(&mm, &vmi);
+	return true;
+}
+
 int main(void)
 {
 	int num_tests = 0, num_fail = 0;
@@ -1596,6 +1658,8 @@ int main(void)
 	TEST(copy_vma);
 	TEST(expand_only_mode);
 
+	TEST(mmap_region_basic);
+
 #undef TEST
 
 	printf("%d tests run, %d passed, %d failed.\n",
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index e76ff579e1fd..1eae23039854 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -27,11 +27,23 @@
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
 
+extern unsigned long stack_guard_gap;
+#ifdef CONFIG_MMU
+extern unsigned long mmap_min_addr;
+extern unsigned long dac_mmap_min_addr;
+#else
+#define mmap_min_addr		0UL
+#define dac_mmap_min_addr	0UL
+#endif
+
 #define VM_WARN_ON(_expr) (WARN_ON(_expr))
 #define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr))
+#define VM_WARN_ON_VMG(_expr, _vmg) (WARN_ON(_expr))
 #define VM_BUG_ON(_expr) (BUG_ON(_expr))
 #define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
 
+#define MMF_HAS_MDWE	28
+
 #define VM_NONE		0x00000000
 #define VM_READ		0x00000001
 #define VM_WRITE	0x00000002
@@ -39,6 +51,7 @@
 #define VM_SHARED	0x00000008
 #define VM_MAYREAD	0x00000010
 #define VM_MAYWRITE	0x00000020
+#define VM_MAYEXEC	0x00000040
 #define VM_GROWSDOWN	0x00000100
 #define VM_PFNMAP	0x00000400
 #define VM_LOCKED	0x00002000
@@ -51,6 +64,8 @@
 #define VM_STACK	VM_GROWSDOWN
 #define VM_SHADOW_STACK	VM_NONE
 #define VM_SOFTDIRTY	0
+#define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
+#define VM_GROWSUP	VM_NONE
 
 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
 #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
@@ -58,6 +73,20 @@
 /* This mask represents all the VMA flag bits used by mlock */
 #define VM_LOCKED_MASK	(VM_LOCKED | VM_LOCKONFAULT)
 
+#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+
+#define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
+
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
+
+#define RLIMIT_STACK		3	/* max stack size */
+#define RLIMIT_MEMLOCK		8	/* max locked-in-memory address space */
+
+#define CAP_IPC_LOCK         14
+
 #ifdef CONFIG_64BIT
 /* VM is sealed, in vm_flags */
 #define VM_SEALED	_BITUL(63)
@@ -122,10 +151,22 @@ enum {
 	TASK_COMM_LEN = 16,
 };
 
+/*
+ * Flags for bug emulation.
+ *
+ * These occupy the top three bytes.
+ */
+enum {
+	READ_IMPLIES_EXEC =	0x0400000,
+};
+
 struct task_struct {
 	char comm[TASK_COMM_LEN];
 	pid_t pid;
 	struct mm_struct *mm;
+
+	/* Used for emulating ABI behavior of previous Linux versions: */
+	unsigned int			personality;
 };
 
 struct task_struct *get_current(void);
@@ -186,6 +227,10 @@ struct mm_struct {
 	unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
 	unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
 	unsigned long stack_vm;	   /* VM_STACK */
+
+	unsigned long def_flags;
+
+	unsigned long flags; /* Must use atomic bitops to access */
 };
 
 struct vma_lock {
@@ -241,7 +286,7 @@ struct vm_area_struct {
 	 * counter reuse can only lead to occasional unnecessary use of the
 	 * slowpath.
 	 */
-	int vm_lock_seq;
+	unsigned int vm_lock_seq;
 	struct vma_lock *vm_lock;
 #endif
 
@@ -373,6 +418,17 @@ struct vm_operations_struct {
 					  unsigned long addr);
 };
 
+struct vm_unmapped_area_info {
+#define VM_UNMAPPED_AREA_TOPDOWN 1
+	unsigned long flags;
+	unsigned long length;
+	unsigned long low_limit;
+	unsigned long high_limit;
+	unsigned long align_mask;
+	unsigned long align_offset;
+	unsigned long start_gap;
+};
+
 static inline void vma_iter_invalidate(struct vma_iterator *vmi)
 {
 	mas_pause(&vmi->mas);
@@ -416,7 +472,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma)
 		return false;
 
 	init_rwsem(&vma->vm_lock->lock);
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	return true;
 }
@@ -432,6 +488,8 @@ static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
 
 extern const struct vm_operations_struct vma_dummy_vm_ops;
 
+extern unsigned long rlimit(unsigned int limit);
+
 static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
 {
 	memset(vma, 0, sizeof(*vma));
@@ -853,6 +911,11 @@ static inline void mmap_write_unlock(struct mm_struct *)
 {
 }
 
+static inline int mmap_write_lock_killable(struct mm_struct *)
+{
+	return 0;
+}
+
 static inline bool can_modify_mm(struct mm_struct *mm,
 				 unsigned long start,
 				 unsigned long end)
@@ -938,7 +1001,7 @@ static inline bool is_file_hugepages(struct file *)
 
 static inline int security_vm_enough_memory_mm(struct mm_struct *, long)
 {
-	return true;
+	return 0;
 }
 
 static inline bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long)
@@ -1033,4 +1096,159 @@ static inline int mmap_file(struct file *, struct vm_area_struct *)
 	return 0;
 }
 
+static inline unsigned long stack_guard_start_gap(struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & VM_GROWSDOWN)
+		return stack_guard_gap;
+
+	/* See reasoning around the VM_SHADOW_STACK definition */
+	if (vma->vm_flags & VM_SHADOW_STACK)
+		return PAGE_SIZE;
+
+	return 0;
+}
+
+static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
+{
+	unsigned long gap = stack_guard_start_gap(vma);
+	unsigned long vm_start = vma->vm_start;
+
+	vm_start -= gap;
+	if (vm_start > vma->vm_start)
+		vm_start = 0;
+	return vm_start;
+}
+
+static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
+{
+	unsigned long vm_end = vma->vm_end;
+
+	if (vma->vm_flags & VM_GROWSUP) {
+		vm_end += stack_guard_gap;
+		if (vm_end < vma->vm_end)
+			vm_end = -PAGE_SIZE;
+	}
+	return vm_end;
+}
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					unsigned long addr, unsigned long len)
+{
+	return 0;
+}
+
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+	return vma->vm_flags & VM_ACCESS_FLAGS;
+}
+
+static inline bool capable(int cap)
+{
+	return true;
+}
+
+static inline bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
+			unsigned long bytes)
+{
+	unsigned long locked_pages, limit_pages;
+
+	if (!(flags & VM_LOCKED) || capable(CAP_IPC_LOCK))
+		return true;
+
+	locked_pages = bytes >> PAGE_SHIFT;
+	locked_pages += mm->locked_vm;
+
+	limit_pages = rlimit(RLIMIT_MEMLOCK);
+	limit_pages >>= PAGE_SHIFT;
+
+	return locked_pages <= limit_pages;
+}
+
+static inline int __anon_vma_prepare(struct vm_area_struct *vma)
+{
+	struct anon_vma *anon_vma = calloc(1, sizeof(struct anon_vma));
+
+	if (!anon_vma)
+		return -ENOMEM;
+
+	anon_vma->root = anon_vma;
+	vma->anon_vma = anon_vma;
+
+	return 0;
+}
+
+static inline int anon_vma_prepare(struct vm_area_struct *vma)
+{
+	if (likely(vma->anon_vma))
+		return 0;
+
+	return __anon_vma_prepare(vma);
+}
+
+static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
+					      struct list_head *uf)
+{
+}
+
+/*
+ * Denies creating a writable executable mapping or gaining executable permissions.
+ *
+ * This denies the following:
+ *
+ *     a)      mmap(PROT_WRITE | PROT_EXEC)
+ *
+ *     b)      mmap(PROT_WRITE)
+ *             mprotect(PROT_EXEC)
+ *
+ *     c)      mmap(PROT_WRITE)
+ *             mprotect(PROT_READ)
+ *             mprotect(PROT_EXEC)
+ *
+ * But allows the following:
+ *
+ *     d)      mmap(PROT_READ | PROT_EXEC)
+ *             mmap(PROT_READ | PROT_EXEC | PROT_BTI)
+ *
+ * This is only applicable if the user has set the Memory-Deny-Write-Execute
+ * (MDWE) protection mask for the current process.
+ *
+ * @old specifies the VMA flags the VMA originally possessed, and @new the ones
+ * we propose to set.
+ *
+ * Return: false if proposed change is OK, true if not ok and should be denied.
+ */
+static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
+{
+	/* If MDWE is disabled, we have nothing to deny. */
+	if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
+		return false;
+
+	/* If the new VMA is not executable, we have nothing to deny. */
+	if (!(new & VM_EXEC))
+		return false;
+
+	/* Under MDWE we do not accept newly writably executable VMAs... */
+	if (new & VM_WRITE)
+		return true;
+
+	/* ...nor previously non-executable VMAs becoming executable. */
+	if (!(old & VM_EXEC))
+		return true;
+
+	return false;
+}
+
+static inline int mapping_map_writable(struct address_space *mapping)
+{
+	int c = atomic_read(&mapping->i_mmap_writable);
+
+	/* Derived from the raw_atomic_inc_unless_negative() implementation. */
+	do {
+		if (c < 0)
+			return -EPERM;
+	} while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1));
+
+	return 0;
+}
+
 #endif	/* __MM_VMA_INTERNAL_H */
diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README
index 84ee217ba8ee..680ce666ceb5 100644
--- a/tools/testing/vsock/README
+++ b/tools/testing/vsock/README
@@ -36,6 +36,21 @@ Invoke test binaries in both directions as follows:
                        --control-port=1234 \
                        --peer-cid=3
 
+Some tests are designed to produce kernel memory leaks. Leaks detection,
+however, is deferred to Kernel Memory Leak Detector. It is recommended to enable
+kmemleak (CONFIG_DEBUG_KMEMLEAK=y) and explicitly trigger a scan after each test
+suite run, e.g.
+
+  # echo clear > /sys/kernel/debug/kmemleak
+  # $TEST_BINARY ...
+  # echo "wait for any grace periods" && sleep 2
+  # echo scan > /sys/kernel/debug/kmemleak
+  # echo "wait for kmemleak" && sleep 5
+  # echo scan > /sys/kernel/debug/kmemleak
+  # cat /sys/kernel/debug/kmemleak
+
+For more information see Documentation/dev-tools/kmemleak.rst.
+
 vsock_perf utility
 -------------------
 'vsock_perf' is a simple tool to measure vsock performance. It works in
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 34e9dac0a105..de25892f865f 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -96,41 +96,57 @@ void vsock_wait_remote_close(int fd)
 	close(epollfd);
 }
 
-/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */
-int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type)
+/* Create socket <type>, bind to <cid, port> and return the file descriptor. */
+int vsock_bind(unsigned int cid, unsigned int port, int type)
 {
-	struct sockaddr_vm sa_client = {
-		.svm_family = AF_VSOCK,
-		.svm_cid = VMADDR_CID_ANY,
-		.svm_port = bind_port,
-	};
-	struct sockaddr_vm sa_server = {
+	struct sockaddr_vm sa = {
 		.svm_family = AF_VSOCK,
 		.svm_cid = cid,
 		.svm_port = port,
 	};
+	int fd;
 
-	int client_fd, ret;
-
-	client_fd = socket(AF_VSOCK, type, 0);
-	if (client_fd < 0) {
+	fd = socket(AF_VSOCK, type, 0);
+	if (fd < 0) {
 		perror("socket");
 		exit(EXIT_FAILURE);
 	}
 
-	if (bind(client_fd, (struct sockaddr *)&sa_client, sizeof(sa_client))) {
+	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) {
 		perror("bind");
 		exit(EXIT_FAILURE);
 	}
 
+	return fd;
+}
+
+int vsock_connect_fd(int fd, unsigned int cid, unsigned int port)
+{
+	struct sockaddr_vm sa = {
+		.svm_family = AF_VSOCK,
+		.svm_cid = cid,
+		.svm_port = port,
+	};
+	int ret;
+
 	timeout_begin(TIMEOUT);
 	do {
-		ret = connect(client_fd, (struct sockaddr *)&sa_server, sizeof(sa_server));
+		ret = connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 		timeout_check("connect");
 	} while (ret < 0 && errno == EINTR);
 	timeout_end();
 
-	if (ret < 0) {
+	return ret;
+}
+
+/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */
+int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type)
+{
+	int client_fd;
+
+	client_fd = vsock_bind(VMADDR_CID_ANY, bind_port, type);
+
+	if (vsock_connect_fd(client_fd, cid, port)) {
 		perror("connect");
 		exit(EXIT_FAILURE);
 	}
@@ -141,17 +157,6 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po
 /* Connect to <cid, port> and return the file descriptor. */
 int vsock_connect(unsigned int cid, unsigned int port, int type)
 {
-	union {
-		struct sockaddr sa;
-		struct sockaddr_vm svm;
-	} addr = {
-		.svm = {
-			.svm_family = AF_VSOCK,
-			.svm_port = port,
-			.svm_cid = cid,
-		},
-	};
-	int ret;
 	int fd;
 
 	control_expectln("LISTENING");
@@ -162,20 +167,14 @@ int vsock_connect(unsigned int cid, unsigned int port, int type)
 		exit(EXIT_FAILURE);
 	}
 
-	timeout_begin(TIMEOUT);
-	do {
-		ret = connect(fd, &addr.sa, sizeof(addr.svm));
-		timeout_check("connect");
-	} while (ret < 0 && errno == EINTR);
-	timeout_end();
-
-	if (ret < 0) {
+	if (vsock_connect_fd(fd, cid, port)) {
 		int old_errno = errno;
 
 		close(fd);
 		fd = -1;
 		errno = old_errno;
 	}
+
 	return fd;
 }
 
@@ -192,28 +191,9 @@ int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
 /* Listen on <cid, port> and return the file descriptor. */
 static int vsock_listen(unsigned int cid, unsigned int port, int type)
 {
-	union {
-		struct sockaddr sa;
-		struct sockaddr_vm svm;
-	} addr = {
-		.svm = {
-			.svm_family = AF_VSOCK,
-			.svm_port = port,
-			.svm_cid = cid,
-		},
-	};
 	int fd;
 
-	fd = socket(AF_VSOCK, type, 0);
-	if (fd < 0) {
-		perror("socket");
-		exit(EXIT_FAILURE);
-	}
-
-	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
-		perror("bind");
-		exit(EXIT_FAILURE);
-	}
+	fd = vsock_bind(cid, port, type);
 
 	if (listen(fd, 1) < 0) {
 		perror("listen");
@@ -401,7 +381,7 @@ void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret)
  */
 void send_byte(int fd, int expected_ret, int flags)
 {
-	const uint8_t byte = 'A';
+	static const uint8_t byte = 'A';
 
 	send_buf(fd, &byte, sizeof(byte), flags, expected_ret);
 }
@@ -420,7 +400,7 @@ void recv_byte(int fd, int expected_ret, int flags)
 	recv_buf(fd, &byte, sizeof(byte), flags, expected_ret);
 
 	if (byte != 'A') {
-		fprintf(stderr, "unexpected byte read %c\n", byte);
+		fprintf(stderr, "unexpected byte read 0x%02x\n", byte);
 		exit(EXIT_FAILURE);
 	}
 }
@@ -486,8 +466,7 @@ void list_tests(const struct test_case *test_cases)
 	exit(EXIT_FAILURE);
 }
 
-void skip_test(struct test_case *test_cases, size_t test_cases_len,
-	       const char *test_id_str)
+static unsigned long parse_test_id(const char *test_id_str, size_t test_cases_len)
 {
 	unsigned long test_id;
 	char *endptr = NULL;
@@ -505,9 +484,35 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len,
 		exit(EXIT_FAILURE);
 	}
 
+	return test_id;
+}
+
+void skip_test(struct test_case *test_cases, size_t test_cases_len,
+	       const char *test_id_str)
+{
+	unsigned long test_id = parse_test_id(test_id_str, test_cases_len);
 	test_cases[test_id].skip = true;
 }
 
+void pick_test(struct test_case *test_cases, size_t test_cases_len,
+	       const char *test_id_str)
+{
+	static bool skip_all = true;
+	unsigned long test_id;
+
+	if (skip_all) {
+		unsigned long i;
+
+		for (i = 0; i < test_cases_len; ++i)
+			test_cases[i].skip = true;
+
+		skip_all = false;
+	}
+
+	test_id = parse_test_id(test_id_str, test_cases_len);
+	test_cases[test_id].skip = false;
+}
+
 unsigned long hash_djb2(const void *data, size_t len)
 {
 	unsigned long hash = 5381;
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index ba84d296d8b7..d1f765ce3eee 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -39,10 +39,12 @@ struct test_case {
 void init_signals(void);
 unsigned int parse_cid(const char *str);
 unsigned int parse_port(const char *str);
+int vsock_connect_fd(int fd, unsigned int cid, unsigned int port);
 int vsock_connect(unsigned int cid, unsigned int port, int type);
 int vsock_accept(unsigned int cid, unsigned int port,
 		 struct sockaddr_vm *clientaddrp, int type);
 int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_bind(unsigned int cid, unsigned int port, int type);
 int vsock_bind_connect(unsigned int cid, unsigned int port,
 		       unsigned int bind_port, int type);
 int vsock_seqpacket_connect(unsigned int cid, unsigned int port);
@@ -62,6 +64,8 @@ void run_tests(const struct test_case *test_cases,
 void list_tests(const struct test_case *test_cases);
 void skip_test(struct test_case *test_cases, size_t test_cases_len,
 	       const char *test_id_str);
+void pick_test(struct test_case *test_cases, size_t test_cases_len,
+	       const char *test_id_str);
 unsigned long hash_djb2(const void *data, size_t len);
 size_t iovec_bytes(const struct iovec *iov, size_t iovnum);
 unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 48f17641ca50..d0f6d253ac72 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -22,12 +22,17 @@
 #include <signal.h>
 #include <sys/ioctl.h>
 #include <linux/sockios.h>
+#include <linux/time64.h>
 
 #include "vsock_test_zerocopy.h"
 #include "timeout.h"
 #include "control.h"
 #include "util.h"
 
+/* Basic messages for control_writeulong(), control_readulong() */
+#define CONTROL_CONTINUE	1
+#define CONTROL_DONE		0
+
 static void test_stream_connection_reset(const struct test_opts *opts)
 {
 	union {
@@ -108,24 +113,9 @@ static void test_stream_bind_only_client(const struct test_opts *opts)
 
 static void test_stream_bind_only_server(const struct test_opts *opts)
 {
-	union {
-		struct sockaddr sa;
-		struct sockaddr_vm svm;
-	} addr = {
-		.svm = {
-			.svm_family = AF_VSOCK,
-			.svm_port = opts->peer_port,
-			.svm_cid = VMADDR_CID_ANY,
-		},
-	};
 	int fd;
 
-	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
-
-	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
-		perror("bind");
-		exit(EXIT_FAILURE);
-	}
+	fd = vsock_bind(VMADDR_CID_ANY, opts->peer_port, SOCK_STREAM);
 
 	/* Notify the client that the server is ready */
 	control_writeln("BIND");
@@ -559,7 +549,7 @@ static time_t current_nsec(void)
 		exit(EXIT_FAILURE);
 	}
 
-	return (ts.tv_sec * 1000000000ULL) + ts.tv_nsec;
+	return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
 }
 
 #define RCVTIMEO_TIMEOUT_SEC 1
@@ -599,7 +589,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts)
 	}
 
 	read_overhead_ns = current_nsec() - read_enter_ns -
-			1000000000ULL * RCVTIMEO_TIMEOUT_SEC;
+			   NSEC_PER_SEC * RCVTIMEO_TIMEOUT_SEC;
 
 	if (read_overhead_ns > READ_OVERHEAD_NSEC) {
 		fprintf(stderr,
@@ -1473,6 +1463,367 @@ static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts)
 	test_stream_credit_update_test(opts, false);
 }
 
+/* The goal of test leak_acceptq is to stress the race between connect() and
+ * close(listener). Implementation of client/server loops boils down to:
+ *
+ * client                server
+ * ------                ------
+ * write(CONTINUE)
+ *                       expect(CONTINUE)
+ *                       listen()
+ *                       write(LISTENING)
+ * expect(LISTENING)
+ * connect()             close()
+ */
+#define ACCEPTQ_LEAK_RACE_TIMEOUT 2 /* seconds */
+
+static void test_stream_leak_acceptq_client(const struct test_opts *opts)
+{
+	time_t tout;
+	int fd;
+
+	tout = current_nsec() + ACCEPTQ_LEAK_RACE_TIMEOUT * NSEC_PER_SEC;
+	do {
+		control_writeulong(CONTROL_CONTINUE);
+
+		fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+		if (fd >= 0)
+			close(fd);
+	} while (current_nsec() < tout);
+
+	control_writeulong(CONTROL_DONE);
+}
+
+/* Test for a memory leak. User is expected to run kmemleak scan, see README. */
+static void test_stream_leak_acceptq_server(const struct test_opts *opts)
+{
+	int fd;
+
+	while (control_readulong() == CONTROL_CONTINUE) {
+		fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);
+		control_writeln("LISTENING");
+		close(fd);
+	}
+}
+
+/* Test for a memory leak. User is expected to run kmemleak scan, see README. */
+static void test_stream_msgzcopy_leak_errq_client(const struct test_opts *opts)
+{
+	struct pollfd fds = { 0 };
+	int fd;
+
+	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	enable_so_zerocopy_check(fd);
+	send_byte(fd, 1, MSG_ZEROCOPY);
+
+	fds.fd = fd;
+	fds.events = 0;
+	if (poll(&fds, 1, -1) < 0) {
+		perror("poll");
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	recv_byte(fd, 1, 0);
+	vsock_wait_remote_close(fd);
+	close(fd);
+}
+
+/* Test msgzcopy_leak_zcskb is meant to exercise sendmsg() error handling path,
+ * that might leak an skb. The idea is to fail virtio_transport_init_zcopy_skb()
+ * by hitting net.core.optmem_max limit in sock_omalloc(), specifically
+ *
+ *   vsock_connectible_sendmsg
+ *     virtio_transport_stream_enqueue
+ *       virtio_transport_send_pkt_info
+ *         virtio_transport_init_zcopy_skb
+ *         . msg_zerocopy_realloc
+ *         .   msg_zerocopy_alloc
+ *         .     sock_omalloc
+ *         .       sk_omem_alloc + size > sysctl_optmem_max
+ *         return -ENOMEM
+ *
+ * We abuse the implementation detail of net/socket.c:____sys_sendmsg().
+ * sk_omem_alloc can be precisely bumped by sock_kmalloc(), as it is used to
+ * fetch user-provided control data.
+ *
+ * While this approach works for now, it relies on assumptions regarding the
+ * implementation and configuration (for example, order of net.core.optmem_max
+ * can not exceed MAX_PAGE_ORDER), which may not hold in the future. A more
+ * resilient testing could be implemented by leveraging the Fault injection
+ * framework (CONFIG_FAULT_INJECTION), e.g.
+ *
+ *   client# echo N > /sys/kernel/debug/failslab/ignore-gfp-wait
+ *   client# echo 0 > /sys/kernel/debug/failslab/verbose
+ *
+ *   void client(const struct test_opts *opts)
+ *   {
+ *       char buf[16];
+ *       int f, s, i;
+ *
+ *       f = open("/proc/self/fail-nth", O_WRONLY);
+ *
+ *       for (i = 1; i < 32; i++) {
+ *           control_writeulong(CONTROL_CONTINUE);
+ *
+ *           s = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ *           enable_so_zerocopy_check(s);
+ *
+ *           sprintf(buf, "%d", i);
+ *           write(f, buf, strlen(buf));
+ *
+ *           send(s, &(char){ 0 }, 1, MSG_ZEROCOPY);
+ *
+ *           write(f, "0", 1);
+ *           close(s);
+ *       }
+ *
+ *       control_writeulong(CONTROL_DONE);
+ *       close(f);
+ *   }
+ *
+ *   void server(const struct test_opts *opts)
+ *   {
+ *       int fd;
+ *
+ *       while (control_readulong() == CONTROL_CONTINUE) {
+ *           fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ *           vsock_wait_remote_close(fd);
+ *           close(fd);
+ *       }
+ *   }
+ *
+ * Refer to Documentation/fault-injection/fault-injection.rst.
+ */
+#define MAX_PAGE_ORDER	10	/* usually */
+#define PAGE_SIZE	4096
+
+/* Test for a memory leak. User is expected to run kmemleak scan, see README. */
+static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts)
+{
+	size_t optmem_max, ctl_len, chunk_size;
+	struct msghdr msg = { 0 };
+	struct iovec iov;
+	char *chunk;
+	int fd, res;
+	FILE *f;
+
+	f = fopen("/proc/sys/net/core/optmem_max", "r");
+	if (!f) {
+		perror("fopen(optmem_max)");
+		exit(EXIT_FAILURE);
+	}
+
+	if (fscanf(f, "%zu", &optmem_max) != 1) {
+		fprintf(stderr, "fscanf(optmem_max) failed\n");
+		exit(EXIT_FAILURE);
+	}
+
+	fclose(f);
+
+	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	enable_so_zerocopy_check(fd);
+
+	ctl_len = optmem_max - 1;
+	if (ctl_len > PAGE_SIZE << MAX_PAGE_ORDER) {
+		fprintf(stderr, "Try with net.core.optmem_max = 100000\n");
+		exit(EXIT_FAILURE);
+	}
+
+	chunk_size = CMSG_SPACE(ctl_len);
+	chunk = malloc(chunk_size);
+	if (!chunk) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+	memset(chunk, 0, chunk_size);
+
+	iov.iov_base = &(char){ 0 };
+	iov.iov_len = 1;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = chunk;
+	msg.msg_controllen = ctl_len;
+
+	errno = 0;
+	res = sendmsg(fd, &msg, MSG_ZEROCOPY);
+	if (res >= 0 || errno != ENOMEM) {
+		fprintf(stderr, "Expected ENOMEM, got errno=%d res=%d\n",
+			errno, res);
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	vsock_wait_remote_close(fd);
+	close(fd);
+}
+
+#define MAX_PORT_RETRIES	24	/* net/vmw_vsock/af_vsock.c */
+
+/* Test attempts to trigger a transport release for an unbound socket. This can
+ * lead to a reference count mishandling.
+ */
+static void test_stream_transport_uaf_client(const struct test_opts *opts)
+{
+	int sockets[MAX_PORT_RETRIES];
+	struct sockaddr_vm addr;
+	int fd, i, alen;
+
+	fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM);
+
+	alen = sizeof(addr);
+	if (getsockname(fd, (struct sockaddr *)&addr, &alen)) {
+		perror("getsockname");
+		exit(EXIT_FAILURE);
+	}
+
+	for (i = 0; i < MAX_PORT_RETRIES; ++i)
+		sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port,
+					SOCK_STREAM);
+
+	close(fd);
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+	if (fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
+
+	if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) {
+		perror("Unexpected connect() #1 success");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Vulnerable system may crash now. */
+	if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) {
+		perror("Unexpected connect() #2 success");
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+	while (i--)
+		close(sockets[i]);
+
+	control_writeln("DONE");
+}
+
+static void test_stream_transport_uaf_server(const struct test_opts *opts)
+{
+	control_expectln("DONE");
+}
+
+static void test_stream_connect_retry_client(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+	if (fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
+
+	if (!vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) {
+		fprintf(stderr, "Unexpected connect() #1 success\n");
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("LISTEN");
+	control_expectln("LISTENING");
+
+	if (vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) {
+		perror("connect() #2");
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+static void test_stream_connect_retry_server(const struct test_opts *opts)
+{
+	int fd;
+
+	control_expectln("LISTEN");
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	vsock_wait_remote_close(fd);
+	close(fd);
+}
+
+static void test_stream_linger_client(const struct test_opts *opts)
+{
+	struct linger optval = {
+		.l_onoff = 1,
+		.l_linger = 1
+	};
+	int fd;
+
+	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
+		perror("setsockopt(SO_LINGER)");
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+static void test_stream_linger_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	vsock_wait_remote_close(fd);
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -1603,6 +1954,36 @@ static struct test_case test_cases[] = {
 		.run_client = test_seqpacket_unsent_bytes_client,
 		.run_server = test_seqpacket_unsent_bytes_server,
 	},
+	{
+		.name = "SOCK_STREAM leak accept queue",
+		.run_client = test_stream_leak_acceptq_client,
+		.run_server = test_stream_leak_acceptq_server,
+	},
+	{
+		.name = "SOCK_STREAM MSG_ZEROCOPY leak MSG_ERRQUEUE",
+		.run_client = test_stream_msgzcopy_leak_errq_client,
+		.run_server = test_stream_msgzcopy_leak_errq_server,
+	},
+	{
+		.name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb",
+		.run_client = test_stream_msgzcopy_leak_zcskb_client,
+		.run_server = test_stream_msgzcopy_leak_zcskb_server,
+	},
+	{
+		.name = "SOCK_STREAM transport release use-after-free",
+		.run_client = test_stream_transport_uaf_client,
+		.run_server = test_stream_transport_uaf_server,
+	},
+	{
+		.name = "SOCK_STREAM retry failed connect()",
+		.run_client = test_stream_connect_retry_client,
+		.run_server = test_stream_connect_retry_server,
+	},
+	{
+		.name = "SOCK_STREAM SO_LINGER null-ptr-deref",
+		.run_client = test_stream_linger_client,
+		.run_server = test_stream_linger_server,
+	},
 	{},
 };
 
@@ -1644,6 +2025,11 @@ static const struct option longopts[] = {
 		.val = 's',
 	},
 	{
+		.name = "pick",
+		.has_arg = required_argument,
+		.val = 't',
+	},
+	{
 		.name = "help",
 		.has_arg = no_argument,
 		.val = '?',
@@ -1680,6 +2066,8 @@ static void usage(void)
 		"  --peer-cid <cid>       CID of the other side\n"
 		"  --peer-port <port>     AF_VSOCK port used for the test [default: %d]\n"
 		"  --list                 List of tests that will be executed\n"
+		"  --pick <test_id>       Test ID to execute selectively;\n"
+		"                         use multiple --pick options to select more tests\n"
 		"  --skip <test_id>       Test ID to skip;\n"
 		"                         use multiple --skip options to skip more tests\n",
 		DEFAULT_PEER_PORT
@@ -1736,6 +2124,10 @@ int main(int argc, char **argv)
 			skip_test(test_cases, ARRAY_SIZE(test_cases) - 1,
 				  optarg);
 			break;
+		case 't':
+			pick_test(test_cases, ARRAY_SIZE(test_cases) - 1,
+				  optarg);
+			break;
 		case '?':
 		default:
 			usage();
diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile
index f2552f73a64c..056d212f25cf 100644
--- a/tools/thermal/lib/Makefile
+++ b/tools/thermal/lib/Makefile
@@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative)
 libdir_SQ = $(subst ','\'',$(libdir))
 libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
 
-ifeq ("$(origin V)", "command line")
-  VERBOSE = $(V)
-endif
-ifndef VERBOSE
-  VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
-  Q =
-else
-  Q = @
-endif
-
 # Set compile option CFLAGS
 ifdef EXTRA_CFLAGS
   CFLAGS := $(EXTRA_CFLAGS)
diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile
index 6518b03e05c7..257a56b1899f 100644
--- a/tools/tracing/latency/Makefile
+++ b/tools/tracing/latency/Makefile
@@ -37,12 +37,6 @@ FEATURE_TESTS	+= libtracefs
 FEATURE_DISPLAY	:= libtraceevent
 FEATURE_DISPLAY	+= libtracefs
 
-ifeq ($(V),1)
-  Q 		=
-else
-  Q 		= @
-endif
-
 all: $(LATENCY-COLLECTOR)
 
 include $(srctree)/tools/build/Makefile.include
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
index a6a7dee16622..0b61208db604 100644
--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -37,12 +37,6 @@ FEATURE_DISPLAY	:= libtraceevent
 FEATURE_DISPLAY	+= libtracefs
 FEATURE_DISPLAY	+= libcpupower
 
-ifeq ($(V),1)
-  Q		=
-else
-  Q		= @
-endif
-
 all: $(RTLA)
 
 include $(srctree)/tools/build/Makefile.include
@@ -85,4 +79,6 @@ clean: doc_clean fixdep-clean
 	$(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)rm -f rtla rtla-static fixdep FEATURE-DUMP rtla-*
 	$(Q)rm -rf feature
-.PHONY: FORCE clean
+check: $(RTLA)
+	RTLA=$(RTLA) prove -o -f tests/
+.PHONY: FORCE clean check
diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c
index 245e9344932b..85f398b89597 100644
--- a/tools/tracing/rtla/src/osnoise.c
+++ b/tools/tracing/rtla/src/osnoise.c
@@ -867,7 +867,7 @@ int osnoise_set_workload(struct osnoise_context *context, bool onoff)
 
 	retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff);
 	if (retval < 0)
-		return -1;
+		return -2;
 
 	context->opt_workload = onoff;
 
@@ -1079,6 +1079,42 @@ out_err:
 	return NULL;
 }
 
+bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record)
+{
+	/*
+	 * The tool instance is always present, it is the one used to collect
+	 * data.
+	 */
+	if (!tracefs_trace_is_on(tool->trace.inst))
+		return true;
+
+	/*
+	 * The trace record instance is only enabled when -t is set. IOW, when the system
+	 * is tracing.
+	 */
+	return record && !tracefs_trace_is_on(record->trace.inst);
+}
+
+/*
+ * osnoise_report_missed_events - report number of events dropped by trace
+ * buffer
+ */
+void
+osnoise_report_missed_events(struct osnoise_tool *tool)
+{
+	unsigned long long total_events;
+
+	if (tool->trace.missed_events == UINT64_MAX)
+		printf("unknown number of events missed, results might not be accurate\n");
+	else if (tool->trace.missed_events > 0) {
+		total_events = tool->trace.processed_events + tool->trace.missed_events;
+
+		printf("%lld (%.2f%%) events missed, results might not be accurate\n",
+		       tool->trace.missed_events,
+		       (double) tool->trace.missed_events / total_events * 100.0);
+	}
+}
+
 static void osnoise_usage(int err)
 {
 	int i;
diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h
index 555f4f4903cc..91835a7d8c2b 100644
--- a/tools/tracing/rtla/src/osnoise.h
+++ b/tools/tracing/rtla/src/osnoise.h
@@ -104,6 +104,8 @@ struct osnoise_tool {
 void osnoise_destroy_tool(struct osnoise_tool *top);
 struct osnoise_tool *osnoise_init_tool(char *tool_name);
 struct osnoise_tool *osnoise_init_trace_tool(char *tracer);
+void osnoise_report_missed_events(struct osnoise_tool *tool);
+bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record);
 
 int osnoise_hist_main(int argc, char *argv[]);
 int osnoise_top_main(int argc, char **argv);
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
index 214e2c93fde0..b4930b835b0a 100644
--- a/tools/tracing/rtla/src/osnoise_hist.c
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -440,6 +440,7 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too
 	trace_seq_reset(trace->seq);
 
 	osnoise_print_summary(params, trace, data);
+	osnoise_report_missed_events(tool);
 }
 
 /*
@@ -970,7 +971,7 @@ int osnoise_hist_main(int argc, char *argv[])
 			goto out_hist;
 		}
 
-		if (trace_is_off(&tool->trace, &record->trace))
+		if (osnoise_trace_is_off(tool, record))
 			break;
 	}
 
@@ -980,7 +981,7 @@ int osnoise_hist_main(int argc, char *argv[])
 
 	return_value = 0;
 
-	if (trace_is_off(&tool->trace, &record->trace)) {
+	if (osnoise_trace_is_off(tool, record)) {
 		printf("rtla osnoise hit stop tracing\n");
 		if (params->trace_output) {
 			printf("  Saving trace to %s\n", params->trace_output);
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index 45647495ce3b..4772677ac762 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -280,6 +280,7 @@ osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top)
 
 	trace_seq_do_printf(trace->seq);
 	trace_seq_reset(trace->seq);
+	osnoise_report_missed_events(top);
 }
 
 /*
@@ -801,7 +802,7 @@ int osnoise_top_main(int argc, char **argv)
 		if (!params->quiet)
 			osnoise_print_stats(params, tool);
 
-		if (trace_is_off(&tool->trace, &record->trace))
+		if (osnoise_trace_is_off(tool, record))
 			break;
 
 	}
@@ -810,7 +811,7 @@ int osnoise_top_main(int argc, char **argv)
 
 	return_value = 0;
 
-	if (trace_is_off(&tool->trace, &record->trace)) {
+	if (osnoise_trace_is_off(tool, record)) {
 		printf("osnoise hit stop tracing\n");
 		if (params->trace_output) {
 			printf("  Saving trace to %s\n", params->trace_output);
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
index 4403cc4eba30..6d7d0a2d45b4 100644
--- a/tools/tracing/rtla/src/timerlat_hist.c
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -656,6 +656,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t
 
 	timerlat_print_summary(params, trace, data);
 	timerlat_print_stats_all(params, trace, data);
+	osnoise_report_missed_events(tool);
 }
 
 /*
@@ -1100,12 +1101,15 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param
 		}
 	}
 
-	if (params->user_hist) {
-		retval = osnoise_set_workload(tool->context, 0);
-		if (retval) {
-			err_msg("Failed to set OSNOISE_WORKLOAD option\n");
-			goto out_err;
-		}
+	/*
+	* Set workload according to type of thread if the kernel supports it.
+	* On kernels without support, user threads will have already failed
+	* on missing timerlat_fd, and kernel threads do not need it.
+	*/
+	retval = osnoise_set_workload(tool->context, params->kernel_workload);
+	if (retval < -1) {
+		err_msg("Failed to set OSNOISE_WORKLOAD option\n");
+		goto out_err;
 	}
 
 	return 0;
@@ -1146,9 +1150,20 @@ out_err:
 }
 
 static int stop_tracing;
+static struct trace_instance *hist_inst = NULL;
 static void stop_hist(int sig)
 {
+	if (stop_tracing) {
+		/*
+		 * Stop requested twice in a row; abort event processing and
+		 * exit immediately
+		 */
+		tracefs_iterate_stop(hist_inst->inst);
+		return;
+	}
 	stop_tracing = 1;
+	if (hist_inst)
+		trace_instance_stop(hist_inst);
 }
 
 /*
@@ -1195,6 +1210,12 @@ int timerlat_hist_main(int argc, char *argv[])
 	}
 
 	trace = &tool->trace;
+	/*
+	 * Save trace instance into global variable so that SIGINT can stop
+	 * the timerlat tracer.
+	 * Otherwise, rtla could loop indefinitely when overloaded.
+	 */
+	hist_inst = trace;
 
 	retval = enable_timerlat(trace);
 	if (retval) {
@@ -1342,7 +1363,7 @@ int timerlat_hist_main(int argc, char *argv[])
 			goto out_hist;
 		}
 
-		if (trace_is_off(&tool->trace, &record->trace))
+		if (osnoise_trace_is_off(tool, record))
 			break;
 
 		/* is there still any user-threads ? */
@@ -1363,7 +1384,7 @@ int timerlat_hist_main(int argc, char *argv[])
 
 	return_value = 0;
 
-	if (trace_is_off(&tool->trace, &record->trace)) {
+	if (osnoise_trace_is_off(tool, record) && !stop_tracing) {
 		printf("rtla timerlat hit stop tracing\n");
 
 		if (!params->no_aa)
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
index 059b468981e4..05a9403b01d2 100644
--- a/tools/tracing/rtla/src/timerlat_top.c
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -435,6 +435,7 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to
 
 	trace_seq_do_printf(trace->seq);
 	trace_seq_reset(trace->seq);
+	osnoise_report_missed_events(top);
 }
 
 /*
@@ -851,12 +852,15 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *
 		}
 	}
 
-	if (params->user_top) {
-		retval = osnoise_set_workload(top->context, 0);
-		if (retval) {
-			err_msg("Failed to set OSNOISE_WORKLOAD option\n");
-			goto out_err;
-		}
+	/*
+	* Set workload according to type of thread if the kernel supports it.
+	* On kernels without support, user threads will have already failed
+	* on missing timerlat_fd, and kernel threads do not need it.
+	*/
+	retval = osnoise_set_workload(top->context, params->kernel_workload);
+	if (retval < -1) {
+		err_msg("Failed to set OSNOISE_WORKLOAD option\n");
+		goto out_err;
 	}
 
 	if (isatty(STDOUT_FILENO) && !params->quiet)
@@ -900,9 +904,20 @@ out_err:
 }
 
 static int stop_tracing;
+static struct trace_instance *top_inst = NULL;
 static void stop_top(int sig)
 {
+	if (stop_tracing) {
+		/*
+		 * Stop requested twice in a row; abort event processing and
+		 * exit immediately
+		 */
+		tracefs_iterate_stop(top_inst->inst);
+		return;
+	}
 	stop_tracing = 1;
+	if (top_inst)
+		trace_instance_stop(top_inst);
 }
 
 /*
@@ -950,6 +965,13 @@ int timerlat_top_main(int argc, char *argv[])
 	}
 
 	trace = &top->trace;
+	/*
+	* Save trace instance into global variable so that SIGINT can stop
+	* the timerlat tracer.
+	* Otherwise, rtla could loop indefinitely when overloaded.
+	*/
+	top_inst = trace;
+
 
 	retval = enable_timerlat(trace);
 	if (retval) {
@@ -1093,7 +1115,7 @@ int timerlat_top_main(int argc, char *argv[])
 	while (!stop_tracing) {
 		sleep(params->sleep_time);
 
-		if (params->aa_only && !trace_is_off(&top->trace, &record->trace))
+		if (params->aa_only && !osnoise_trace_is_off(top, record))
 			continue;
 
 		retval = tracefs_iterate_raw_events(trace->tep,
@@ -1110,7 +1132,7 @@ int timerlat_top_main(int argc, char *argv[])
 		if (!params->quiet)
 			timerlat_print_stats(params, top);
 
-		if (trace_is_off(&top->trace, &record->trace))
+		if (osnoise_trace_is_off(top, record))
 			break;
 
 		/* is there still any user-threads ? */
@@ -1131,7 +1153,7 @@ int timerlat_top_main(int argc, char *argv[])
 
 	return_value = 0;
 
-	if (trace_is_off(&top->trace, &record->trace)) {
+	if (osnoise_trace_is_off(top, record) && !stop_tracing) {
 		printf("rtla timerlat hit stop tracing\n");
 
 		if (!params->no_aa)
diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c
index 170a706248ab..728f5029d533 100644
--- a/tools/tracing/rtla/src/trace.c
+++ b/tools/tracing/rtla/src/trace.c
@@ -118,6 +118,8 @@ collect_registered_events(struct tep_event *event, struct tep_record *record,
 	struct trace_instance *trace = context;
 	struct trace_seq *s = trace->seq;
 
+	trace->processed_events++;
+
 	if (!event->handler)
 		return 0;
 
@@ -127,6 +129,31 @@ collect_registered_events(struct tep_event *event, struct tep_record *record,
 }
 
 /*
+ * collect_missed_events - record number of missed events
+ *
+ * If rtla cannot keep up with events generated by tracer, events are going
+ * to fall out of the ring buffer.
+ * Collect how many events were missed so it can be reported to the user.
+ */
+static int
+collect_missed_events(struct tep_event *event, struct tep_record *record,
+		      int cpu, void *context)
+{
+	struct trace_instance *trace = context;
+
+	if (trace->missed_events == UINT64_MAX)
+		return 0;
+
+	if (record->missed_events > 0)
+		trace->missed_events += record->missed_events;
+	else
+		/* Events missed but no data on how many */
+		trace->missed_events = UINT64_MAX;
+
+	return 0;
+}
+
+/*
  * trace_instance_destroy - destroy and free a rtla trace instance
  */
 void trace_instance_destroy(struct trace_instance *trace)
@@ -181,6 +208,17 @@ int trace_instance_init(struct trace_instance *trace, char *tool_name)
 	 */
 	tracefs_trace_off(trace->inst);
 
+	/*
+	 * Collect the number of events missed due to tracefs buffer
+	 * overflow.
+	 */
+	trace->missed_events = 0;
+	tracefs_follow_missed_events(trace->inst,
+				     collect_missed_events,
+				     trace);
+
+	trace->processed_events = 0;
+
 	return 0;
 
 out_err:
@@ -197,6 +235,14 @@ int trace_instance_start(struct trace_instance *trace)
 }
 
 /*
+ * trace_instance_stop - stop tracing a given rtla instance
+ */
+int trace_instance_stop(struct trace_instance *trace)
+{
+	return tracefs_trace_off(trace->inst);
+}
+
+/*
  * trace_events_free - free a list of trace events
  */
 static void trace_events_free(struct trace_events *events)
@@ -522,25 +568,6 @@ void trace_events_destroy(struct trace_instance *instance,
 	trace_events_free(events);
 }
 
-int trace_is_off(struct trace_instance *tool, struct trace_instance *trace)
-{
-	/*
-	 * The tool instance is always present, it is the one used to collect
-	 * data.
-	 */
-	if (!tracefs_trace_is_on(tool->inst))
-		return 1;
-
-	/*
-	 * The trace instance is only enabled when -t is set. IOW, when the system
-	 * is tracing.
-	 */
-	if (trace && !tracefs_trace_is_on(trace->inst))
-		return 1;
-
-	return 0;
-}
-
 /*
  * trace_set_buffer_size - set the per-cpu tracing buffer size.
  */
diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h
index c7c92dc9a18a..3cd40dd3f06c 100644
--- a/tools/tracing/rtla/src/trace.h
+++ b/tools/tracing/rtla/src/trace.h
@@ -17,10 +17,13 @@ struct trace_instance {
 	struct tracefs_instance		*inst;
 	struct tep_handle		*tep;
 	struct trace_seq		*seq;
+	unsigned long long		missed_events;
+	unsigned long long		processed_events;
 };
 
 int trace_instance_init(struct trace_instance *trace, char *tool_name);
 int trace_instance_start(struct trace_instance *trace);
+int trace_instance_stop(struct trace_instance *trace);
 void trace_instance_destroy(struct trace_instance *trace);
 
 struct trace_seq *get_trace_seq(void);
@@ -47,5 +50,4 @@ int trace_events_enable(struct trace_instance *instance,
 
 int trace_event_add_filter(struct trace_events *event, char *filter);
 int trace_event_add_trigger(struct trace_events *event, char *trigger);
-int trace_is_off(struct trace_instance *tool, struct trace_instance *trace);
 int trace_set_buffer_size(struct trace_instance *trace, int size);
diff --git a/tools/tracing/rtla/tests/engine.sh b/tools/tracing/rtla/tests/engine.sh
new file mode 100644
index 000000000000..64d0446dc28e
--- /dev/null
+++ b/tools/tracing/rtla/tests/engine.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+test_begin() {
+	# Count tests to allow the test harness to double-check if all were
+	# included correctly.
+	ctr=0
+	[ -z "$RTLA" ] && RTLA="./rtla"
+	[ -n "$TEST_COUNT" ] && echo "1..$TEST_COUNT"
+}
+
+check() {
+	# Simple check: run rtla with given arguments and test exit code.
+	# If TEST_COUNT is set, run the test. Otherwise, just count.
+	ctr=$(($ctr + 1))
+	if [ -n "$TEST_COUNT" ]
+	then
+		# Run rtla; in case of failure, include its output as comment
+		# in the test results.
+		result=$(stdbuf -oL $TIMEOUT "$RTLA" $2 2>&1); exitcode=$?
+		if [ $exitcode -eq 0 ]
+		then
+			echo "ok $ctr - $1"
+		else
+			echo "not ok $ctr - $1"
+			# Add rtla output and exit code as comments in case of failure
+			echo "$result" | col -b | while read line; do echo "# $line"; done
+			printf "#\n# exit code %s\n" $exitcode
+		fi
+	fi
+}
+
+set_timeout() {
+	TIMEOUT="timeout -v -k 15s $1"
+}
+
+unset_timeout() {
+	unset TIMEOUT
+}
+
+test_end() {
+	# If running without TEST_COUNT, tests are not actually run, just
+	# counted. In that case, re-run the test with the correct count.
+	[ -z "$TEST_COUNT" ] && TEST_COUNT=$ctr exec bash $0 || true
+}
+
+# Avoid any environmental discrepancies
+export LC_ALL=C
+unset_timeout
diff --git a/tools/tracing/rtla/tests/hwnoise.t b/tools/tracing/rtla/tests/hwnoise.t
new file mode 100644
index 000000000000..bbed17580537
--- /dev/null
+++ b/tools/tracing/rtla/tests/hwnoise.t
@@ -0,0 +1,21 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source tests/engine.sh
+test_begin
+
+set_timeout 2m
+
+check "verify help page" \
+	"hwnoise --help"
+check "detect noise higher than one microsecond" \
+	"hwnoise -c 0 -T 1 -d 5s -q"
+check "set the automatic trace mode" \
+	"hwnoise -a 5 -d 30s"
+check "set scheduling param to the osnoise tracer threads" \
+	"hwnoise -P F:1 -c 0 -r 900000 -d 1M -q"
+check "stop the trace if a single sample is higher than 1 us" \
+	"hwnoise -s 1 -T 1 -t -d 30s"
+check "enable a trace event trigger" \
+	"hwnoise -t -e osnoise:irq_noise trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 1m"
+
+test_end
diff --git a/tools/tracing/rtla/tests/osnoise.t b/tools/tracing/rtla/tests/osnoise.t
new file mode 100644
index 000000000000..86596e547893
--- /dev/null
+++ b/tools/tracing/rtla/tests/osnoise.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source tests/engine.sh
+test_begin
+
+set_timeout 2m
+
+check "verify help page" \
+	"osnoise --help"
+check "verify the --priority/-P param" \
+	"osnoise top -P F:1 -c 0 -r 900000 -d 1M -q"
+check "verify the --stop/-s param" \
+	"osnoise top -s 30 -T 1 -t"
+check "verify the  --trace param" \
+	"osnoise hist -s 30 -T 1 -t"
+check "verify the --entries/-E param" \
+	"osnoise hist -P F:1 -c 0 -r 900000 -d 1M -b 10 -E 25"
+
+test_end
diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t
new file mode 100644
index 000000000000..e86f40e5749e
--- /dev/null
+++ b/tools/tracing/rtla/tests/timerlat.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source tests/engine.sh
+test_begin
+
+set_timeout 2m
+
+check "verify help page" \
+	"timerlat --help"
+check "verify -s/--stack" \
+	"timerlat top -s 3 -T 10 -t"
+check "verify -P/--priority" \
+	"timerlat top -P F:1 -c 0 -d 1M -q"
+check "test in nanoseconds" \
+	"timerlat top -i 2 -c 0 -n -d 30s"
+check "set the automatic trace mode" \
+	"timerlat top -a 5 --dump-tasks"
+check "print the auto-analysis if hits the stop tracing condition" \
+	"timerlat top --aa-only 5"
+check "disable auto-analysis" \
+	"timerlat top -s 3 -T 10 -t --no-aa"
+check "verify -c/--cpus" \
+	"timerlat hist -c 0 -d 30s"
+check "hist test in nanoseconds" \
+	"timerlat hist -i 2 -c 0 -n -d 30s"
+
+test_end
diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py
index bdeb98baa8b0..d9a3fe2b74bf 100644
--- a/tools/verification/dot2/automata.py
+++ b/tools/verification/dot2/automata.py
@@ -19,13 +19,14 @@ class Automata:
 
     invalid_state_str = "INVALID_STATE"
 
-    def __init__(self, file_path):
+    def __init__(self, file_path, model_name=None):
         self.__dot_path = file_path
-        self.name = self.__get_model_name()
+        self.name = model_name or self.__get_model_name()
         self.__dot_lines = self.__open_dot()
         self.states, self.initial_state, self.final_states = self.__get_state_variables()
         self.events = self.__get_event_variables()
         self.function = self.__create_matrix()
+        self.events_start, self.events_start_run = self.__store_init_events()
 
     def __get_model_name(self):
         basename = ntpath.basename(self.__dot_path)
@@ -172,3 +173,34 @@ class Automata:
             cursor += 1
 
         return matrix
+
+    def __store_init_events(self):
+        events_start = [False] * len(self.events)
+        events_start_run = [False] * len(self.events)
+        for i, _ in enumerate(self.events):
+            curr_event_will_init = 0
+            curr_event_from_init = False
+            curr_event_used = 0
+            for j, _ in enumerate(self.states):
+                if self.function[j][i] != self.invalid_state_str:
+                    curr_event_used += 1
+                if self.function[j][i] == self.initial_state:
+                    curr_event_will_init += 1
+            if self.function[0][i] != self.invalid_state_str:
+                curr_event_from_init = True
+            # this event always leads to init
+            if curr_event_will_init and curr_event_used == curr_event_will_init:
+                events_start[i] = True
+            # this event is only called from init
+            if curr_event_from_init and curr_event_used == 1:
+                events_start_run[i] = True
+        return events_start, events_start_run
+
+    def is_start_event(self, event):
+        return self.events_start[self.events.index(event)]
+
+    def is_start_run_event(self, event):
+        # prefer handle_start_event if there
+        if any(self.events_start):
+            return False
+        return self.events_start_run[self.events.index(event)]
diff --git a/tools/verification/dot2/dot2c.py b/tools/verification/dot2/dot2c.py
index 87d8a1e1470c..fa2816ac7b61 100644
--- a/tools/verification/dot2/dot2c.py
+++ b/tools/verification/dot2/dot2c.py
@@ -22,8 +22,8 @@ class Dot2c(Automata):
     struct_automaton_def = "automaton"
     var_automaton_def = "aut"
 
-    def __init__(self, file_path):
-        super().__init__(file_path)
+    def __init__(self, file_path, model_name=None):
+        super().__init__(file_path, model_name)
         self.line_length = 100
 
     def __buff_to_string(self, buff):
diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k
index d4d7e52d549e..559ba191a1f6 100644
--- a/tools/verification/dot2/dot2k
+++ b/tools/verification/dot2/dot2k
@@ -21,25 +21,24 @@ if __name__ == '__main__':
     parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=True)
     parser.add_argument('-n', "--model_name", dest="model_name", required=False)
     parser.add_argument("-D", "--description", dest="description", required=False)
+    parser.add_argument("-a", "--auto_patch", dest="auto_patch",
+                        action="store_true", required=False,
+                        help="Patch the kernel in place")
     params = parser.parse_args()
 
     print("Opening and parsing the dot file %s" % params.dot_file)
     try:
-        monitor=dot2k(params.dot_file, params.monitor_type)
+        monitor=dot2k(params.dot_file, params.monitor_type, vars(params))
     except Exception as e:
         print('Error: '+ str(e))
         print("Sorry : :-(")
         sys.exit(1)
 
-    # easier than using argparse action.
-    if params.model_name != None:
-        print(params.model_name)
-
     print("Writing the monitor into the directory %s" % monitor.name)
     monitor.print_files()
     print("Almost done, checklist")
     print("  - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name))
-    print("  - Edit include/trace/events/rv.h to add the tracepoint entry")
-    print("  - Move it to the kernel's monitor directory")
-    print("  - Edit kernel/trace/rv/Makefile")
-    print("  - Edit kernel/trace/rv/Kconfig")
+    print(monitor.fill_tracepoint_tooltip())
+    print(monitor.fill_makefile_tooltip())
+    print(monitor.fill_kconfig_tooltip())
+    print(monitor.fill_monitor_tooltip())
diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py
index 016550fccf1f..7547eb290b7d 100644
--- a/tools/verification/dot2/dot2k.py
+++ b/tools/verification/dot2/dot2k.py
@@ -14,50 +14,83 @@ import os
 
 class dot2k(Dot2c):
     monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 }
-    monitor_templates_dir = "dot2k/rv_templates/"
+    monitor_templates_dir = "dot2/dot2k_templates/"
+    rv_dir = "kernel/trace/rv"
     monitor_type = "per_cpu"
 
-    def __init__(self, file_path, MonitorType):
-        super().__init__(file_path)
+    def __init__(self, file_path, MonitorType, extra_params={}):
+        super().__init__(file_path, extra_params.get("model_name"))
 
         self.monitor_type = self.monitor_types.get(MonitorType)
-        if self.monitor_type == None:
-            raise Exception("Unknown monitor type: %s" % MonitorType)
+        if self.monitor_type is None:
+            raise ValueError("Unknown monitor type: %s" % MonitorType)
 
         self.monitor_type = MonitorType
         self.__fill_rv_templates_dir()
-        self.main_c = self.__open_file(self.monitor_templates_dir + "main_" + MonitorType + ".c")
+        self.main_c = self.__read_file(self.monitor_templates_dir + "main.c")
+        self.trace_h = self.__read_file(self.monitor_templates_dir + "trace.h")
+        self.kconfig = self.__read_file(self.monitor_templates_dir + "Kconfig")
         self.enum_suffix = "_%s" % self.name
+        self.description = extra_params.get("description", self.name) or "auto-generated"
+        self.auto_patch = extra_params.get("auto_patch")
+        if self.auto_patch:
+            self.__fill_rv_kernel_dir()
 
     def __fill_rv_templates_dir(self):
 
-        if os.path.exists(self.monitor_templates_dir) == True:
+        if os.path.exists(self.monitor_templates_dir):
             return
 
         if platform.system() != "Linux":
-            raise Exception("I can only run on Linux.")
+            raise OSError("I can only run on Linux.")
 
         kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release())
 
-        if os.path.exists(kernel_path) == True:
+        if os.path.exists(kernel_path):
             self.monitor_templates_dir = kernel_path
             return
 
-        if os.path.exists("/usr/share/dot2/dot2k_templates/") == True:
+        if os.path.exists("/usr/share/dot2/dot2k_templates/"):
             self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/"
             return
 
-        raise Exception("Could not find the template directory, do you have the kernel source installed?")
+        raise FileNotFoundError("Could not find the template directory, do you have the kernel source installed?")
 
+    def __fill_rv_kernel_dir(self):
 
-    def __open_file(self, path):
+        # first try if we are running in the kernel tree root
+        if os.path.exists(self.rv_dir):
+            return
+
+        # offset if we are running inside the kernel tree from verification/dot2
+        kernel_path = os.path.join("../..", self.rv_dir)
+
+        if os.path.exists(kernel_path):
+            self.rv_dir = kernel_path
+            return
+
+        if platform.system() != "Linux":
+            raise OSError("I can only run on Linux.")
+
+        kernel_path = os.path.join("/lib/modules/%s/build" % platform.release(), self.rv_dir)
+
+        # if the current kernel is from a distro this may not be a full kernel tree
+        # verify that one of the files we are going to modify is available
+        if os.path.exists(os.path.join(kernel_path, "rv_trace.h")):
+            self.rv_dir = kernel_path
+            return
+
+        raise FileNotFoundError("Could not find the rv directory, do you have the kernel source installed?")
+
+    def __read_file(self, path):
         try:
-            fd = open(path)
+            fd = open(path, 'r')
         except OSError:
             raise Exception("Cannot open the file: %s" % path)
 
         content = fd.read()
 
+        fd.close()
         return content
 
     def __buff_to_string(self, buff):
@@ -69,16 +102,26 @@ class dot2k(Dot2c):
         # cut off the last \n
         return string[:-1]
 
+    def fill_monitor_type(self):
+        return self.monitor_type.upper()
+
     def fill_tracepoint_handlers_skel(self):
         buff = []
         for event in self.events:
             buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event)
             buff.append("{")
+            handle = "handle_event"
+            if self.is_start_event(event):
+                buff.append("\t/* XXX: validate that this event always leads to the initial state */")
+                handle = "handle_start_event"
+            elif self.is_start_run_event(event):
+                buff.append("\t/* XXX: validate that this event is only valid in the initial state */")
+                handle = "handle_start_run_event"
             if self.monitor_type == "per_task":
                 buff.append("\tstruct task_struct *p = /* XXX: how do I get p? */;");
-                buff.append("\tda_handle_event_%s(p, %s%s);" % (self.name, event, self.enum_suffix));
+                buff.append("\tda_%s_%s(p, %s%s);" % (handle, self.name, event, self.enum_suffix));
             else:
-                buff.append("\tda_handle_event_%s(%s%s);" % (self.name, event, self.enum_suffix));
+                buff.append("\tda_%s_%s(%s%s);" % (handle, self.name, event, self.enum_suffix));
             buff.append("}")
             buff.append("")
         return self.__buff_to_string(buff)
@@ -97,18 +140,21 @@ class dot2k(Dot2c):
 
     def fill_main_c(self):
         main_c = self.main_c
+        monitor_type = self.fill_monitor_type()
         min_type = self.get_minimun_type()
-        nr_events = self.events.__len__()
+        nr_events = len(self.events)
         tracepoint_handlers = self.fill_tracepoint_handlers_skel()
         tracepoint_attach = self.fill_tracepoint_attach_probe()
         tracepoint_detach = self.fill_tracepoint_detach_helper()
 
-        main_c = main_c.replace("MIN_TYPE", min_type)
-        main_c = main_c.replace("MODEL_NAME", self.name)
-        main_c = main_c.replace("NR_EVENTS", str(nr_events))
-        main_c = main_c.replace("TRACEPOINT_HANDLERS_SKEL", tracepoint_handlers)
-        main_c = main_c.replace("TRACEPOINT_ATTACH", tracepoint_attach)
-        main_c = main_c.replace("TRACEPOINT_DETACH", tracepoint_detach)
+        main_c = main_c.replace("%%MONITOR_TYPE%%", monitor_type)
+        main_c = main_c.replace("%%MIN_TYPE%%", min_type)
+        main_c = main_c.replace("%%MODEL_NAME%%", self.name)
+        main_c = main_c.replace("%%NR_EVENTS%%", str(nr_events))
+        main_c = main_c.replace("%%TRACEPOINT_HANDLERS_SKEL%%", tracepoint_handlers)
+        main_c = main_c.replace("%%TRACEPOINT_ATTACH%%", tracepoint_attach)
+        main_c = main_c.replace("%%TRACEPOINT_DETACH%%", tracepoint_detach)
+        main_c = main_c.replace("%%DESCRIPTION%%", self.description)
 
         return main_c
 
@@ -137,31 +183,142 @@ class dot2k(Dot2c):
 
         return self.__buff_to_string(buff)
 
+    def fill_monitor_class_type(self):
+        if self.monitor_type == "per_task":
+            return "DA_MON_EVENTS_ID"
+        return "DA_MON_EVENTS_IMPLICIT"
+
+    def fill_monitor_class(self):
+        if self.monitor_type == "per_task":
+            return "da_monitor_id"
+        return "da_monitor"
+
+    def fill_tracepoint_args_skel(self, tp_type):
+        buff = []
+        tp_args_event = [
+                ("char *", "state"),
+                ("char *", "event"),
+                ("char *", "next_state"),
+                ("bool ",  "final_state"),
+                ]
+        tp_args_error = [
+                ("char *", "state"),
+                ("char *", "event"),
+                ]
+        tp_args_id = ("int ", "id")
+        tp_args = tp_args_event if tp_type == "event" else tp_args_error
+        if self.monitor_type == "per_task":
+            tp_args.insert(0, tp_args_id)
+        tp_proto_c = ", ".join([a+b for a,b in tp_args])
+        tp_args_c = ", ".join([b for a,b in tp_args])
+        buff.append("	     TP_PROTO(%s)," % tp_proto_c)
+        buff.append("	     TP_ARGS(%s)" % tp_args_c)
+        return self.__buff_to_string(buff)
+
+    def fill_trace_h(self):
+        trace_h = self.trace_h
+        monitor_class = self.fill_monitor_class()
+        monitor_class_type = self.fill_monitor_class_type()
+        tracepoint_args_skel_event = self.fill_tracepoint_args_skel("event")
+        tracepoint_args_skel_error = self.fill_tracepoint_args_skel("error")
+        trace_h = trace_h.replace("%%MODEL_NAME%%", self.name)
+        trace_h = trace_h.replace("%%MODEL_NAME_UP%%", self.name.upper())
+        trace_h = trace_h.replace("%%MONITOR_CLASS%%", monitor_class)
+        trace_h = trace_h.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type)
+        trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_EVENT%%", tracepoint_args_skel_event)
+        trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_ERROR%%", tracepoint_args_skel_error)
+        return trace_h
+
+    def fill_kconfig(self):
+        kconfig = self.kconfig
+        monitor_class_type = self.fill_monitor_class_type()
+        kconfig = kconfig.replace("%%MODEL_NAME%%", self.name)
+        kconfig = kconfig.replace("%%MODEL_NAME_UP%%", self.name.upper())
+        kconfig = kconfig.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type)
+        kconfig = kconfig.replace("%%DESCRIPTION%%", self.description)
+        return kconfig
+
+    def __patch_file(self, file, marker, line):
+        file_to_patch = os.path.join(self.rv_dir, file)
+        content = self.__read_file(file_to_patch)
+        content = content.replace(marker, line + "\n" + marker)
+        self.__write_file(file_to_patch, content)
+
+    def fill_tracepoint_tooltip(self):
+        monitor_class_type = self.fill_monitor_class_type()
+        if self.auto_patch:
+            self.__patch_file("rv_trace.h",
+                            "// Add new monitors based on CONFIG_%s here" % monitor_class_type,
+                            "#include <monitors/%s/%s_trace.h>" % (self.name, self.name))
+            return "  - Patching %s/rv_trace.h, double check the result" % self.rv_dir
+
+        return """  - Edit %s/rv_trace.h:
+Add this line where other tracepoints are included and %s is defined:
+#include <monitors/%s/%s_trace.h>
+""" % (self.rv_dir, monitor_class_type, self.name, self.name)
+
+    def fill_kconfig_tooltip(self):
+        if self.auto_patch:
+            self.__patch_file("Kconfig",
+                            "# Add new monitors here",
+                            "source \"kernel/trace/rv/monitors/%s/Kconfig\"" % (self.name))
+            return "  - Patching %s/Kconfig, double check the result" % self.rv_dir
+
+        return """  - Edit %s/Kconfig:
+Add this line where other monitors are included:
+source \"kernel/trace/rv/monitors/%s/Kconfig\"
+""" % (self.rv_dir, self.name)
+
+    def fill_makefile_tooltip(self):
+        name = self.name
+        name_up = name.upper()
+        if self.auto_patch:
+            self.__patch_file("Makefile",
+                            "# Add new monitors here",
+                            "obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o" % (name_up, name, name))
+            return "  - Patching %s/Makefile, double check the result" % self.rv_dir
+
+        return """  - Edit %s/Makefile:
+Add this line where other monitors are included:
+obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o
+""" % (self.rv_dir, name_up, name, name)
+
+    def fill_monitor_tooltip(self):
+        if self.auto_patch:
+            return "  - Monitor created in %s/monitors/%s" % (self.rv_dir, self. name)
+        return "  - Move %s/ to the kernel's monitor directory (%s/monitors)" % (self.name, self.rv_dir)
+
     def __create_directory(self):
+        path = self.name
+        if self.auto_patch:
+            path = os.path.join(self.rv_dir, "monitors", path)
         try:
-            os.mkdir(self.name)
+            os.mkdir(path)
         except FileExistsError:
             return
         except:
             print("Fail creating the output dir: %s" % self.name)
 
-    def __create_file(self, file_name, content):
-        path = "%s/%s" % (self.name, file_name)
+    def __write_file(self, file_name, content):
         try:
-            file = open(path, 'w')
-        except FileExistsError:
-            return
+            file = open(file_name, 'w')
         except:
-            print("Fail creating file: %s" % path)
+            print("Fail writing to file: %s" % file_name)
 
         file.write(content)
 
         file.close()
 
+    def __create_file(self, file_name, content):
+        path = "%s/%s" % (self.name, file_name)
+        if self.auto_patch:
+            path = os.path.join(self.rv_dir, "monitors", path)
+        self.__write_file(path, content)
+
     def __get_main_name(self):
         path = "%s/%s" % (self.name, "main.c")
-        if os.path.exists(path) == False:
-           return "main.c"
+        if not os.path.exists(path):
+            return "main.c"
         return "__main.c"
 
     def print_files(self):
@@ -175,3 +332,10 @@ class dot2k(Dot2c):
 
         path = "%s.h" % self.name
         self.__create_file(path, model_h)
+
+        trace_h = self.fill_trace_h()
+        path = "%s_trace.h" % self.name
+        self.__create_file(path, trace_h)
+
+        kconfig = self.fill_kconfig()
+        self.__create_file("Kconfig", kconfig)
diff --git a/tools/verification/dot2/dot2k_templates/Kconfig b/tools/verification/dot2/dot2k_templates/Kconfig
new file mode 100644
index 000000000000..90cdc1e9379e
--- /dev/null
+++ b/tools/verification/dot2/dot2k_templates/Kconfig
@@ -0,0 +1,6 @@
+config RV_MON_%%MODEL_NAME_UP%%
+	depends on RV
+	select %%MONITOR_CLASS_TYPE%%
+	bool "%%MODEL_NAME%% monitor"
+	help
+	  %%DESCRIPTION%%
diff --git a/tools/verification/dot2/dot2k_templates/main.c b/tools/verification/dot2/dot2k_templates/main.c
new file mode 100644
index 000000000000..9605ca994416
--- /dev/null
+++ b/tools/verification/dot2/dot2k_templates/main.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
+#include <linux/tracepoint.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/rv.h>
+#include <rv/instrumentation.h>
+#include <rv/da_monitor.h>
+
+#define MODULE_NAME "%%MODEL_NAME%%"
+
+/*
+ * XXX: include required tracepoint headers, e.g.,
+ * #include <trace/events/sched.h>
+ */
+#include <rv_trace.h>
+
+/*
+ * This is the self-generated part of the monitor. Generally, there is no need
+ * to touch this section.
+ */
+#include "%%MODEL_NAME%%.h"
+
+/*
+ * Declare the deterministic automata monitor.
+ *
+ * The rv monitor reference is needed for the monitor declaration.
+ */
+static struct rv_monitor rv_%%MODEL_NAME%%;
+DECLARE_DA_MON_%%MONITOR_TYPE%%(%%MODEL_NAME%%, %%MIN_TYPE%%);
+
+/*
+ * This is the instrumentation part of the monitor.
+ *
+ * This is the section where manual work is required. Here the kernel events
+ * are translated into model's event.
+ *
+ */
+%%TRACEPOINT_HANDLERS_SKEL%%
+static int enable_%%MODEL_NAME%%(void)
+{
+	int retval;
+
+	retval = da_monitor_init_%%MODEL_NAME%%();
+	if (retval)
+		return retval;
+
+%%TRACEPOINT_ATTACH%%
+
+	return 0;
+}
+
+static void disable_%%MODEL_NAME%%(void)
+{
+	rv_%%MODEL_NAME%%.enabled = 0;
+
+%%TRACEPOINT_DETACH%%
+
+	da_monitor_destroy_%%MODEL_NAME%%();
+}
+
+/*
+ * This is the monitor register section.
+ */
+static struct rv_monitor rv_%%MODEL_NAME%% = {
+	.name = "%%MODEL_NAME%%",
+	.description = "%%DESCRIPTION%%",
+	.enable = enable_%%MODEL_NAME%%,
+	.disable = disable_%%MODEL_NAME%%,
+	.reset = da_monitor_reset_all_%%MODEL_NAME%%,
+	.enabled = 0,
+};
+
+static int __init register_%%MODEL_NAME%%(void)
+{
+	rv_register_monitor(&rv_%%MODEL_NAME%%);
+	return 0;
+}
+
+static void __exit unregister_%%MODEL_NAME%%(void)
+{
+	rv_unregister_monitor(&rv_%%MODEL_NAME%%);
+}
+
+module_init(register_%%MODEL_NAME%%);
+module_exit(unregister_%%MODEL_NAME%%);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("dot2k: auto-generated");
+MODULE_DESCRIPTION("%%MODEL_NAME%%: %%DESCRIPTION%%");
diff --git a/tools/verification/dot2/dot2k_templates/main_global.c b/tools/verification/dot2/dot2k_templates/main_global.c
deleted file mode 100644
index a5658bfb9044..000000000000
--- a/tools/verification/dot2/dot2k_templates/main_global.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/ftrace.h>
-#include <linux/tracepoint.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rv.h>
-#include <rv/instrumentation.h>
-#include <rv/da_monitor.h>
-
-#define MODULE_NAME "MODEL_NAME"
-
-/*
- * XXX: include required tracepoint headers, e.g.,
- * #include <trace/events/sched.h>
- */
-#include <trace/events/rv.h>
-
-/*
- * This is the self-generated part of the monitor. Generally, there is no need
- * to touch this section.
- */
-#include "MODEL_NAME.h"
-
-/*
- * Declare the deterministic automata monitor.
- *
- * The rv monitor reference is needed for the monitor declaration.
- */
-static struct rv_monitor rv_MODEL_NAME;
-DECLARE_DA_MON_GLOBAL(MODEL_NAME, MIN_TYPE);
-
-/*
- * This is the instrumentation part of the monitor.
- *
- * This is the section where manual work is required. Here the kernel events
- * are translated into model's event.
- *
- */
-TRACEPOINT_HANDLERS_SKEL
-static int enable_MODEL_NAME(void)
-{
-	int retval;
-
-	retval = da_monitor_init_MODEL_NAME();
-	if (retval)
-		return retval;
-
-TRACEPOINT_ATTACH
-
-	return 0;
-}
-
-static void disable_MODEL_NAME(void)
-{
-	rv_MODEL_NAME.enabled = 0;
-
-TRACEPOINT_DETACH
-
-	da_monitor_destroy_MODEL_NAME();
-}
-
-/*
- * This is the monitor register section.
- */
-static struct rv_monitor rv_MODEL_NAME = {
-	.name = "MODEL_NAME",
-	.description = "auto-generated MODEL_NAME",
-	.enable = enable_MODEL_NAME,
-	.disable = disable_MODEL_NAME,
-	.reset = da_monitor_reset_all_MODEL_NAME,
-	.enabled = 0,
-};
-
-static int __init register_MODEL_NAME(void)
-{
-	rv_register_monitor(&rv_MODEL_NAME);
-	return 0;
-}
-
-static void __exit unregister_MODEL_NAME(void)
-{
-	rv_unregister_monitor(&rv_MODEL_NAME);
-}
-
-module_init(register_MODEL_NAME);
-module_exit(unregister_MODEL_NAME);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("dot2k: auto-generated");
-MODULE_DESCRIPTION("MODEL_NAME");
diff --git a/tools/verification/dot2/dot2k_templates/main_per_cpu.c b/tools/verification/dot2/dot2k_templates/main_per_cpu.c
deleted file mode 100644
index 03539a97633f..000000000000
--- a/tools/verification/dot2/dot2k_templates/main_per_cpu.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/ftrace.h>
-#include <linux/tracepoint.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rv.h>
-#include <rv/instrumentation.h>
-#include <rv/da_monitor.h>
-
-#define MODULE_NAME "MODEL_NAME"
-
-/*
- * XXX: include required tracepoint headers, e.g.,
- * #include <linux/trace/events/sched.h>
- */
-#include <trace/events/rv.h>
-
-/*
- * This is the self-generated part of the monitor. Generally, there is no need
- * to touch this section.
- */
-#include "MODEL_NAME.h"
-
-/*
- * Declare the deterministic automata monitor.
- *
- * The rv monitor reference is needed for the monitor declaration.
- */
-static struct rv_monitor rv_MODEL_NAME;
-DECLARE_DA_MON_PER_CPU(MODEL_NAME, MIN_TYPE);
-
-/*
- * This is the instrumentation part of the monitor.
- *
- * This is the section where manual work is required. Here the kernel events
- * are translated into model's event.
- *
- */
-TRACEPOINT_HANDLERS_SKEL
-static int enable_MODEL_NAME(void)
-{
-	int retval;
-
-	retval = da_monitor_init_MODEL_NAME();
-	if (retval)
-		return retval;
-
-TRACEPOINT_ATTACH
-
-	return 0;
-}
-
-static void disable_MODEL_NAME(void)
-{
-	rv_MODEL_NAME.enabled = 0;
-
-TRACEPOINT_DETACH
-
-	da_monitor_destroy_MODEL_NAME();
-}
-
-/*
- * This is the monitor register section.
- */
-static struct rv_monitor rv_MODEL_NAME = {
-	.name = "MODEL_NAME",
-	.description = "auto-generated MODEL_NAME",
-	.enable = enable_MODEL_NAME,
-	.disable = disable_MODEL_NAME,
-	.reset = da_monitor_reset_all_MODEL_NAME,
-	.enabled = 0,
-};
-
-static int __init register_MODEL_NAME(void)
-{
-	rv_register_monitor(&rv_MODEL_NAME);
-	return 0;
-}
-
-static void __exit unregister_MODEL_NAME(void)
-{
-	rv_unregister_monitor(&rv_MODEL_NAME);
-}
-
-module_init(register_MODEL_NAME);
-module_exit(unregister_MODEL_NAME);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("dot2k: auto-generated");
-MODULE_DESCRIPTION("MODEL_NAME");
diff --git a/tools/verification/dot2/dot2k_templates/main_per_task.c b/tools/verification/dot2/dot2k_templates/main_per_task.c
deleted file mode 100644
index ffd92af87a86..000000000000
--- a/tools/verification/dot2/dot2k_templates/main_per_task.c
+++ /dev/null
@@ -1,91 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/ftrace.h>
-#include <linux/tracepoint.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rv.h>
-#include <rv/instrumentation.h>
-#include <rv/da_monitor.h>
-
-#define MODULE_NAME "MODEL_NAME"
-
-/*
- * XXX: include required tracepoint headers, e.g.,
- * #include <linux/trace/events/sched.h>
- */
-#include <trace/events/rv.h>
-
-/*
- * This is the self-generated part of the monitor. Generally, there is no need
- * to touch this section.
- */
-#include "MODEL_NAME.h"
-
-/*
- * Declare the deterministic automata monitor.
- *
- * The rv monitor reference is needed for the monitor declaration.
- */
-static struct rv_monitor rv_MODEL_NAME;
-DECLARE_DA_MON_PER_TASK(MODEL_NAME, MIN_TYPE);
-
-/*
- * This is the instrumentation part of the monitor.
- *
- * This is the section where manual work is required. Here the kernel events
- * are translated into model's event.
- *
- */
-TRACEPOINT_HANDLERS_SKEL
-static int enable_MODEL_NAME(void)
-{
-	int retval;
-
-	retval = da_monitor_init_MODEL_NAME();
-	if (retval)
-		return retval;
-
-TRACEPOINT_ATTACH
-
-	return 0;
-}
-
-static void disable_MODEL_NAME(void)
-{
-	rv_MODEL_NAME.enabled = 0;
-
-TRACEPOINT_DETACH
-
-	da_monitor_destroy_MODEL_NAME();
-}
-
-/*
- * This is the monitor register section.
- */
-static struct rv_monitor rv_MODEL_NAME = {
-	.name = "MODEL_NAME",
-	.description = "auto-generated MODEL_NAME",
-	.enable = enable_MODEL_NAME,
-	.disable = disable_MODEL_NAME,
-	.reset = da_monitor_reset_all_MODEL_NAME,
-	.enabled = 0,
-};
-
-static int __init register_MODEL_NAME(void)
-{
-	rv_register_monitor(&rv_MODEL_NAME);
-	return 0;
-}
-
-static void __exit unregister_MODEL_NAME(void)
-{
-	rv_unregister_monitor(&rv_MODEL_NAME);
-}
-
-module_init(register_MODEL_NAME);
-module_exit(unregister_MODEL_NAME);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("dot2k: auto-generated");
-MODULE_DESCRIPTION("MODEL_NAME");
diff --git a/tools/verification/dot2/dot2k_templates/trace.h b/tools/verification/dot2/dot2k_templates/trace.h
new file mode 100644
index 000000000000..87d3a1308926
--- /dev/null
+++ b/tools/verification/dot2/dot2k_templates/trace.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_%%MODEL_NAME_UP%%
+DEFINE_EVENT(event_%%MONITOR_CLASS%%, event_%%MODEL_NAME%%,
+%%TRACEPOINT_ARGS_SKEL_EVENT%%);
+
+DEFINE_EVENT(error_%%MONITOR_CLASS%%, error_%%MODEL_NAME%%,
+%%TRACEPOINT_ARGS_SKEL_ERROR%%);
+#endif /* CONFIG_RV_MON_%%MODEL_NAME_UP%% */
diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile
index 411d62b3d8eb..5b898360ba48 100644
--- a/tools/verification/rv/Makefile
+++ b/tools/verification/rv/Makefile
@@ -35,12 +35,6 @@ FEATURE_TESTS	+= libtracefs
 FEATURE_DISPLAY	:= libtraceevent
 FEATURE_DISPLAY	+= libtracefs
 
-ifeq ($(V),1)
-  Q		=
-else
-  Q		= @
-endif
-
 all: $(RV)
 
 include $(srctree)/tools/build/Makefile.include