454 files changed, 33977 insertions, 4450 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 28a715a8ef2b..97ad4d551d44 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -4,11 +4,10 @@ bind_timewait
 bind_wildcard
 busy_poller
 cmsg_sender
-diag_uid
 epoll_busy_poll
 fin_ack_lat
-gro
 hwtstamp_config
+icmp_rfc4884
 io_uring_zerocopy_tx
 ioam6_parser
 ip_defrag
@@ -16,11 +15,12 @@ ip_local_port_range
 ipsec
 ipv6_flowlabel
 ipv6_flowlabel_mgr
+ipv6_fragmentation
 log.txt
-msg_oob
 msg_zerocopy
 netlink-dumps
 nettest
+proc_net_pktgen
 psock_fanout
 psock_snd
 psock_tpacket
@@ -33,23 +33,24 @@ reuseport_bpf_numa
 reuseport_dualstack
 rxtimestamp
 sctp_hello
-scm_pidfd
-scm_rights
 sk_bind_sendto_listen
 sk_connect_zero_addr
 sk_so_peek_off
+skf_net_off
 socket
 so_incoming_cpu
 so_netns_cookie
 so_txtime
+so_rcv_listener
 stress_reuseport_listen
 tap
 tcp_fastopen_backup_key
 tcp_inq
 tcp_mmap
+tcp_port_share
+tfo
 timestamping
 tls
-toeplitz
 tools
 tun
 txring_overwrite
@@ -57,4 +58,3 @@ txtimestamp
 udpgso
 udpgso_bench_rx
 udpgso_bench_tx
-unix_connect
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 73ee88d6b043..a275ed584026 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -1,112 +1,214 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for net selftests
 
-CFLAGS +=  -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
 # Additional include paths needed by kselftest.h
 CFLAGS += -I../
 
-TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
-	      rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
-TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
-TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
-TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
-TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh
-TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
-TEST_PROGS += route_localnet.sh
-TEST_PROGS += reuseaddr_ports_exhausted.sh
-TEST_PROGS += txtimestamp.sh
-TEST_PROGS += vrf-xfrm-tests.sh
-TEST_PROGS += rxtimestamp.sh
-TEST_PROGS += drop_monitor_tests.sh
-TEST_PROGS += vrf_route_leaking.sh
-TEST_PROGS += bareudp.sh
-TEST_PROGS += amt.sh
-TEST_PROGS += unicast_extensions.sh
-TEST_PROGS += udpgro_fwd.sh
-TEST_PROGS += udpgro_frglist.sh
-TEST_PROGS += veth.sh
-TEST_PROGS += ioam6.sh
-TEST_PROGS += gro.sh
-TEST_PROGS += gre_gso.sh
-TEST_PROGS += cmsg_so_mark.sh
-TEST_PROGS += cmsg_so_priority.sh
-TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
-TEST_PROGS += netns-name.sh
-TEST_PROGS += nl_netdev.py
-TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
-TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
-TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
-TEST_PROGS += srv6_hencap_red_l3vpn_test.sh
-TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
-TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
-TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
-TEST_PROGS += srv6_end_flavors_test.sh
-TEST_PROGS += srv6_end_dx4_netfilter_test.sh
-TEST_PROGS += srv6_end_dx6_netfilter_test.sh
-TEST_PROGS += vrf_strict_mode_test.sh
-TEST_PROGS += arp_ndisc_evict_nocarrier.sh
-TEST_PROGS += ndisc_unsolicited_na_test.sh
-TEST_PROGS += arp_ndisc_untracked_subnets.sh
-TEST_PROGS += stress_reuseport_listen.sh
-TEST_PROGS += l2_tos_ttl_inherit.sh
-TEST_PROGS += bind_bhash.sh
-TEST_PROGS += ip_local_port_range.sh
-TEST_PROGS += rps_default_mask.sh
-TEST_PROGS += big_tcp.sh
-TEST_PROGS += netns-sysctl.sh
-TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
-TEST_GEN_FILES =  socket nettest
-TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
-TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
-TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
-TEST_GEN_FILES += tcp_fastopen_backup_key
-TEST_GEN_FILES += fin_ack_lat
-TEST_GEN_FILES += reuseaddr_ports_exhausted
-TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
-TEST_GEN_FILES += ipsec
-TEST_GEN_FILES += ioam6_parser
-TEST_GEN_FILES += gro
-TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll
-TEST_GEN_FILES += toeplitz
-TEST_GEN_FILES += cmsg_sender
-TEST_GEN_FILES += stress_reuseport_listen
-TEST_PROGS += test_vxlan_vnifiltering.sh
-TEST_GEN_FILES += io_uring_zerocopy_tx
-TEST_PROGS += io_uring_zerocopy_tx.sh
-TEST_GEN_FILES += bind_bhash
-TEST_GEN_PROGS += sk_bind_sendto_listen
-TEST_GEN_PROGS += sk_connect_zero_addr
-TEST_GEN_PROGS += sk_so_peek_off
-TEST_PROGS += test_ingress_egress_chaining.sh
-TEST_GEN_PROGS += so_incoming_cpu
-TEST_PROGS += sctp_vrf.sh
-TEST_GEN_FILES += sctp_hello
-TEST_GEN_FILES += ip_local_port_range
-TEST_GEN_PROGS += bind_wildcard
-TEST_GEN_PROGS += bind_timewait
-TEST_PROGS += test_vxlan_mdb.sh
-TEST_PROGS += test_bridge_neigh_suppress.sh
-TEST_PROGS += test_vxlan_nolocalbypass.sh
-TEST_PROGS += test_bridge_backup_port.sh
-TEST_PROGS += fdb_flush.sh fdb_notify.sh
-TEST_PROGS += fq_band_pktlimit.sh
-TEST_PROGS += vlan_hw_filter.sh
-TEST_PROGS += vlan_bridge_binding.sh
-TEST_PROGS += bpf_offload.py
-TEST_PROGS += ipv6_route_update_soft_lockup.sh
-TEST_PROGS += busy_poll_test.sh
+TEST_PROGS := \
+	altnames.sh \
+	amt.sh \
+	arp_ndisc_evict_nocarrier.sh \
+	arp_ndisc_untracked_subnets.sh \
+	bareudp.sh \
+	big_tcp.sh \
+	bind_bhash.sh \
+	bpf_offload.py \
+	bridge_stp_mode.sh \
+	bridge_vlan_dump.sh \
+	broadcast_ether_dst.sh \
+	broadcast_pmtu.sh \
+	busy_poll_test.sh \
+	cmsg_ip.sh \
+	cmsg_so_mark.sh \
+	cmsg_so_priority.sh \
+	cmsg_time.sh \
+	double_udp_encap.sh \
+	drop_monitor_tests.sh \
+	fcnal-ipv4.sh \
+	fcnal-ipv6.sh \
+	fcnal-other.sh \
+	fdb_flush.sh \
+	fdb_notify.sh \
+	fib-onlink-tests.sh \
+	fib_nexthop_multiprefix.sh \
+	fib_nexthop_nongw.sh \
+	fib_nexthops.sh \
+	fib_rule_tests.sh \
+	fib_tests.sh \
+	fin_ack_lat.sh \
+	fq_band_pktlimit.sh \
+	gre_gso.sh \
+	gre_ipv6_lladdr.sh \
+	icmp.sh \
+	icmp_redirect.sh \
+	io_uring_zerocopy_tx.sh \
+	ioam6.sh \
+	ip6_gre_headroom.sh \
+	ip6_tunnel.sh \
+	ip_defrag.sh \
+	ip_local_port_range.sh \
+	ipv6_flowlabel.sh \
+	ipv6_force_forwarding.sh \
+	ipv6_route_update_soft_lockup.sh \
+	ipvtap_test.sh \
+	l2_tos_ttl_inherit.sh \
+	l2tp.sh \
+	link_netns.py \
+	lwt_dst_cache_ref_loop.sh \
+	macvlan_mcast_shared_mac.sh \
+	msg_zerocopy.sh \
+	nat6to4.sh \
+	ndisc_unsolicited_na_test.sh \
+	netdev-l2addr.sh \
+	netdevice.sh \
+	netns-name.sh \
+	netns-sysctl.sh \
+	nk_qlease.py \
+	nl_netdev.py \
+	nl_nlctrl.py \
+	pmtu.sh \
+	psock_snd.sh \
+	reuseaddr_ports_exhausted.sh \
+	reuseport_addr_any.sh \
+	route_hint.sh \
+	route_localnet.sh \
+	rps_default_mask.sh \
+	rtnetlink.py \
+	rtnetlink.sh \
+	rtnetlink_notification.sh \
+	run_afpackettests \
+	run_netsocktests \
+	rxtimestamp.sh \
+	sctp_vrf.sh \
+	skf_net_off.sh \
+	so_txtime.sh \
+	srv6_end_dt46_l3vpn_test.sh \
+	srv6_end_dt4_l3vpn_test.sh \
+	srv6_end_dt6_l3vpn_test.sh \
+	srv6_end_dx4_netfilter_test.sh \
+	srv6_end_dx6_netfilter_test.sh \
+	srv6_end_flavors_test.sh \
+	srv6_end_next_csid_l3vpn_test.sh \
+	srv6_end_x_next_csid_l3vpn_test.sh \
+	srv6_hencap_red_l3vpn_test.sh \
+	srv6_hl2encap_red_l2vpn_test.sh \
+	srv6_iptunnel_cache.sh \
+	stress_reuseport_listen.sh \
+	tcp_fastopen_backup_key.sh \
+	test_bpf.sh \
+	test_bridge_backup_port.sh \
+	test_bridge_neigh_suppress.sh \
+	test_ingress_egress_chaining.sh \
+	test_neigh.sh \
+	test_so_rcv.sh \
+	test_vxlan_fdb_changelink.sh \
+	test_vxlan_mdb.sh \
+	test_vxlan_nh.sh \
+	test_vxlan_nolocalbypass.sh \
+	test_vxlan_under_vrf.sh \
+	test_vxlan_vnifiltering.sh \
+	tfo_passive.sh \
+	traceroute.sh \
+	txtimestamp.sh \
+	udpgro.sh \
+	udpgro_bench.sh \
+	udpgro_frglist.sh \
+	udpgro_fwd.sh \
+	udpgso.sh \
+	udpgso_bench.sh \
+	unicast_extensions.sh \
+	veth.sh \
+	vlan_bridge_binding.sh \
+	vlan_hw_filter.sh \
+	vrf-xfrm-tests.sh \
+	vrf_route_leaking.sh \
+	vrf_strict_mode_test.sh \
+	xfrm_policy.sh \
+	xfrm_state.sh \
+# end of TEST_PROGS
+
+TEST_PROGS_EXTENDED := \
+	xfrm_policy_add_speed.sh \
+# end of TEST_PROGS_EXTENDED
+
+TEST_GEN_FILES := \
+	bind_bhash \
+	cmsg_sender \
+	fin_ack_lat \
+	hwtstamp_config \
+	io_uring_zerocopy_tx \
+	ioam6_parser \
+	ip_defrag \
+	ip_local_port_range \
+	ipsec \
+	ipv6_flowlabel \
+	ipv6_flowlabel_mgr \
+	msg_zerocopy \
+	nettest \
+	psock_fanout \
+	psock_snd \
+	psock_tpacket \
+	reuseaddr_ports_exhausted \
+	reuseport_addr_any \
+	rxtimestamp \
+	sctp_hello \
+	skf_net_off \
+	so_netns_cookie \
+	so_rcv_listener \
+	so_txtime \
+	socket \
+	stress_reuseport_listen \
+	tcp_fastopen_backup_key \
+	tcp_inq \
+	tcp_mmap \
+	tfo \
+	timestamping \
+	txring_overwrite \
+	txtimestamp \
+	udpgso \
+	udpgso_bench_rx \
+	udpgso_bench_tx \
+# end of TEST_GEN_FILES
+
+TEST_GEN_PROGS := \
+	bind_timewait \
+	bind_wildcard \
+	epoll_busy_poll \
+	icmp_rfc4884 \
+	ipv6_fragmentation \
+	proc_net_pktgen \
+	reuseaddr_conflict \
+	reuseport_bpf \
+	reuseport_bpf_cpu \
+	reuseport_bpf_numa \
+	reuseport_dualstack \
+	sk_bind_sendto_listen \
+	sk_connect_zero_addr \
+	sk_so_peek_off \
+	so_incoming_cpu \
+	tap \
+	tcp_port_share \
+	tls \
+# end of TEST_GEN_PROGS
+
+TEST_FILES := \
+	fcnal-test.sh \
+	in_netns.sh \
+	lib.sh \
+	settings \
+# end of TEST_FILES
 
 # YNL files, must be before "include ..lib.mk"
-YNL_GEN_FILES := busy_poller netlink-dumps
-TEST_GEN_FILES += $(YNL_GEN_FILES)
+YNL_GEN_FILES := busy_poller
+YNL_GEN_PROGS := \
+	netlink-dumps \
+	tun \
+# end of YNL_GEN_PROGS
 
-TEST_FILES := settings
-TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+TEST_GEN_PROGS += $(YNL_GEN_PROGS)
 
 TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
 
@@ -115,7 +217,14 @@ TEST_INCLUDES := forwarding/lib.sh
 include ../lib.mk
 
 # YNL build
-YNL_GENS := netdev
+YNL_GENS := \
+	netdev \
+	rt-addr \
+	rt-link \
+	rt-neigh \
+	rt-route \
+# end of YNL_GENS
+
 include ynl.mk
 
 $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
diff --git a/tools/testing/selftests/net/af_unix/.gitignore b/tools/testing/selftests/net/af_unix/.gitignore
new file mode 100644
index 000000000000..240b26740c9e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/.gitignore
@@ -0,0 +1,8 @@
+diag_uid
+msg_oob
+scm_inq
+scm_pidfd
+scm_rights
+so_peek_off
+unix_connect
+unix_connreset
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 50584479540b..4c0375e28bbe 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,19 @@
-CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
+top_srcdir := ../../../../..
+include $(top_srcdir)/scripts/Makefile.compiler
+
+cc-option = $(call __cc-option, $(CC),,$(1),$(2))
+
+CFLAGS += $(KHDR_INCLUDES) -Wall $(call cc-option,-Wflex-array-member-not-at-end)
+
+TEST_GEN_PROGS := \
+	diag_uid \
+	msg_oob \
+	scm_inq \
+	scm_pidfd \
+	scm_rights \
+	so_peek_off \
+	unix_connect \
+	unix_connreset \
+# end of TEST_GEN_PROGS
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config
index 37368567768c..b5429c15a53c 100644
--- a/tools/testing/selftests/net/af_unix/config
+++ b/tools/testing/selftests/net/af_unix/config
@@ -1,3 +1,3 @@
-CONFIG_UNIX=y
 CONFIG_AF_UNIX_OOB=y
+CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
index 79a3dd75590e..da7d50cedee6 100644
--- a/tools/testing/selftests/net/af_unix/diag_uid.c
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -14,7 +14,7 @@
 #include <sys/types.h>
 #include <sys/un.h>
 
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 FIXTURE(diag_uid)
 {
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
index 3ed3882a93b8..1b499d56656c 100644
--- a/tools/testing/selftests/net/af_unix/msg_oob.c
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -11,7 +11,7 @@
 #include <sys/signalfd.h>
 #include <sys/socket.h>
 
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 #define BUF_SZ	32
 
@@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata,
 static void __recvpair(struct __test_metadata *_metadata,
 		       FIXTURE_DATA(msg_oob) *self,
 		       const char *expected_buf, int expected_len,
-		       int buf_len, int flags)
+		       int buf_len, int flags, bool is_sender)
 {
 	int i, ret[2], recv_errno[2], expected_errno = 0;
 	char recv_buf[2][BUF_SZ] = {};
@@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata,
 	errno = 0;
 
 	for (i = 0; i < 2; i++) {
-		ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+		int index = is_sender ? i * 2 : i * 2 + 1;
+
+		ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags);
 		recv_errno[i] = errno;
 	}
 
@@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
 		ASSERT_EQ(answ[0], answ[1]);
 }
 
+static void __resetpair(struct __test_metadata *_metadata,
+			FIXTURE_DATA(msg_oob) *self,
+			const FIXTURE_VARIANT(msg_oob) *variant,
+			bool reset)
+{
+	int i;
+
+	for (i = 0; i < 2; i++)
+		close(self->fd[i * 2 + 1]);
+
+	__recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1,
+		   variant->peek ? MSG_PEEK : 0, true);
+}
+
 #define sendpair(buf, len, flags)					\
 	__sendpair(_metadata, self, buf, len, flags)
 
@@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
 		if (variant->peek)					\
 			__recvpair(_metadata, self,			\
 				   expected_buf, expected_len,		\
-				   buf_len, (flags) | MSG_PEEK);	\
+				   buf_len, (flags) | MSG_PEEK, false);	\
 		__recvpair(_metadata, self,				\
-			   expected_buf, expected_len, buf_len, flags);	\
+			   expected_buf, expected_len,			\
+			   buf_len, flags, false);			\
 	} while (0)
 
 #define epollpair(oob_remaining)					\
@@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
 #define setinlinepair()							\
 	__setinlinepair(_metadata, self)
 
+#define resetpair(reset)						\
+	__resetpair(_metadata, self, variant, reset)
+
 #define tcp_incompliant							\
 	for (self->tcp_compliant = false;				\
 	     self->tcp_compliant == false;				\
@@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob)
 	recvpair("", -EINVAL, 1, MSG_OOB);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(true);
+}
+
+TEST_F(msg_oob, non_oob_no_reset)
+{
+	sendpair("x", 1, 0);
+	epollpair(false);
+	siocatmarkpair(false);
+
+	recvpair("x", 1, 1, 0);
+	epollpair(false);
+	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, oob)
@@ -355,6 +390,19 @@ TEST_F(msg_oob, oob)
 	recvpair("x", 1, 1, MSG_OOB);
 	epollpair(false);
 	siocatmarkpair(true);
+
+	tcp_incompliant {
+		resetpair(false);		/* TCP sets -ECONNRESET for ex-OOB. */
+	}
+}
+
+TEST_F(msg_oob, oob_reset)
+{
+	sendpair("x", 1, MSG_OOB);
+	epollpair(true);
+	siocatmarkpair(true);
+
+	resetpair(true);
 }
 
 TEST_F(msg_oob, oob_drop)
@@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop)
 	recvpair("", -EINVAL, 1, MSG_OOB);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, oob_ahead)
@@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead)
 	recvpair("hell", 4, 4, 0);
 	epollpair(false);
 	siocatmarkpair(true);
+
+	tcp_incompliant {
+		resetpair(false);		/* TCP sets -ECONNRESET for ex-OOB. */
+	}
 }
 
 TEST_F(msg_oob, oob_break)
@@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break)
 
 	recvpair("", -EAGAIN, 1, 0);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, oob_ahead_break)
@@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break)
 	recvpair("world", 5, 5, 0);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, oob_break_drop)
@@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop)
 	recvpair("", -EINVAL, 1, MSG_OOB);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, ex_oob_break)
@@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break)
 	recvpair("ld", 2, 2, 0);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, ex_oob_drop)
@@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop)
 		epollpair(false);
 		siocatmarkpair(true);
 	}
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, ex_oob_drop_2)
@@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2)
 		epollpair(false);
 		siocatmarkpair(true);
 	}
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, ex_oob_oob)
@@ -546,6 +612,54 @@ TEST_F(msg_oob, ex_oob_oob)
 	recvpair("", -EINVAL, 1, MSG_OOB);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob)
+{
+	sendpair("x", 1, MSG_OOB);
+	epollpair(true);
+	siocatmarkpair(true);
+
+	recvpair("x", 1, 1, MSG_OOB);
+	epollpair(false);
+	siocatmarkpair(true);
+
+	sendpair("y", 1, MSG_OOB);
+	epollpair(true);
+	siocatmarkpair(true);
+
+	recvpair("y", 1, 1, MSG_OOB);
+	epollpair(false);
+	siocatmarkpair(true);
+
+	tcp_incompliant {
+		resetpair(false);		/* TCP sets -ECONNRESET for ex-OOB. */
+	}
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob_oob)
+{
+	sendpair("x", 1, MSG_OOB);
+	epollpair(true);
+	siocatmarkpair(true);
+
+	recvpair("x", 1, 1, MSG_OOB);
+	epollpair(false);
+	siocatmarkpair(true);
+
+	sendpair("y", 1, MSG_OOB);
+	epollpair(true);
+	siocatmarkpair(true);
+
+	recvpair("y", 1, 1, MSG_OOB);
+	epollpair(false);
+	siocatmarkpair(true);
+
+	sendpair("z", 1, MSG_OOB);
+	epollpair(true);
+	siocatmarkpair(true);
 }
 
 TEST_F(msg_oob, ex_oob_ahead_break)
@@ -576,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break)
 	recvpair("d", 1, 1, MSG_OOB);
 	epollpair(false);
 	siocatmarkpair(true);
+
+	tcp_incompliant {
+		resetpair(false);		/* TCP sets -ECONNRESET for ex-OOB. */
+	}
 }
 
 TEST_F(msg_oob, ex_oob_siocatmark)
@@ -595,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark)
 	recvpair("hell", 4, 4, 0);		/* Intentionally stop at ex-OOB. */
 	epollpair(true);
 	siocatmarkpair(false);
+
+	resetpair(true);
 }
 
 TEST_F(msg_oob, inline_oob)
@@ -612,6 +732,8 @@ TEST_F(msg_oob, inline_oob)
 	recvpair("x", 1, 1, 0);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, inline_oob_break)
@@ -633,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break)
 	recvpair("o", 1, 1, 0);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, inline_oob_ahead_break)
@@ -661,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break)
 
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, inline_ex_oob_break)
@@ -686,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break)
 	recvpair("rld", 3, 3, 0);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, inline_ex_oob_no_drop)
@@ -707,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop)
 	recvpair("y", 1, 1, 0);
 	epollpair(false);
 	siocatmarkpair(false);
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, inline_ex_oob_drop)
@@ -731,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop)
 		epollpair(false);
 		siocatmarkpair(false);
 	}
+
+	resetpair(false);
 }
 
 TEST_F(msg_oob, inline_ex_oob_siocatmark)
@@ -752,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark)
 	recvpair("hell", 4, 4, 0);		/* Intentionally stop at ex-OOB. */
 	epollpair(true);
 	siocatmarkpair(false);
+
+	resetpair(true);
 }
 
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
new file mode 100644
index 000000000000..3a86be9bda17
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "kselftest_harness.h"
+
+/* Number of messages queued before the receiver starts draining. */
+#define NR_CHUNKS	100
+/* Payload size of each message, in bytes. */
+#define MSG_LEN		256
+
+FIXTURE(scm_inq)
+{
+	int fd[2];	/* socketpair(): fd[0] sends, fd[1] receives */
+};
+
+FIXTURE_VARIANT(scm_inq)
+{
+	int type;	/* socket type passed to socketpair() */
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, stream)
+{
+	.type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, dgram)
+{
+	.type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, seqpacket)
+{
+	.type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(scm_inq)
+{
+	int err;
+
+	err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd);
+	ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(scm_inq)
+{
+	close(self->fd[0]);
+	close(self->fd[1]);
+}
+
+/* Queue NR_CHUNKS messages of MSG_LEN zero bytes each on fd[0]. */
+static void send_chunks(struct __test_metadata *_metadata,
+			FIXTURE_DATA(scm_inq) *self)
+{
+	char buf[MSG_LEN] = {};
+	int i, ret;
+
+	for (i = 0; i < NR_CHUNKS; i++) {
+		ret = send(self->fd[0], buf, sizeof(buf), 0);
+		ASSERT_EQ(sizeof(buf), ret);
+	}
+}
+
+/*
+ * Read the queued data back from fd[1] one MSG_LEN chunk at a time and
+ * check that every recvmsg() carries an SCM_INQ cmsg whose value matches
+ * what ioctl(SIOCINQ) reports right afterwards.
+ */
+static void recv_chunks(struct __test_metadata *_metadata,
+			FIXTURE_DATA(scm_inq) *self)
+{
+	char cmsg_buf[CMSG_SPACE(sizeof(int))];
+	struct msghdr msg = {};
+	struct iovec iov = {};
+	struct cmsghdr *cmsg;
+	char buf[MSG_LEN];
+	int i, ret;
+	int inq;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	iov.iov_base = buf;
+	iov.iov_len = sizeof(buf);
+
+	for (i = 0; i < NR_CHUNKS; i++) {
+		memset(buf, 0, sizeof(buf));
+		memset(cmsg_buf, 0, sizeof(cmsg_buf));
+
+		/*
+		 * recvmsg() overwrites msg_controllen with the length of the
+		 * control data it returned, so re-arm the buffer every time.
+		 */
+		msg.msg_control = cmsg_buf;
+		msg.msg_controllen = sizeof(cmsg_buf);
+
+		ret = recvmsg(self->fd[1], &msg, 0);
+		ASSERT_EQ(MSG_LEN, ret);
+
+		cmsg = CMSG_FIRSTHDR(&msg);
+		ASSERT_NE(NULL, cmsg);
+		ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len);
+		ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level);
+		ASSERT_EQ(SCM_INQ, cmsg->cmsg_type);
+
+		ret = ioctl(self->fd[1], SIOCINQ, &inq);
+		ASSERT_EQ(0, ret);
+		ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq);
+	}
+}
+
+/*
+ * SO_INQ is expected to be accepted only on the SOCK_STREAM variant; the
+ * other variants must fail with ENOPROTOOPT.
+ */
+TEST_F(scm_inq, basic)
+{
+	int err, inq;
+
+	err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int));
+	if (variant->type != SOCK_STREAM) {
+		/* errno is only meaningful if the call actually failed. */
+		ASSERT_EQ(-1, err);
+		ASSERT_EQ(-ENOPROTOOPT, -errno);
+		return;
+	}
+
+	ASSERT_EQ(0, err);
+
+	err = ioctl(self->fd[1], SIOCINQ, &inq);
+	ASSERT_EQ(0, err);
+	ASSERT_EQ(0, inq);
+
+	send_chunks(_metadata, self);
+	recv_chunks(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
index 7e534594167e..2c18b92a2603 100644
--- a/tools/testing/selftests/net/af_unix/scm_pidfd.c
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -15,7 +15,8 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 
-#include "../../kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
+#include "kselftest_harness.h"
 
 #define clean_errno() (errno == 0 ? "None" : strerror(errno))
 #define log_err(MSG, ...)                                                   \
@@ -26,6 +27,8 @@
 #define SCM_PIDFD 0x04
 #endif
 
+#define CHILD_EXIT_CODE_OK 123
+
 static void child_die()
 {
 	exit(1);
@@ -126,16 +129,78 @@ out:
 	return result;
 }
 
+/* Payload pointers into a received control buffer; NULL when absent. */
+struct cmsg_data {
+	struct ucred *ucred;
+	int *pidfd;
+};
+
+/*
+ * Locate the SCM_PIDFD and SCM_CREDENTIALS control messages in @msg and
+ * point @res at their payloads.
+ *
+ * Returns 0 when both are present.  Returns 1, after logging the reason,
+ * if @msg was truncated, a control message is too short, or either one
+ * is missing.
+ */
+static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res)
+{
+	struct cmsghdr *cmsg;
+
+	/* Callers hand in an uninitialised struct; start from "not found". */
+	res->ucred = NULL;
+	res->pidfd = NULL;
+
+	if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+		log_err("recvmsg: truncated");
+		return 1;
+	}
+
+	for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+	     cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		if (cmsg->cmsg_level == SOL_SOCKET &&
+		    cmsg->cmsg_type == SCM_PIDFD) {
+			/* cmsg_len counts the header too, hence CMSG_LEN(). */
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*res->pidfd))) {
+				log_err("CMSG parse: SCM_PIDFD wrong len");
+				return 1;
+			}
+
+			res->pidfd = (void *)CMSG_DATA(cmsg);
+		}
+
+		if (cmsg->cmsg_level == SOL_SOCKET &&
+		    cmsg->cmsg_type == SCM_CREDENTIALS) {
+			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*res->ucred))) {
+				log_err("CMSG parse: SCM_CREDENTIALS wrong len");
+				return 1;
+			}
+
+			res->ucred = (void *)CMSG_DATA(cmsg);
+		}
+	}
+
+	if (!res->pidfd) {
+		log_err("CMSG parse: SCM_PIDFD not found");
+		return 1;
+	}
+
+	if (!res->ucred) {
+		log_err("CMSG parse: SCM_CREDENTIALS not found");
+		return 1;
+	}
+
+	return 0;
+}
+
 static int cmsg_check(int fd)
 {
 	struct msghdr msg = { 0 };
-	struct cmsghdr *cmsg;
+	struct cmsg_data res;
 	struct iovec iov;
-	struct ucred *ucred = NULL;
 	int data = 0;
 	char control[CMSG_SPACE(sizeof(struct ucred)) +
 		     CMSG_SPACE(sizeof(int))] = { 0 };
-	int *pidfd = NULL;
 	pid_t parent_pid;
 	int err;
 
@@ -158,53 +209,98 @@ static int cmsg_check(int fd)
 		return 1;
 	}
 
-	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
-	     cmsg = CMSG_NXTHDR(&msg, cmsg)) {
-		if (cmsg->cmsg_level == SOL_SOCKET &&
-		    cmsg->cmsg_type == SCM_PIDFD) {
-			if (cmsg->cmsg_len < sizeof(*pidfd)) {
-				log_err("CMSG parse: SCM_PIDFD wrong len");
-				return 1;
-			}
+	/* send(pfd, "x", sizeof(char), 0) */
+	if (data != 'x') {
+		log_err("recvmsg: data corruption");
+		return 1;
+	}
 
-			pidfd = (void *)CMSG_DATA(cmsg);
-		}
+	if (parse_cmsg(&msg, &res)) {
+		log_err("CMSG parse: parse_cmsg() failed");
+		return 1;
+	}
 
-		if (cmsg->cmsg_level == SOL_SOCKET &&
-		    cmsg->cmsg_type == SCM_CREDENTIALS) {
-			if (cmsg->cmsg_len < sizeof(*ucred)) {
-				log_err("CMSG parse: SCM_CREDENTIALS wrong len");
-				return 1;
-			}
+	/* pidfd from SCM_PIDFD should point to the parent process PID */
+	parent_pid =
+		get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1);
+	if (parent_pid != getppid()) {
+		log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+		close(*res.pidfd);
+		return 1;
+	}
 
-			ucred = (void *)CMSG_DATA(cmsg);
-		}
+	close(*res.pidfd);
+	return 0;
+}
+
+static int cmsg_check_dead(int fd, int expected_pid)
+{
+	int err;
+	struct msghdr msg = { 0 };
+	struct cmsg_data res;
+	struct iovec iov;
+	int data = 0;
+	char control[CMSG_SPACE(sizeof(struct ucred)) +
+		     CMSG_SPACE(sizeof(int))] = { 0 };
+	struct pidfd_info info = {
+		.mask = PIDFD_INFO_EXIT,
+	};
+
+	iov.iov_base = &data;
+	iov.iov_len = sizeof(data);
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	err = recvmsg(fd, &msg, 0);
+	if (err < 0) {
+		log_err("recvmsg");
+		return 1;
 	}
 
-	/* send(pfd, "x", sizeof(char), 0) */
-	if (data != 'x') {
+	if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+		log_err("recvmsg: truncated");
+		return 1;
+	}
+
+	/* send(cfd, "y", sizeof(char), 0) */
+	if (data != 'y') {
 		log_err("recvmsg: data corruption");
 		return 1;
 	}
 
-	if (!pidfd) {
-		log_err("CMSG parse: SCM_PIDFD not found");
+	if (parse_cmsg(&msg, &res)) {
+		log_err("CMSG parse: parse_cmsg() failed");
 		return 1;
 	}
 
-	if (!ucred) {
-		log_err("CMSG parse: SCM_CREDENTIALS not found");
+	/*
+	 * pidfd from SCM_PIDFD should point to the client_pid.
+	 * Let's read exit information and check if it's what
+	 * we expect to see.
+	 */
+	if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) {
+		log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__);
+		close(*res.pidfd);
 		return 1;
 	}
 
-	/* pidfd from SCM_PIDFD should point to the parent process PID */
-	parent_pid =
-		get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
-	if (parent_pid != getppid()) {
-		log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+	if (!(info.mask & PIDFD_INFO_EXIT)) {
+		log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__);
+		close(*res.pidfd);
+		return 1;
+	}
+
+	err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1;
+	if (err != CHILD_EXIT_CODE_OK) {
+		log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK);
+		close(*res.pidfd);
 		return 1;
 	}
 
+	close(*res.pidfd);
 	return 0;
 }
 
@@ -291,6 +387,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract)
 	memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name));
 }
 
+/* Turn on SO_PASSCRED and SO_PASSPIDFD for @sk; 0 on success, 1 on error. */
+static int sk_enable_cred_pass(int sk)
+{
+	int on = 1;
+
+	if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
+		log_err("Failed to set SO_PASSCRED");
+		return 1;
+	}
+
+	if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
+		log_err("Failed to set SO_PASSPIDFD");
+		return 1;
+	}
+
+	return 0;
+}
+
 static void client(FIXTURE_DATA(scm_pidfd) *self,
 		   const FIXTURE_VARIANT(scm_pidfd) *variant)
 {
@@ -299,7 +413,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
 	struct ucred peer_cred;
 	int peer_pidfd;
 	pid_t peer_pid;
-	int on = 0;
 
 	cfd = socket(AF_UNIX, variant->type, 0);
 	if (cfd < 0) {
@@ -322,14 +435,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
 		child_die();
 	}
 
-	on = 1;
-	if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
-		log_err("Failed to set SO_PASSCRED");
-		child_die();
-	}
-
-	if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
-		log_err("Failed to set SO_PASSPIDFD");
+	if (sk_enable_cred_pass(cfd)) {
+		log_err("sk_enable_cred_pass() failed");
 		child_die();
 	}
 
@@ -340,6 +447,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
 		child_die();
 	}
 
+	/* send something to the parent so it can receive SCM_PIDFD too and validate it */
+	if (send(cfd, "y", sizeof(char), 0) == -1) {
+		log_err("Failed to send(cfd, \"y\", sizeof(char), 0)");
+		child_die();
+	}
+
 	/* skip further for SOCK_DGRAM as it's not applicable */
 	if (variant->type == SOCK_DGRAM)
 		return;
@@ -398,7 +511,13 @@ TEST_F(scm_pidfd, test)
 		close(self->server);
 		close(self->startup_pipe[0]);
 		client(self, variant);
-		exit(0);
+
+		/*
+		 * It's a bit unusual, but in case of success we return non-zero
+		 * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it
+		 * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead().
+		 */
+		exit(CHILD_EXIT_CODE_OK);
 	}
 	close(self->startup_pipe[1]);
 
@@ -421,9 +540,17 @@ TEST_F(scm_pidfd, test)
 		ASSERT_NE(-1, err);
 	}
 
-	close(pfd);
 	waitpid(self->client_pid, &child_status, 0);
-	ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+	/* see comment before exit(CHILD_EXIT_CODE_OK) */
+	ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+
+	err = sk_enable_cred_pass(pfd);
+	ASSERT_EQ(0, err);
+
+	err = cmsg_check_dead(pfd, self->client_pid);
+	ASSERT_EQ(0, err);
+
+	close(pfd);
 }
 
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index d66336256580..d82a79c21c17 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -10,7 +10,7 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 FIXTURE(scm_rights)
 {
@@ -23,6 +23,7 @@ FIXTURE_VARIANT(scm_rights)
 	int type;
 	int flags;
 	bool test_listener;
+	bool disabled;
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, dgram)
@@ -31,6 +32,16 @@ FIXTURE_VARIANT_ADD(scm_rights, dgram)
 	.type = SOCK_DGRAM,
 	.flags = 0,
 	.test_listener = false,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled)
+{
+	.name = "UNIX ",
+	.type = SOCK_DGRAM,
+	.flags = 0,
+	.test_listener = false,
+	.disabled = true,
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, stream)
@@ -39,6 +50,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream)
 	.type = SOCK_STREAM,
 	.flags = 0,
 	.test_listener = false,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_disabled)
+{
+	.name = "UNIX-STREAM ",
+	.type = SOCK_STREAM,
+	.flags = 0,
+	.test_listener = false,
+	.disabled = true,
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
@@ -47,6 +68,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
 	.type = SOCK_STREAM,
 	.flags = MSG_OOB,
 	.test_listener = false,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled)
+{
+	.name = "UNIX-STREAM ",
+	.type = SOCK_STREAM,
+	.flags = MSG_OOB,
+	.test_listener = false,
+	.disabled = true,
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
@@ -55,6 +86,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
 	.type = SOCK_STREAM,
 	.flags = 0,
 	.test_listener = true,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled)
+{
+	.name = "UNIX-STREAM ",
+	.type = SOCK_STREAM,
+	.flags = 0,
+	.test_listener = true,
+	.disabled = true,
 };
 
 FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
@@ -63,6 +104,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
 	.type = SOCK_STREAM,
 	.flags = MSG_OOB,
 	.test_listener = true,
+	.disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled)
+{
+	.name = "UNIX-STREAM ",
+	.type = SOCK_STREAM,
+	.flags = MSG_OOB,
+	.test_listener = true,
+	.disabled = true,
 };
 
 static int count_sockets(struct __test_metadata *_metadata,
@@ -105,6 +156,9 @@ FIXTURE_SETUP(scm_rights)
 	ret = unshare(CLONE_NEWNET);
 	ASSERT_EQ(0, ret);
 
+	if (variant->disabled)
+		return;
+
 	ret = count_sockets(_metadata, variant);
 	ASSERT_EQ(0, ret);
 }
@@ -113,6 +167,9 @@ FIXTURE_TEARDOWN(scm_rights)
 {
 	int ret;
 
+	if (variant->disabled)
+		return;
+
 	sleep(1);
 
 	ret = count_sockets(_metadata, variant);
@@ -121,6 +178,7 @@ FIXTURE_TEARDOWN(scm_rights)
 
 static void create_listeners(struct __test_metadata *_metadata,
 			     FIXTURE_DATA(scm_rights) *self,
+			     const FIXTURE_VARIANT(scm_rights) *variant,
 			     int n)
 {
 	struct sockaddr_un addr = {
@@ -140,6 +198,12 @@ static void create_listeners(struct __test_metadata *_metadata,
 		ret = listen(self->fd[i], -1);
 		ASSERT_EQ(0, ret);
 
+		if (variant->disabled) {
+			ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+					 &(int){0}, sizeof(int));
+			ASSERT_EQ(0, ret);
+		}
+
 		addrlen = sizeof(addr);
 		ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
 		ASSERT_EQ(0, ret);
@@ -164,6 +228,12 @@ static void create_socketpairs(struct __test_metadata *_metadata,
 	for (i = 0; i < n * 2; i += 2) {
 		ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
 		ASSERT_EQ(0, ret);
+
+		if (variant->disabled) {
+			ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+					 &(int){0}, sizeof(int));
+			ASSERT_EQ(0, ret);
+		}
 	}
 }
 
@@ -175,7 +245,7 @@ static void __create_sockets(struct __test_metadata *_metadata,
 	ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
 
 	if (variant->test_listener)
-		create_listeners(_metadata, self, n);
+		create_listeners(_metadata, self, variant, n);
 	else
 		create_socketpairs(_metadata, self, variant, n);
 }
@@ -201,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata,
 {
 #define MSG "x"
 #define MSGLEN 1
-	struct {
-		struct cmsghdr cmsghdr;
-		int fd[2];
-	} cmsg = {
-		.cmsghdr = {
-			.cmsg_len = CMSG_LEN(sizeof(cmsg.fd)),
-			.cmsg_level = SOL_SOCKET,
-			.cmsg_type = SCM_RIGHTS,
-		},
-		.fd = {
-			self->fd[inflight * 2],
-			self->fd[inflight * 2],
-		},
+	int fds[2] = {
+		self->fd[inflight * 2],
+		self->fd[inflight * 2],
 	};
+	char cmsg_buf[CMSG_SPACE(sizeof(fds))];
 	struct iovec iov = {
 		.iov_base = MSG,
 		.iov_len = MSGLEN,
@@ -224,13 +285,26 @@ void __send_fd(struct __test_metadata *_metadata,
 		.msg_namelen = 0,
 		.msg_iov = &iov,
 		.msg_iovlen = 1,
-		.msg_control = &cmsg,
-		.msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)),
+		.msg_control = cmsg_buf,
+		.msg_controllen = sizeof(cmsg_buf),
 	};
+	struct cmsghdr *cmsg;
 	int ret;
 
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
+	memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
+
 	ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
-	ASSERT_EQ(MSGLEN, ret);
+
+	if (variant->disabled) {
+		ASSERT_EQ(-1, ret);
+		ASSERT_EQ(-EPERM, -errno);
+	} else {
+		ASSERT_EQ(MSGLEN, ret);
+	}
 }
 
 #define create_sockets(n)					\
diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c
new file mode 100644
index 000000000000..f6466a717f49
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/so_peek_off.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(so_peek_off)
+{
+	int fd[2];	/* 0: sender, 1: receiver */
+};
+
+FIXTURE_VARIANT(so_peek_off)
+{
+	int type;
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, stream)
+{
+	.type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, dgram)
+{
+	.type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, seqpacket)
+{
+	.type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(so_peek_off)
+{
+	struct timeval timeout = {
+		.tv_sec = 5,
+		.tv_usec = 0,
+	};
+	int ret;
+
+	ret = socketpair(AF_UNIX, variant->type, 0, self->fd);
+	ASSERT_EQ(0, ret);
+	/* Receiver: a blocking recv() times out after 5 seconds. */
+	ret = setsockopt(self->fd[1], SOL_SOCKET, SO_RCVTIMEO_NEW,
+			 &timeout, sizeof(timeout));
+	ASSERT_EQ(0, ret);
+	/* Receiver: enable SO_PEEK_OFF, starting at offset 0. */
+	ret = setsockopt(self->fd[1], SOL_SOCKET, SO_PEEK_OFF,
+			 &(int){0}, sizeof(int));
+	ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(so_peek_off)
+{
+	close_range(self->fd[0], self->fd[1], 0);
+}
+
+#define sendeq(fd, str, flags)					\
+	do {							\
+		int bytes, len = strlen(str);			\
+								\
+		bytes = send(fd, str, len, flags);		\
+		ASSERT_EQ(len, bytes);				\
+	} while (0)
+
+#define recveq(fd, str, buflen, flags)				\
+	do {							\
+		char buf[(buflen) + 1] = {};			\
+		int bytes;					\
+		/* +1 zeroed byte keeps buf NUL-terminated */	\
+		bytes = recv(fd, buf, buflen, flags);		\
+		ASSERT_NE(-1, bytes);				\
+		ASSERT_STREQ(str, buf);				\
+	} while (0)
+
+#define peekoffeq(fd, expected)					\
+	do {							\
+		socklen_t optlen = sizeof(int);			\
+		int off = -1;					\
+		int ret;					\
+								\
+		ret = getsockopt(fd, SOL_SOCKET, SO_PEEK_OFF,	\
+				 &off, &optlen);		\
+		ASSERT_EQ(0, ret);				\
+		ASSERT_EQ((socklen_t)sizeof(off), optlen);	\
+		ASSERT_EQ(expected, off);			\
+	} while (0)
+
+#define async	/* run the attached block in a forked child */	\
+	for (pid_t pid = (pid = fork(),				\
+			  pid < 0 ?				\
+			  __TH_LOG("Failed to start async {}"),	\
+			  _metadata->exit_code = KSFT_FAIL,	\
+			  __bail(1, _metadata),			\
+			  0xdead :				\
+			  pid);					\
+	     !pid; exit(0))	/* only pid == 0 enters; it exits after one pass */
+
+TEST_F(so_peek_off, single_chunk)
+{
+	sendeq(self->fd[0], "aaaabbbb", 0);
+
+	recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+	peekoffeq(self->fd[1], 4);
+	recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+	peekoffeq(self->fd[1], 8);
+
+	recveq(self->fd[1], "aaaabbbb", 8, 0);
+	peekoffeq(self->fd[1], 0);
+}
+
+TEST_F(so_peek_off, two_chunks)
+{
+	sendeq(self->fd[0], "aaaa", 0);
+	sendeq(self->fd[0], "bbbb", 0);
+
+	recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+	peekoffeq(self->fd[1], 4);
+	recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+	peekoffeq(self->fd[1], 8);
+
+	recveq(self->fd[1], "aaaa", 4, 0);
+	recveq(self->fd[1], "bbbb", 4, 0);
+	peekoffeq(self->fd[1], 0);
+}
+
+TEST_F(so_peek_off, two_chunks_blocking)
+{
+	async {
+		usleep(1000);
+		sendeq(self->fd[0], "aaaa", 0);
+	}
+
+	recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+	peekoffeq(self->fd[1], 4);
+
+	async {
+		usleep(1000);
+		sendeq(self->fd[0], "bbbb", 0);
+	}
+
+	/* goto again; -> goto redo; in unix_stream_read_generic(). */
+	recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+	peekoffeq(self->fd[1], 8);
+
+	recveq(self->fd[1], "aaaa", 4, 0);
+	recveq(self->fd[1], "bbbb", 4, 0);
+	peekoffeq(self->fd[1], 0);
+}
+
+TEST_F(so_peek_off, two_chunks_overlap)
+{
+	sendeq(self->fd[0], "aaaa", 0);
+	recveq(self->fd[1], "aa", 2, MSG_PEEK);
+	peekoffeq(self->fd[1], 2);
+
+	sendeq(self->fd[0], "bbbb", 0);
+
+	if (variant->type == SOCK_STREAM) {
+		/* SOCK_STREAM tries to fill the buffer. */
+		recveq(self->fd[1], "aabb", 4, MSG_PEEK);
+		peekoffeq(self->fd[1], 6);
+		recveq(self->fd[1], "bb", 100, MSG_PEEK);
+		peekoffeq(self->fd[1], 8);
+	} else {
+		/* SOCK_DGRAM and SOCK_SEQPACKET return at the skb boundary. */
+		recveq(self->fd[1], "aa", 100, MSG_PEEK);
+		peekoffeq(self->fd[1], 4);
+		recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+		peekoffeq(self->fd[1], 8);
+	}
+
+	recveq(self->fd[1], "aaaa", 4, 0);
+	recveq(self->fd[1], "bbbb", 4, 0);
+	peekoffeq(self->fd[1], 0);
+}
+
+TEST_F(so_peek_off, two_chunks_overlap_blocking)
+{
+	async {
+		usleep(1000);
+		sendeq(self->fd[0], "aaaa", 0);
+	}
+
+	recveq(self->fd[1], "aa", 2, MSG_PEEK);
+	peekoffeq(self->fd[1], 2);
+
+	async {
+		usleep(1000);
+		sendeq(self->fd[0], "bbbb", 0);
+	}
+
+	/* Even SOCK_STREAM does not wait if at least one byte is read. */
+	recveq(self->fd[1], "aa", 100, MSG_PEEK);
+	peekoffeq(self->fd[1], 4);
+
+	recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+	peekoffeq(self->fd[1], 8);
+
+	recveq(self->fd[1], "aaaa", 4, 0);
+	recveq(self->fd[1], "bbbb", 4, 0);
+	peekoffeq(self->fd[1], 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c
index d799fd8f5c7c..870ca96fa8ea 100644
--- a/tools/testing/selftests/net/af_unix/unix_connect.c
+++ b/tools/testing/selftests/net/af_unix/unix_connect.c
@@ -10,7 +10,7 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 FIXTURE(unix_connect)
 {
diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c b/tools/testing/selftests/net/af_unix/unix_connreset.c
new file mode 100644
index 000000000000..08c1de8f5a98
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/unix_connreset.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Selftest for AF_UNIX socket close and ECONNRESET behaviour.
+ *
+ * This test verifies:
+ *  1. SOCK_STREAM returns EOF when the peer closes normally.
+ *  2. SOCK_STREAM returns ECONNRESET if peer closes with unread data.
+ *  3. SOCK_SEQPACKET returns EOF when the peer closes normally.
+ *  4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data.
+ *  5. SOCK_DGRAM does not return ECONNRESET when the peer closes.
+ *
+ * These tests document the intended Linux behaviour.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include "../../kselftest_harness.h"
+
+#define SOCK_PATH "/tmp/af_unix_connreset.sock"
+
+static void remove_socket_file(void)
+{
+	unlink(SOCK_PATH);
+}
+
+FIXTURE(unix_sock)
+{
+	int server;
+	int client;
+	int child;
+};
+
+FIXTURE_VARIANT(unix_sock)
+{
+	int socket_type;
+	const char *name;
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, stream) {
+	.socket_type = SOCK_STREAM,
+	.name = "SOCK_STREAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, dgram) {
+	.socket_type = SOCK_DGRAM,
+	.name = "SOCK_DGRAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, seqpacket) {
+	.socket_type = SOCK_SEQPACKET,
+	.name = "SOCK_SEQPACKET",
+};
+
+FIXTURE_SETUP(unix_sock)
+{
+	struct sockaddr_un addr = { .sun_family = AF_UNIX };
+	int err;
+
+	self->child = -1; /* no accepted socket yet; teardown must not close fd 0 */
+	strcpy(addr.sun_path, SOCK_PATH);
+	remove_socket_file();
+
+	self->server = socket(AF_UNIX, variant->socket_type, 0);
+	ASSERT_LT(-1, self->server);
+
+	err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr));
+	ASSERT_EQ(0, err);
+
+	if (variant->socket_type == SOCK_STREAM ||
+	    variant->socket_type == SOCK_SEQPACKET) {
+		err = listen(self->server, 1);
+		ASSERT_EQ(0, err);
+	}
+	/* Client is non-blocking, so recv() in the tests never waits. */
+	self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0);
+	ASSERT_LT(-1, self->client);
+
+	err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr));
+	ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(unix_sock)
+{
+	if (variant->socket_type == SOCK_STREAM ||
+	    variant->socket_type == SOCK_SEQPACKET)
+		close(self->child);
+
+	close(self->client);
+	close(self->server);
+	remove_socket_file();
+}
+
+/* Test 1: peer closes normally */
+TEST_F(unix_sock, eof)
+{
+	char buf[16] = {};
+	ssize_t n;
+
+	if (variant->socket_type == SOCK_STREAM ||
+	    variant->socket_type == SOCK_SEQPACKET) {
+		self->child = accept(self->server, NULL, NULL);
+		ASSERT_LT(-1, self->child);
+		close(self->child);
+		self->child = -1; /* already closed; teardown must not close it again */
+	} else {
+		close(self->server);
+		self->server = -1; /* already closed; teardown must not close it again */
+	}
+
+	n = recv(self->client, buf, sizeof(buf), 0);
+	if (variant->socket_type == SOCK_STREAM ||
+	    variant->socket_type == SOCK_SEQPACKET) {
+		ASSERT_EQ(0, n);
+	} else {
+		ASSERT_EQ(-1, n);
+		ASSERT_EQ(EAGAIN, errno);
+	}
+}
+
+/* Test 2: peer closes with unread data */
+TEST_F(unix_sock, reset_unread_behavior)
+{
+	char buf[16] = {};
+	ssize_t n;
+
+	/* Send data that will remain unread; the test is void if this fails */
+	ASSERT_EQ(5, send(self->client, "hello", 5, 0));
+
+	if (variant->socket_type == SOCK_DGRAM) {
+		/* No real connection, just close the server */
+		close(self->server);
+		self->server = -1; /* teardown must not close it again */
+	} else {
+		self->child = accept(self->server, NULL, NULL);
+		ASSERT_LT(-1, self->child);
+		/* Peer closes before client reads */
+		close(self->child);
+		self->child = -1; /* teardown must not close it again */
+	}
+
+	n = recv(self->client, buf, sizeof(buf), 0);
+	ASSERT_EQ(-1, n);
+	if (variant->socket_type == SOCK_STREAM ||
+	    variant->socket_type == SOCK_SEQPACKET) {
+		ASSERT_EQ(ECONNRESET, errno);
+	} else {
+		ASSERT_EQ(EAGAIN, errno);
+	}
+}
+
+/* Test 3: closing unaccepted (embryo) server socket should reset client. */
+TEST_F(unix_sock, reset_closed_embryo)
+{
+	char buf[16] = {};
+	ssize_t n;
+
+	if (variant->socket_type == SOCK_DGRAM) {
+		snprintf(_metadata->results->reason,
+			 sizeof(_metadata->results->reason),
+			 "Test only applies to SOCK_STREAM and SOCK_SEQPACKET");
+		exit(KSFT_XFAIL);
+	}
+
+	/* Close server without accept()ing */
+	close(self->server);
+	self->server = -1; /* already closed; teardown must not close it again */
+
+	n = recv(self->client, buf, sizeof(buf), 0);
+	ASSERT_EQ(-1, n);
+	ASSERT_EQ(ECONNRESET, errno);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
index d458b45c775b..663744305e52 100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
@@ -73,6 +73,8 @@
 #       +------------------------+
 #==============================================================================
 
+source lib.sh
+
 readonly LISTENER=$(mktemp -u listener-XXXXXXXX)
 readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX)
 readonly RELAY=$(mktemp -u relay-XXXXXXXX)
@@ -194,15 +196,21 @@ test_remote_ip()
 
 send_mcast_torture4()
 {
-	ip netns exec "${SOURCE}" bash -c \
-		'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001'
+	for i in `seq 10`; do
+		ip netns exec "${SOURCE}" bash -c \
+		   'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001'
+		echo -n "."
+	done
 }
 
 
 send_mcast_torture6()
 {
-	ip netns exec "${SOURCE}" bash -c \
-		'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001'
+	for i in `seq 10`; do
+		ip netns exec "${SOURCE}" bash -c \
+		   'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001'
+		echo -n "."
+	done
 }
 
 check_features()
@@ -240,14 +248,15 @@ test_ipv6_forward()
 
 send_mcast4()
 {
-	sleep 2
+	sleep 5
+	wait_local_port_listen ${LISTENER} 4000 udp
 	ip netns exec "${SOURCE}" bash -c \
 		'printf "%s %128s" 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
 }
 
 send_mcast6()
 {
-	sleep 2
+	wait_local_port_listen ${LISTENER} 6000 udp
 	ip netns exec "${SOURCE}" bash -c \
 		'printf "%s %128s" 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
 }
@@ -278,10 +287,12 @@ wait $pid || err=$?
 if [ $err -eq 1 ]; then
 	ERR=1
 fi
+printf "TEST: %-50s" "IPv4 amt traffic forwarding torture"
 send_mcast_torture4
-printf "TEST: %-60s  [ OK ]\n" "IPv4 amt traffic forwarding torture"
+printf "  [ OK ]\n"
+printf "TEST: %-50s" "IPv6 amt traffic forwarding torture"
 send_mcast_torture6
-printf "TEST: %-60s  [ OK ]\n" "IPv6 amt traffic forwarding torture"
+printf "  [ OK ]\n"
 sleep 5
 if [ "${ERR}" -eq 1 ]; then
         echo "Some tests failed." >&2
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
index 92eb880c52f2..00758f00efbf 100755
--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -75,7 +75,7 @@ setup_v4() {
     ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
     if [ $? -ne 0 ]; then
         cleanup_v4
-        echo "failed"
+        echo "failed; is the system using MACAddressPolicy=persistent ?"
         exit 1
     fi
 
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index f366cadbc5e8..d9e5b967f815 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 # Test various bareudp tunnel configurations.
@@ -106,26 +106,16 @@
 # |                                                                       |
 # +-----------------------------------------------------------------------+
 
+. ./lib.sh
+
 ERR=4 # Return 4 by default, which is the SKIP code for kselftest
 PING6="ping"
 PAUSE_ON_FAIL="no"
 
-readonly NS0=$(mktemp -u ns0-XXXXXXXX)
-readonly NS1=$(mktemp -u ns1-XXXXXXXX)
-readonly NS2=$(mktemp -u ns2-XXXXXXXX)
-readonly NS3=$(mktemp -u ns3-XXXXXXXX)
-
 # Exit the script after having removed the network namespaces it created
-#
-# Parameters:
-#
-#   * The list of network namespaces to delete before exiting.
-#
 exit_cleanup()
 {
-	for ns in "$@"; do
-		ip netns delete "${ns}" 2>/dev/null || true
-	done
+	cleanup_all_ns
 
 	if [ "${ERR}" -eq 4 ]; then
 		echo "Error: Setting up the testing environment failed." >&2
@@ -140,17 +130,7 @@ exit_cleanup()
 # namespaces created by this script are deleted.
 create_namespaces()
 {
-	ip netns add "${NS0}" || exit_cleanup
-	ip netns add "${NS1}" || exit_cleanup "${NS0}"
-	ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
-	ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
-}
-
-# The trap function handler
-#
-exit_cleanup_all()
-{
-	exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+	setup_ns NS0 NS1 NS2 NS3 || exit_cleanup
 }
 
 # Configure a network interface using a host route
@@ -188,10 +168,6 @@ iface_config()
 #
 setup_underlay()
 {
-	for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
-		ip -netns "${ns}" link set dev lo up
-	done;
-
 	ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
 	ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
 	ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
@@ -234,14 +210,6 @@ setup_overlay_ipv4()
 	ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
 	ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
 	ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
-
-	# The intermediate namespaces don't have routes for the reverse path,
-	# as it will be handled by tc. So we need to ensure that rp_filter is
-	# not going to block the traffic.
-	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
-	ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
 }
 
 setup_overlay_ipv6()
@@ -521,13 +489,10 @@ done
 
 check_features
 
-# Create namespaces before setting up the exit trap.
-# Otherwise, exit_cleanup_all() could delete namespaces that were not created
-# by this script.
-create_namespaces
-
 set -e
-trap exit_cleanup_all EXIT
+trap exit_cleanup EXIT
+
+create_namespaces
 
 setup_underlay
 setup_overlay_ipv4
diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile
new file mode 100644
index 000000000000..2546c45e42f7
--- /dev/null
+++ b/tools/testing/selftests/net/bench/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_MODS_DIR := page_pool
+
+TEST_PROGS += test_bench_page_pool.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile
new file mode 100644
index 000000000000..0549a16ba275
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/Makefile
@@ -0,0 +1,17 @@
+BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+obj-m	+= bench_page_pool.o
+bench_page_pool-y += bench_page_pool_simple.o time_bench.o
+# Recurse via $(MAKE), not a literal "make", so the invoking make is reused.
+all:
+	+$(Q)$(MAKE) -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules
+
+clean:
+	+$(Q)$(MAKE) -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean
diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
new file mode 100644
index 000000000000..cb6468adbda4
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmark module for page_pool.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/interrupt.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <net/page_pool/helpers.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+#define MY_POOL_SIZE 1024
+
+/* Makes tests selectable. Useful for perf-record to analyze a single test.
+ * Hint: Bash shells support writing binary numbers like: $((2#101010))
+ *
+ * # modprobe bench_page_pool_simple run_flags=$((2#100))
+ */
+static unsigned long run_flags = 0xFFFFFFFF;
+module_param(run_flags, ulong, 0);
+MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
+
+/* Count the bit number from the enum */
+enum benchmark_bit {
+	bit_run_bench_baseline,
+	bit_run_bench_no_softirq01,
+	bit_run_bench_no_softirq02,
+	bit_run_bench_no_softirq03,
+};
+
+#define bit(b)		(1UL << (b))	/* unsigned long, same type as run_flags */
+#define enabled(b)	((run_flags & (bit(b))))
+
+/* notice time_bench is limited to U32_MAX nr loops */
+static unsigned long loops = 10000000;
+module_param(loops, ulong, 0);
+MODULE_PARM_DESC(loops, "Specify loops bench will run");
+
+/* Timing at the nanosec level, we need to know the overhead
+ * introduced by the for loop itself
+ */
+static int time_bench_for_loop(struct time_bench_record *rec, void *data)
+{
+	uint64_t loops_cnt = 0;
+	int i;
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		loops_cnt++;
+		barrier(); /* avoid compiler to optimize this loop */
+	}
+	time_bench_stop(rec, loops_cnt);
+	return loops_cnt;
+}
+
+static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
+{
+	uint64_t loops_cnt = 0;
+	atomic_t cnt;
+	int i;
+
+	atomic_set(&cnt, 0);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		atomic_inc(&cnt);
+		barrier(); /* avoid compiler to optimize this loop */
+	}
+	loops_cnt = atomic_read(&cnt);
+	time_bench_stop(rec, loops_cnt);
+	return loops_cnt;
+}
+
+/* The ptr_ring in page_pool uses a spinlock. We need to know the minimum
+ * overhead of taking+releasing a spinlock, to know the cycles that can be saved
+ * by e.g. amortizing this via bulking.
+ */
+static int time_bench_lock(struct time_bench_record *rec, void *data)
+{
+	uint64_t loops_cnt = 0;
+	spinlock_t lock;
+	int i;
+
+	spin_lock_init(&lock);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		spin_lock(&lock);
+		loops_cnt++;
+		barrier(); /* avoid compiler to optimize this loop */
+		spin_unlock(&lock);
+	}
+	time_bench_stop(rec, loops_cnt);
+	return loops_cnt;
+}
+
+/* Helper for filling some pages into ptr_ring; best effort if allocation fails */
+static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
+{
+	gfp_t gfp_mask = GFP_ATOMIC; /* needed when run under softirq */
+	struct page **array;
+	int i;
+
+	array = kcalloc(elems, sizeof(struct page *), gfp_mask);
+	if (!array)
+		return;
+	for (i = 0; i < elems; i++)
+		array[i] = page_pool_alloc_pages(pp, gfp_mask);
+	for (i = 0; i < elems; i++)
+		if (array[i]) /* skip slots whose allocation failed */
+			page_pool_put_page(pp, array[i], -1, false);
+	kfree(array);
+}
+
+enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+
+/* Shared benchmark body; depends on the compiler optimizing it per @type */
+static int time_bench_page_pool(struct time_bench_record *rec, void *data,
+				enum test_type type, const char *func)
+{
+	uint64_t loops_cnt = 0;
+	gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
+	int i, err;
+
+	struct page_pool *pp;
+	struct page *page;
+
+	struct page_pool_params pp_params = {
+		.order = 0,
+		.flags = 0,
+		.pool_size = MY_POOL_SIZE,
+		.nid = NUMA_NO_NODE,
+		.dev = NULL, /* Only use for DMA mapping */
+		.dma_dir = DMA_BIDIRECTIONAL,
+	};
+
+	pp = page_pool_create(&pp_params);
+	if (IS_ERR(pp)) {
+		err = PTR_ERR(pp);
+		pr_warn("%s: Error(%d) creating page_pool\n", func, err);
+		goto out;
+	}
+	pp_fill_ptr_ring(pp, 64);
+
+	if (in_serving_softirq())
+		pr_warn("%s(): in_serving_softirq fast-path\n", func);
+	else
+		pr_warn("%s(): Cannot use page_pool fast-path\n", func);
+
+	time_bench_start(rec);
+	/** Loop to measure **/
+	for (i = 0; i < rec->loops; i++) {
+		/* Common fast-path alloc that depends on in_serving_softirq() */
+		page = page_pool_alloc_pages(pp, gfp_mask);
+		if (!page)
+			break;
+		loops_cnt++;
+		barrier(); /* avoid compiler to optimize this loop */
+
+		/* The benchmark's purpose is to test different return paths.
+		 * Compiler should inline optimize other function calls out
+		 */
+		if (type == type_fast_path) {
+			/* Fast-path recycling e.g. XDP_DROP use-case */
+			page_pool_recycle_direct(pp, page);
+
+		} else if (type == type_ptr_ring) {
+			/* Normal return path */
+			page_pool_put_page(pp, page, -1, false);
+
+		} else if (type == type_page_allocator) {
+			/* Test when pages are not recycled, but instead
+			 * returned back into the system's page allocator
+			 */
+			get_page(page); /* cause no-recycling */
+			page_pool_put_page(pp, page, -1, false);
+			put_page(page);
+		} else {
+			BUILD_BUG();
+		}
+	}
+	time_bench_stop(rec, loops_cnt);
+	page_pool_destroy(pp);
+out:	/* pool creation failed: pp is an error pointer, nothing to destroy */
+	return loops_cnt;
+}
+
+static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
+					    void *data)
+{
+	return time_bench_page_pool(rec, data, type_fast_path, __func__);
+}
+
+static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
+					   void *data)
+{
+	return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
+}
+
+static int time_bench_page_pool03_slow(struct time_bench_record *rec,
+				       void *data)
+{
+	return time_bench_page_pool(rec, data, type_page_allocator, __func__);
+}
+
+static int run_benchmark_tests(void)
+{
+	uint32_t nr_loops = loops;
+
+	/* Baseline tests; NOTE(review): nr_loops * 10 wraps above U32_MAX / 10 */
+	if (enabled(bit_run_bench_baseline)) {
+		time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
+				time_bench_for_loop);
+		time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
+				time_bench_atomic_inc);
+		time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
+	}
+
+	/* This test cannot activate correct code path, due to no-softirq ctx */
+	if (enabled(bit_run_bench_no_softirq01))
+		time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
+				time_bench_page_pool01_fast_path);
+	if (enabled(bit_run_bench_no_softirq02))
+		time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL,
+				time_bench_page_pool02_ptr_ring);
+	if (enabled(bit_run_bench_no_softirq03))
+		time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL,
+				time_bench_page_pool03_slow);
+
+	return 0;
+}
+
+static int __init bench_page_pool_simple_module_init(void)
+{
+	if (verbose)
+		pr_info("Loaded\n");
+
+	if (loops > U32_MAX) {
+		pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops,
+		       U32_MAX);
+		return -ECHRNG;
+	}
+
+	run_benchmark_tests();
+
+	return 0;
+}
+module_init(bench_page_pool_simple_module_init);
+
+static void __exit bench_page_pool_simple_module_exit(void)
+{
+	if (verbose)
+		pr_info("Unloaded\n");
+}
+module_exit(bench_page_pool_simple_module_exit);
+
+MODULE_DESCRIPTION("Benchmark of page_pool simple cases");
+MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c
new file mode 100644
index 000000000000..073bb36ec5f2
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/time.h>
+
+#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */
+
+/* For concurrency testing */
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+
+/** TSC (Time-Stamp Counter) based **
+ * See: linux/time_bench.h
+ *  tsc_start_clock() and tsc_stop_clock()
+ */
+
+/** Wall-clock based **
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ */
+#define PERF_FORMAT                                                            \
+	(PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
+	 PERF_FORMAT_TOTAL_TIME_RUNNING)
+
+/* One raw PMU event to program, plus the kernel counter created for it */
+struct raw_perf_event {
+	uint64_t config; /* event; copied into perf_event_attr.config */
+	uint64_t config1; /* umask; copied into perf_event_attr.config1 */
+	struct perf_event *save; /* counter stored by time_bench_PMU_config() */
+	char *desc; /* human readable name, used in log messages */
+};
+
+/* If HT is enabled, a maximum of 4 events (5 if one is instructions
+ * retired) can be specified; if HT is disabled, a maximum of 8 (9 if
+ * one is instructions retired) can be specified.
+ *
+ * From Table 19-1. Architectural Performance Events
+ * Architectures Software Developer’s Manual Volume 3: System Programming
+ * Guide
+ */
+/* Raw events programmed by time_bench_PMU_config(), one counter each */
+struct raw_perf_event perf_events[] = {
+	{
+		.config  = 0x3c,
+		.config1 = 0x00,
+		.save    = NULL,
+		.desc    = "Unhalted CPU Cycles",
+	},
+	{
+		.config  = 0xc0,
+		.config1 = 0x00,
+		.save    = NULL,
+		.desc    = "Instruction Retired",
+	},
+};
+
+#define NUM_EVTS (ARRAY_SIZE(perf_events))
+
+/* Create and enable one kernel PMU counter per perf_events[] entry.
+ *
+ * WARNING: PMU config is currently broken!
+ *
+ * @enable: copied verbatim into perf_event_attr.disabled for every counter.
+ *	    NOTE(review): the parameter name suggests the opposite polarity -
+ *	    confirm the intended meaning before relying on it.
+ *
+ * Successfully created counters are remembered in perf_events[i].save.
+ * Always returns true, even when some or all counters could not be created;
+ * failures are only logged.
+ */
+bool time_bench_PMU_config(bool enable)
+{
+	int i;
+	struct perf_event_attr perf_conf;
+	struct perf_event *perf_event;
+	int cpu;
+
+	/* Sample the current CPU id.  Preemption is re-enabled right away, so
+	 * this task may run on another CPU by the time counters are created.
+	 */
+	preempt_disable();
+	cpu = smp_processor_id();
+	pr_info("DEBUG: cpu:%d\n", cpu);
+	preempt_enable();
+
+	memset(&perf_conf, 0, sizeof(struct perf_event_attr));
+	perf_conf.type           = PERF_TYPE_RAW;
+	perf_conf.size           = sizeof(struct perf_event_attr);
+	perf_conf.read_format    = PERF_FORMAT;
+	perf_conf.pinned         = 1;
+	perf_conf.exclude_user   = 1; /* No userspace events */
+	perf_conf.exclude_kernel = 0; /* Do count kernel events */
+
+	for (i = 0; i < NUM_EVTS; i++) {
+		perf_conf.disabled = enable;
+		perf_conf.config   = perf_events[i].config;
+		perf_conf.config1  = perf_events[i].config1;
+		if (verbose)
+			pr_info("%s() enable PMU counter: %s\n",
+				__func__, perf_events[i].desc);
+		perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
+							      NULL /* task */,
+							      NULL /* overflow_handler*/,
+							      NULL /* context */);
+		/* Failure is reported as an ERR_PTR() value, never as NULL, so
+		 * a plain NULL test would store and enable an error pointer.
+		 */
+		if (IS_ERR(perf_event)) {
+			pr_info("%s():DEBUG perf_event create failed (err:%ld)\n",
+				__func__, PTR_ERR(perf_event));
+			continue;
+		}
+
+		perf_events[i].save = perf_event;
+		pr_info("%s():DEBUG perf_event success\n", __func__);
+
+		perf_event_enable(perf_event);
+	}
+
+	return true;
+}
+
+/** Generic functions **
+ */
+
+/* Calculate stats, store results in record.
+ *
+ * Derives the result fields of @rec from the raw start/stop values, for
+ * each measurement type selected in rec->flags:
+ *  - TIME_BENCH_TSC:       tsc_interval and tsc_cycles
+ *  - TIME_BENCH_WALLCLOCK: time_start/stop/interval, time_sec(+remainder)
+ *                          and, with TIME_BENCH_LOOP, ns_per_call_*
+ *  - TIME_BENCH_PMU:       pmc_inst, pmc_clk and pmc_ipc_*
+ * With TIME_BENCH_LOOP set, per-call values are divided by invoked_cnt.
+ *
+ * The "decimal" fields hold three truncated (not rounded) fractional
+ * digits, i.e. a value in the range 0..999 intended for "%03llu".
+ *
+ * Returns false (after logging the reason) when the record cannot be
+ * evaluated: too few or too many invocations, a zero TSC or wallclock
+ * interval, or a zero PMU cycle count.  Returns true otherwise.
+ */
+bool time_bench_calc_stats(struct time_bench_record *rec)
+{
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+	uint32_t ns_per_call_remainder = 0;
+	uint64_t pmc_ipc_remainder = 0;
+	uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */
+
+	if (rec->flags & TIME_BENCH_LOOP) {
+		if (rec->invoked_cnt < 1000) {
+			pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
+			       rec->invoked_cnt);
+			return false;
+		}
+		if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
+			/* div_u64_rem() can only support div with 32bit*/
+			pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
+			       rec->invoked_cnt);
+			return false;
+		}
+		invoked_cnt = (uint32_t)rec->invoked_cnt;
+	}
+
+	/* TSC (Time-Stamp Counter) records */
+	if (rec->flags & TIME_BENCH_TSC) {
+		rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
+		if (rec->tsc_interval == 0) {
+			pr_err("ABORT: timing took ZERO TSC time\n");
+			return false;
+		}
+		/* Calculate stats; invoked_cnt >= 1000 when LOOP is set */
+		if (rec->flags & TIME_BENCH_LOOP)
+			rec->tsc_cycles = div_u64(rec->tsc_interval,
+						  invoked_cnt);
+		else
+			rec->tsc_cycles = rec->tsc_interval;
+	}
+
+	/* Wall-clock time calc */
+	if (rec->flags & TIME_BENCH_WALLCLOCK) {
+		rec->time_start = rec->ts_start.tv_nsec +
+				  (NANOSEC_PER_SEC * rec->ts_start.tv_sec);
+		rec->time_stop = rec->ts_stop.tv_nsec +
+				 (NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
+		rec->time_interval = rec->time_stop - rec->time_start;
+		if (rec->time_interval == 0) {
+			pr_err("ABORT: timing took ZERO wallclock time\n");
+			return false;
+		}
+		/* Calculate stats */
+		/*** Division in kernel is tricky ***/
+		/* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
+		/* remainder only correct because NANOSEC_PER_SEC is 10^9 */
+		rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
+					    &rec->time_sec_remainder);
+		//TODO: use existing struct timespec records instead of div?
+
+		if (rec->flags & TIME_BENCH_LOOP) {
+			/*** Division in kernel is tricky ***/
+			/* Orig: ns = ((double)time_interval / invoked_cnt); */
+			/* First get quotient */
+			rec->ns_per_call_quotient =
+				div_u64_rem(rec->time_interval, invoked_cnt,
+					    &ns_per_call_remainder);
+			/* Now get decimals .xxx precision (truncated).
+			 * remainder < invoked_cnt, so the scaled division
+			 * always yields 0..999; dividing by invoked_cnt/1000
+			 * instead could produce values above 999.
+			 */
+			rec->ns_per_call_decimal =
+				div_u64((uint64_t)ns_per_call_remainder * 1000,
+					invoked_cnt);
+		}
+	}
+
+	/* Performance Monitor Unit (PMU) counters */
+	if (rec->flags & TIME_BENCH_PMU) {
+		//FIXME: Overflow handling???
+		rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
+		rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;
+
+		if (rec->pmc_clk == 0) {
+			pr_err("ABORT: PMU measured ZERO clock cycles\n");
+			return false;
+		}
+
+		/* Calc Instruction Per Cycle (IPC) */
+		/* First get quotient; pmc_clk can exceed 32 bits, so use the
+		 * 64-bit divisor helper instead of truncating the divisor.
+		 */
+		rec->pmc_ipc_quotient = div64_u64_rem(rec->pmc_inst,
+						      rec->pmc_clk,
+						      &pmc_ipc_remainder);
+		/* Now get decimals .xxx precision (truncated).  The scaling
+		 * by 1000 only overflows for cycle counts above 2^64/1000.
+		 */
+		rec->pmc_ipc_decimal = div64_u64(pmc_ipc_remainder * 1000,
+						 rec->pmc_clk);
+	}
+
+	return true;
+}
+
+/* Generic function for invoking a loop function and calculating
+ * execution time stats.  The function being called/timed is assumed
+ * to perform a tight loop, and update the timing record struct.
+ *
+ * @loops: requested number of invocations, stored in the record
+ * @step:  opaque per-test option (e.g. bulk size), stored in the record
+ * @txt:   label printed in the result line
+ * @data:  opaque pointer handed to @func
+ * @func:  benchmark body; a zero return value is treated as failure
+ *
+ * Returns true when the benchmark ran and its stats could be calculated
+ * and printed, false otherwise (the reason is logged).
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+		     int (*func)(struct time_bench_record *record, void *data))
+{
+	struct time_bench_record rec;
+
+	/* Setup record */
+	memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
+	rec.version_abi = 1;
+	rec.loops       = loops;
+	rec.step        = step;
+	rec.flags       = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
+
+	/*** Loop function being timed ***/
+	if (!func(&rec, data)) {
+		pr_err("ABORT: function being timed failed\n");
+		return false;
+	}
+
+	if (rec.invoked_cnt < loops)
+		pr_warn("WARNING: Invoke count(%llu) smaller than loops(%u)\n",
+			rec.invoked_cnt, loops);
+
+	/* Calculate stats; on failure the reason was already logged and the
+	 * derived fields are not valid, so do not print a bogus result line.
+	 */
+	if (!time_bench_calc_stats(&rec))
+		return false;
+
+	pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+		txt, rec.tsc_cycles, rec.ns_per_call_quotient,
+		rec.ns_per_call_decimal, rec.step, rec.time_sec,
+		rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
+		rec.tsc_interval);
+	if (rec.flags & TIME_BENCH_PMU)
+		pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
+			txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
+			rec.pmc_ipc_decimal);
+	return true;
+}
+
+/* Function getting invoked by kthread.
+ *
+ * @private: the struct time_bench_cpu describing this worker
+ *
+ * Restricts itself to the CPU stored in cpu->rec.cpu, announces itself via
+ * sync->nr_tests_running, waits for sync->start_event, runs the benchmark
+ * function once and then sleeps until kthread_stop() is called.
+ *
+ * Always returns 0.
+ */
+static int invoke_test_on_cpu_func(void *private)
+{
+	struct time_bench_cpu *cpu = private;
+	struct time_bench_sync *sync = cpu->sync;
+	cpumask_t newmask = CPU_MASK_NONE;
+	void *data = cpu->data;
+
+	/* Restrict CPU */
+	cpumask_set_cpu(cpu->rec.cpu, &newmask);
+	set_cpus_allowed_ptr(current, &newmask);
+
+	/* Synchronize start of concurrency test */
+	atomic_inc(&sync->nr_tests_running);
+	wait_for_completion(&sync->start_event);
+
+	/* Start benchmark function */
+	if (!cpu->bench_func(&cpu->rec, data)) {
+		pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
+		       cpu->rec.cpu, smp_processor_id());
+	} else {
+		if (verbose)
+			pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
+				smp_processor_id());
+	}
+	cpu->did_bench_run = true;
+
+	/* End test */
+	atomic_dec(&sync->nr_tests_running);
+
+	/* Wait for kthread_stop() telling us to stop.
+	 *
+	 * The task state must be set BEFORE kthread_should_stop() is tested:
+	 * if the stop request (and its wake-up) arrives between the test and
+	 * set_current_state(), the wake-up is lost and schedule() would
+	 * sleep forever, hanging kthread_stop() in the caller.
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+/* Print the per-CPU results of a concurrent run plus their average.
+ *
+ * @desc:      label printed on every line
+ * @cpu_tasks: array indexed by CPU id, holding each CPU's record
+ * @mask:      CPUs whose records are evaluated and printed
+ *
+ * Each selected record is passed through time_bench_calc_stats() first.
+ * The summary line reports the mean tsc_cycles over the selected CPUs and
+ * the step value of the last record visited.
+ */
+void time_bench_print_stats_cpumask(const char *desc,
+				    struct time_bench_cpu *cpu_tasks,
+				    const struct cpumask *mask)
+{
+	uint64_t total_tsc_cycles = 0;
+	uint64_t mean = 0;
+	int nr_records = 0;
+	int last_step = 0;
+	int cpu;
+
+	for_each_cpu(cpu, mask) {
+		struct time_bench_record *r = &cpu_tasks[cpu].rec;
+
+		/* Derive the result fields before printing them */
+		time_bench_calc_stats(r);
+
+		pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+			desc, cpu, r->tsc_cycles, r->ns_per_call_quotient,
+			r->ns_per_call_decimal, r->step, r->time_sec,
+			r->time_sec_remainder, r->time_interval,
+			r->invoked_cnt, r->tsc_interval);
+
+		/* Accumulate for the average */
+		total_tsc_cycles += r->tsc_cycles;
+		nr_records++;
+		last_step = r->step;
+	}
+
+	/* An empty mask leaves the mean at zero instead of dividing by zero */
+	if (nr_records)
+		mean = total_tsc_cycles / nr_records;
+
+	pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
+		mean, nr_records, last_step);
+}
+
+/* Run @func concurrently, once on every CPU in @mask.
+ *
+ * @loops:     requested invocations, stored in each per-CPU record
+ * @step:      opaque per-test option, stored in each per-CPU record
+ * @data:      opaque pointer handed to @func on every CPU
+ * @mask:      CPUs to run on
+ * @sync:      start/finish synchronization state, (re)initialized here
+ * @cpu_tasks: array indexed by CPU id; record, task and callback are set up
+ * @func:      benchmark body executed by each worker kthread
+ *
+ * One kthread is spawned per CPU.  All workers are released together via
+ * sync->start_event, and the function returns after every worker finished
+ * and was stopped.  If a worker cannot be created, the workers already
+ * started are released and stopped before returning, so none is left
+ * blocked forever.
+ */
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+			       const struct cpumask *mask, /* Support masking out some CPUs */
+			       struct time_bench_sync *sync,
+			       struct time_bench_cpu *cpu_tasks,
+			       int (*func)(struct time_bench_record *record, void *data))
+{
+	int cpu, stop_cpu, running = 0;
+	bool spawn_failed = false;
+
+	if (verbose) // DEBUG
+		pr_warn("%s() Started on CPU:%d\n", __func__,
+			smp_processor_id());
+
+	/* Reset sync conditions */
+	atomic_set(&sync->nr_tests_running, 0);
+	init_completion(&sync->start_event);
+
+	/* Spawn off jobs on all CPUs */
+	for_each_cpu(cpu, mask) {
+		struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+		c->sync = sync; /* Send sync variable along */
+		c->data = data; /* Send opaque along */
+
+		/* Init benchmark record */
+		memset(&c->rec, 0, sizeof(struct time_bench_record));
+		c->rec.version_abi = 1;
+		c->rec.loops       = loops;
+		c->rec.step        = step;
+		c->rec.flags       = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
+				      TIME_BENCH_WALLCLOCK);
+		c->rec.cpu = cpu;
+		c->bench_func = func;
+		c->task = kthread_run(invoke_test_on_cpu_func, c,
+				      "time_bench%d", cpu);
+		if (IS_ERR(c->task)) {
+			pr_err("%s(): Failed to start test func\n", __func__);
+			c->task = NULL; /* never hand out an ERR_PTR */
+			spawn_failed = true;
+			break;
+		}
+		running++;
+	}
+
+	if (spawn_failed) {
+		/* Workers started so far block on start_event.  Release them
+		 * and stop them; kthread_stop() waits for each to exit.  A
+		 * released worker still runs its benchmark once before it
+		 * reaches its stop-wait loop.
+		 */
+		complete_all(&sync->start_event);
+		for_each_cpu(stop_cpu, mask) {
+			if (stop_cpu == cpu) /* the one that failed */
+				break;
+			kthread_stop(cpu_tasks[stop_cpu].task);
+		}
+		return;
+	}
+
+	/* Wait until all processes are running */
+	while (atomic_read(&sync->nr_tests_running) < running) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(10);
+	}
+	/* Kick off all CPU concurrently on completion event */
+	complete_all(&sync->start_event);
+
+	/* Wait for CPUs to finish */
+	while (atomic_read(&sync->nr_tests_running)) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(10);
+	}
+
+	/* Stop the kthreads */
+	for_each_cpu(cpu, mask) {
+		struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+		kthread_stop(c->task);
+	}
+
+	if (verbose) // DEBUG - happens often, finish on another CPU
+		pr_warn("%s() Finished on CPU:%d\n", __func__,
+			smp_processor_id());
+}
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h
new file mode 100644
index 000000000000..e113fcf341dc
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ *  for licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_TIME_BENCH_H
+#define _LINUX_TIME_BENCH_H
+
+/* Main structure used for recording a benchmark run */
+/* The benchmark helpers fill in the request fields (version_abi, loops,
+ * step, flags, cpu), time_bench_start()/time_bench_stop() fill in the raw
+ * "Records", and time_bench_calc_stats() derives the result fields.
+ */
+struct time_bench_record {
+	uint32_t version_abi;	/* Set to 1 by the helpers in time_bench.c */
+	uint32_t loops;		/* Requested loop invocations */
+	uint32_t step;		/* option for e.g. bulk invocations */
+
+	uint32_t flags;		/* Measurements types enabled */
+#define TIME_BENCH_LOOP		BIT(0)	/* Per-call stats: divide by invoked_cnt */
+#define TIME_BENCH_TSC		BIT(1)	/* Evaluate tsc_start/tsc_stop */
+#define TIME_BENCH_WALLCLOCK	BIT(2)	/* Evaluate ts_start/ts_stop */
+#define TIME_BENCH_PMU		BIT(3)	/* Read and evaluate the pmc_* counters */
+
+	uint32_t cpu; /* Used when embedded in time_bench_cpu */
+
+	/* Records */
+	uint64_t invoked_cnt;	/* Returned actual invocations */
+	uint64_t tsc_start;	/* From tsc_start_clock() */
+	uint64_t tsc_stop;	/* From tsc_stop_clock() */
+	struct timespec64 ts_start;	/* From ktime_get_real_ts64() */
+	struct timespec64 ts_stop;	/* From ktime_get_real_ts64() */
+	/* PMU counters for instruction and cycles
+	 * instructions counter including pipelined instructions
+	 */
+	uint64_t pmc_inst_start;
+	uint64_t pmc_inst_stop;
+	/* CPU unhalted clock counter */
+	uint64_t pmc_clk_start;
+	uint64_t pmc_clk_stop;
+
+	/* Result records */
+	uint64_t tsc_interval;	/* tsc_stop - tsc_start */
+	uint64_t time_start, time_stop, time_interval; /* in nanosec */
+	uint64_t pmc_inst, pmc_clk;	/* stop - start of the PMU counters */
+
+	/* Derived result records */
+	uint64_t tsc_cycles; // +decimal?
+	uint64_t ns_per_call_quotient, ns_per_call_decimal; /* printed as q.ddd */
+	uint64_t time_sec;		/* Whole seconds of time_interval */
+	uint32_t time_sec_remainder;	/* Remaining nanoseconds */
+	uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */
+};
+
+/* For synchronizing parallel CPUs to run concurrently */
+struct time_bench_sync {
+	/* Incremented by each worker before it waits for start_event and
+	 * decremented once its benchmark function returned.
+	 */
+	atomic_t nr_tests_running;
+	/* Completed by the coordinator to release all workers together */
+	struct completion start_event;
+};
+
+/* Keep track of CPUs executing our bench function.
+ *
+ * Embed a time_bench_record for storing info per cpu
+ */
+struct time_bench_cpu {
+	struct time_bench_record rec; /* per-CPU measurement record */
+	struct time_bench_sync *sync; /* back ptr */
+	struct task_struct *task; /* worker kthread running bench_func */
+	/* "data" opaque could have been placed in time_bench_sync,
+	 * but to avoid any false sharing, place it per CPU
+	 */
+	void *data;
+	/* Support masking out some CPUs, mark if it ran */
+	bool did_bench_run;
+	/* int cpu; // note CPU stored in time_bench_record */
+	/* Benchmark body invoked by the worker with (&rec, data) */
+	int (*bench_func)(struct time_bench_record *record, void *data);
+};
+
+/*
+ * Below TSC assembler code is not compatible with other archs, and
+ * can also fail on guests if cpu-flags are not correct.
+ *
+ * The way TSC reading is used, many iterations, does not require as
+ * high accuracy as described below (in Intel Doc #324264).
+ *
+ * Considering changing to use get_cycles() (#include <asm/timex.h>).
+ */
+
+/** TSC (Time-Stamp Counter) based **
+ * Recommend reading, to understand details of reading TSC accurately:
+ *  Intel Doc #324264, "How to Benchmark Code Execution Times on Intel"
+ *
+ * Consider getting exclusive ownership of CPU by using:
+ *   unsigned long flags;
+ *   preempt_disable();
+ *   raw_local_irq_save(flags);
+ *   _your_code_
+ *   raw_local_irq_restore(flags);
+ *   preempt_enable();
+ *
+ * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
+ *  RDTSC only changes "%rax" and "%rdx", but
+ *  CPUID clears the high 32 bits of all of them (rax/rbx/rcx/rdx)
+ */
+/* Read the TSC at the start of a measurement.
+ *
+ * Issues CPUID followed by RDTSC, then copies EDX into @hi and EAX into @lo.
+ * rax/rbx/rcx/rdx are declared clobbered, so the outputs live in other
+ * registers.  Returns the combined 64-bit value (hi << 32 | lo).
+ */
+static __always_inline uint64_t tsc_start_clock(void)
+{
+	/* See: Intel Doc #324264 */
+	unsigned int hi, lo;
+
+	asm volatile("CPUID\n\t"
+		     "RDTSC\n\t"
+		     "mov %%edx, %0\n\t"
+		     "mov %%eax, %1\n\t"
+		     : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+	//FIXME: on 32bit use clobbered %eax + %edx
+	return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/* Read the TSC at the end of a measurement.
+ *
+ * Issues RDTSCP, copies EDX into @hi and EAX into @lo, and only then
+ * executes CPUID - the mirror image of tsc_start_clock().  Returns the
+ * combined 64-bit value (hi << 32 | lo).
+ */
+static __always_inline uint64_t tsc_stop_clock(void)
+{
+	/* See: Intel Doc #324264 */
+	unsigned int hi, lo;
+
+	asm volatile("RDTSCP\n\t"
+		     "mov %%edx, %0\n\t"
+		     "mov %%eax, %1\n\t"
+		     "CPUID\n\t"
+		     : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+	return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/** Wall-clock based **
+ *
+ * use: getnstimeofday()
+ *  getnstimeofday(&rec->ts_start);
+ *  getnstimeofday(&rec->ts_stop);
+ *
+ * API changed see: Documentation/core-api/timekeeping.rst
+ *  https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
+ *
+ * We should instead use: ktime_get_real_ts64() is a direct
+ *  replacement, but consider using monotonic time (ktime_get_ts64())
+ *  and/or a ktime_t based interface (ktime_get()/ktime_get_real()).
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ *
+ * Needed for calculating: Instructions Per Cycle (IPC)
+ * - The IPC number tells how efficient the CPU pipelining was
+ */
+//lookup: perf_event_create_kernel_counter()
+
+bool time_bench_PMU_config(bool enable);
+
+/* Raw reading via rdpmc() using fixed counters
+ *
+ * From: https://github.com/andikleen/simple-pmu
+ */
+/* Selector values for p_rdpmc(): FIXED_SELECT is OR-ed with one of the
+ * FIXED_* indexes below (see pmc_inst() and pmc_clk()).
+ */
+enum {
+	FIXED_SELECT = (1U << 30), /* == 0x40000000 */
+	FIXED_INST_RETIRED_ANY = 0,
+	FIXED_CPU_CLK_UNHALTED_CORE = 1,
+	FIXED_CPU_CLK_UNHALTED_REF = 2, /* not used in this header */
+};
+
+/* Execute RDPMC with selector @in (passed in ECX) and return the counter
+ * value assembled from the EDX (upper half) and EAX (lower half) outputs.
+ */
+static __always_inline unsigned int long long p_rdpmc(unsigned int in)
+{
+	unsigned int upper, lower;
+	unsigned long long value;
+
+	asm volatile("rdpmc" : "=d"(upper), "=a"(lower) : "c"(in) : "memory");
+
+	value = (unsigned long long)upper << 32;
+	value |= lower;
+	return value;
+}
+
+/* These PMU counters need to be enabled, but I don't have the
+ * configure code implemented.  My current hack is running:
+ *  sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
+ */
+/* Reading all pipelined instructions: returns the raw rdpmc value for
+ * selector FIXED_SELECT | FIXED_INST_RETIRED_ANY.
+ */
+static __always_inline unsigned long long pmc_inst(void)
+{
+	return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
+}
+
+/* Reading CPU clock cycles: returns the raw rdpmc value for selector
+ * FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE.
+ */
+static __always_inline unsigned long long pmc_clk(void)
+{
+	return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
+}
+
+/* Raw reading via MSR rdmsr() is likely wrong
+ * FIXME: How can I know which raw MSR registers are conf for what?
+ */
+#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
+#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
+#define MSR_IA32_PCM2 0x400000C3
+/* Read MSR_IA32_PCM0 into *@msr_result via rdmsrq_safe().
+ *
+ * NOTE(review): the value returned here is rdmsrq_safe()'s own return
+ * value converted to uint64_t, not the counter; presumably that is a
+ * status/error code - confirm before using the result.
+ */
+static inline uint64_t msr_inst(unsigned long long *msr_result)
+{
+	return rdmsrq_safe(MSR_IA32_PCM0, msr_result);
+}
+
+/** Generic functions **
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+		     int (*func)(struct time_bench_record *rec, void *data));
+bool time_bench_calc_stats(struct time_bench_record *rec);
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+			       const struct cpumask *mask, /* Support masking outsome CPUs*/
+			       struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks,
+			       int (*func)(struct time_bench_record *record, void *data));
+void time_bench_print_stats_cpumask(const char *desc,
+				    struct time_bench_cpu *cpu_tasks,
+				    const struct cpumask *mask);
+
+//FIXME: use rec->flags to select measurement, should be MACRO
+/* Record the start values of a measurement in @rec.
+ *
+ * The wall-clock timestamp is taken first, then (only when TIME_BENCH_PMU
+ * is set in rec->flags) the PMU counters, and the TSC last, so the TSC
+ * read is the one closest to the code being timed.
+ */
+static __always_inline void time_bench_start(struct time_bench_record *rec)
+{
+	//getnstimeofday(&rec->ts_start);
+	ktime_get_real_ts64(&rec->ts_start);
+	if (rec->flags & TIME_BENCH_PMU) {
+		rec->pmc_inst_start = pmc_inst();
+		rec->pmc_clk_start = pmc_clk();
+	}
+	rec->tsc_start = tsc_start_clock();
+}
+
+/* Record the stop values of a measurement in @rec, in the reverse order
+ * of time_bench_start(): TSC first, then the PMU counters (only when
+ * TIME_BENCH_PMU is set), then the wall-clock timestamp.
+ *
+ * @invoked_cnt: number of invocations actually performed by the timed
+ *               loop; stored in rec->invoked_cnt.
+ */
+static __always_inline void time_bench_stop(struct time_bench_record *rec,
+					    uint64_t invoked_cnt)
+{
+	rec->tsc_stop = tsc_stop_clock();
+	if (rec->flags & TIME_BENCH_PMU) {
+		rec->pmc_inst_stop = pmc_inst();
+		rec->pmc_clk_stop = pmc_clk();
+	}
+	//getnstimeofday(&rec->ts_stop);
+	ktime_get_real_ts64(&rec->ts_stop);
+	rec->invoked_cnt = invoked_cnt;
+}
+
+#endif /* _LINUX_TIME_BENCH_H */
diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
new file mode 100755
index 000000000000..7b8b18cfedce
--- /dev/null
+++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+set -e
+
+DRIVER="./page_pool/bench_page_pool.ko"
+result=""
+
+# Load the benchmark module and print the per-element results.
+#
+# Unloads a previously loaded module (errors ignored), inserts $DRIVER,
+# captures the last 10 dmesg lines into the global "result", echoes them
+# and then extracts one result line per benchmark with grep.  Because the
+# script runs under "set -e", a failing insmod or a missing result line
+# aborts the script with a non-zero status.
+function run_test()
+{
+	local entry id label
+
+	rmmod "bench_page_pool.ko" || true
+	# Quote the path so it survives word splitting and globbing.
+	insmod "$DRIVER" > /dev/null 2>&1
+	result=$(dmesg | tail -10)
+	echo "$result"
+
+	# Each entry is "<benchmark number>:<heading shown to the user>".
+	for entry in "01:Fast path" "02:ptr_ring" "03:slow path"; do
+		id=${entry%%:*}
+		label=${entry#*:}
+
+		echo
+		echo "${label} results:"
+		echo "${result}" | grep -o -E "no-softirq-page_pool${id} Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+	done
+}
+
+run_test
+
+exit 0
diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c
index 57ff67a3751e..da04b0b19b73 100644
--- a/tools/testing/selftests/net/bind_bhash.c
+++ b/tools/testing/selftests/net/bind_bhash.c
@@ -75,7 +75,7 @@ static void *setup(void *arg)
 	int *array = (int *)arg;
 
 	for (i = 0; i < MAX_CONNECTIONS; i++) {
-		sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+		sock_fd = bind_socket(SO_REUSEPORT, setup_addr);
 		if (sock_fd < 0) {
 			ret = sock_fd;
 			pthread_exit(&ret);
@@ -103,7 +103,7 @@ int main(int argc, const char *argv[])
 
 	setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4;
 
-	listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+	listener_fd = bind_socket(SO_REUSEPORT, setup_addr);
 	if (listen(listener_fd, 100) < 0) {
 		perror("listen failed");
 		return -1;
diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c
index cb9fdf51ea59..40126f9b901e 100644
--- a/tools/testing/selftests/net/bind_timewait.c
+++ b/tools/testing/selftests/net/bind_timewait.c
@@ -4,7 +4,7 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 FIXTURE(bind_timewait)
 {
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index b7b54d646b93..7d11548b2c61 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -4,7 +4,7 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 static const __u32 in4addr_any = INADDR_ANY;
 static const __u32 in4addr_loopback = INADDR_LOOPBACK;
diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py
index fd0d959914e4..c856d266c8f3 100755
--- a/tools/testing/selftests/net/bpf_offload.py
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True):
         progs = [ p for p in progs if not p['orphaned'] ]
     if expected is not None:
         if len(progs) != expected:
-            fail(True, "%d BPF programs loaded, expected %d" %
-                 (len(progs), expected))
+            fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" %
+                 (len(progs), expected, pp.pformat(progs)))
     return progs
 
 def bpftool_map_list(expected=None, ns=""):
@@ -207,9 +207,11 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
     raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
 
 def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
+    nmaps = None
     for i in range(n_retry):
         maps = bpftool_map_list(ns=ns)
-        if len(maps) == expected:
+        nmaps = len(maps)
+        if nmaps == expected:
             return maps
         time.sleep(0.05)
     raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
@@ -710,6 +712,7 @@ _, base_maps = bpftool("map")
 base_map_names = [
     'pid_iter.rodata', # created on each bpftool invocation
     'libbpf_det_bind', # created on each bpftool invocation
+    'libbpf_global',
 ]
 
 # Check netdevsim
diff --git a/tools/testing/selftests/net/bridge_stp_mode.sh b/tools/testing/selftests/net/bridge_stp_mode.sh
new file mode 100755
index 000000000000..0c81fd029d79
--- /dev/null
+++ b/tools/testing/selftests/net/bridge_stp_mode.sh
@@ -0,0 +1,288 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2034,SC2154,SC2317,SC2329
+#
+# Test for bridge STP mode selection (IFLA_BR_STP_MODE).
+#
+# Verifies that:
+# - stp_mode defaults to auto on new bridges
+# - stp_mode can be toggled between user, kernel, and auto
+# - stp_mode change is rejected while STP is active (-EBUSY)
+# - stp_mode user in a netns yields userspace STP (stp_state=2)
+# - stp_mode kernel forces kernel STP (stp_state=1)
+# - stp_mode auto preserves traditional fallback to kernel STP
+# - stp_mode and stp_state can be set atomically in one message
+# - stp_mode persists across STP disable/enable cycles
+
+source lib.sh
+
+require_command jq
+
+ALL_TESTS="
+	test_default_auto
+	test_set_modes
+	test_reject_change_while_stp_active
+	test_idempotent_mode_while_stp_active
+	test_user_mode_in_netns
+	test_kernel_mode
+	test_auto_mode
+	test_atomic_mode_and_state
+	test_mode_persistence
+"
+
+bridge_info_get()
+{
+	ip -n "$NS1" -d -j link show "$1" | \
+		jq -r ".[0].linkinfo.info_data.$2"
+}
+
+# Record a failure via check_err unless the bridge reports the expected
+# stp_mode.
+#   $1 - bridge name
+#   $2 - expected stp_mode value
+#   $3 - message prefix used on mismatch
+check_stp_mode()
+{
+	local br=$1
+	local expected=$2
+	local msg=$3
+	local val
+
+	val=$(bridge_info_get "$br" stp_mode)
+	test "$val" = "$expected"
+	check_err $? "$msg: expected $expected, got $val"
+}
+
+# Record a failure via check_err unless the bridge reports the expected
+# stp_state.
+#   $1 - bridge name
+#   $2 - expected stp_state value
+#   $3 - message prefix used on mismatch
+check_stp_state()
+{
+	local br=$1
+	local expected=$2
+	local msg=$3
+	local val
+
+	val=$(bridge_info_get "$br" stp_state)
+	test "$val" = "$expected"
+	check_err $? "$msg: expected $expected, got $val"
+}
+
+# Add bridge $1 inside NS1, set it up, and register its removal with defer.
+bridge_create()
+{
+	local name=$1
+
+	ip -n "$NS1" link add "$name" type bridge
+	ip -n "$NS1" link set "$name" up
+	defer ip -n "$NS1" link del "$name"
+}
+
+# Create the network namespace used by all tests; its name ends up in $NS1.
+setup_prepare()
+{
+	setup_ns NS1
+}
+
+# EXIT trap handler: run the deferred cleanups first, then remove the
+# namespaces.
+cleanup()
+{
+	defer_scopes_cleanup
+	cleanup_all_ns
+}
+
+# A freshly created bridge must report stp_mode "auto".
+test_default_auto()
+{
+	local brdev=br-test
+
+	RET=0
+
+	ip -n "$NS1" link add "$brdev" type bridge
+	defer ip -n "$NS1" link del "$brdev"
+
+	check_stp_mode "$brdev" auto "stp_mode default"
+
+	log_test "stp_mode defaults to auto"
+}
+
+# Walk stp_mode through user, kernel and back to auto, verifying that each
+# set succeeds and is reflected in the dump.
+test_set_modes()
+{
+	local mode
+
+	RET=0
+
+	ip -n "$NS1" link add br-test type bridge
+	defer ip -n "$NS1" link del br-test
+
+	for mode in user kernel auto; do
+		ip -n "$NS1" link set dev br-test type bridge stp_mode "$mode"
+		check_err $? "Failed to set stp_mode to $mode"
+		check_stp_mode br-test "$mode" "after set $mode"
+	done
+
+	log_test "stp_mode set user/kernel/auto"
+}
+
+# With STP enabled, a stp_mode change must be refused and leave the mode
+# untouched; once STP is disabled again the change must go through.
+test_reject_change_while_stp_active()
+{
+	local -a br_set
+
+	RET=0
+
+	bridge_create br-test
+	br_set=(ip -n "$NS1" link set dev br-test type bridge)
+
+	"${br_set[@]}" stp_mode kernel
+	check_err $? "Failed to set stp_mode to kernel"
+
+	"${br_set[@]}" stp_state 1
+	check_err $? "Failed to enable STP"
+
+	# STP is running now, so switching the mode is expected to fail.
+	"${br_set[@]}" stp_mode auto 2>/dev/null
+	check_fail $? "Changing stp_mode should fail while STP is active"
+
+	check_stp_mode br-test kernel "mode unchanged after rejected change"
+
+	# Turn STP off; the same change is expected to work afterwards.
+	"${br_set[@]}" stp_state 0
+	check_err $? "Failed to disable STP"
+
+	"${br_set[@]}" stp_mode auto
+	check_err $? "Changing stp_mode should succeed after STP is disabled"
+
+	log_test "reject stp_mode change while STP is active"
+}
+
+# With STP enabled, re-applying the current stp_mode must succeed, and a
+# mode change combined with "stp_state 0" in one command must succeed too.
+test_idempotent_mode_while_stp_active()
+{
+	local -a br_set
+
+	RET=0
+
+	bridge_create br-test
+	br_set=(ip -n "$NS1" link set dev br-test type bridge)
+
+	"${br_set[@]}" stp_mode user stp_state 1
+	check_err $? "Failed to enable STP with user mode"
+
+	# Same mode again while STP is running: expected to be accepted.
+	"${br_set[@]}" stp_mode user
+	check_err $? "Idempotent stp_mode set should succeed while STP is active"
+
+	check_stp_state br-test 2 "stp_state after idempotent set"
+
+	# New mode plus STP disable in a single message: expected to be accepted.
+	"${br_set[@]}" stp_mode auto stp_state 0
+	check_err $? "Mode change with simultaneous STP disable should succeed"
+
+	check_stp_mode br-test auto "mode changed after disable+change"
+	check_stp_state br-test 0 "stp_state after disable+change"
+
+	log_test "idempotent and simultaneous mode change while STP active"
+}
+
+# Test that stp_mode user in a non-init netns yields userspace STP
+# (stp_state == 2). This is the key use case: userspace STP without
+# needing /sbin/bridge-stp or being in init_net.
+test_user_mode_in_netns()
+{
+	RET=0
+
+	bridge_create br-test
+
+	ip -n "$NS1" link set dev br-test type bridge stp_mode user
+	check_err $? "Failed to set stp_mode to user"
+
+	ip -n "$NS1" link set dev br-test type bridge stp_state 1
+	check_err $? "Failed to enable STP"
+
+	check_stp_state br-test 2 "stp_state with user mode"
+
+	log_test "stp_mode user in netns yields userspace STP"
+}
+
+# Test that stp_mode kernel forces kernel STP (stp_state == 1)
+# regardless of whether /sbin/bridge-stp exists.
+test_kernel_mode()
+{
+	RET=0
+
+	bridge_create br-test
+
+	ip -n "$NS1" link set dev br-test type bridge stp_mode kernel
+	check_err $? "Failed to set stp_mode to kernel"
+
+	ip -n "$NS1" link set dev br-test type bridge stp_state 1
+	check_err $? "Failed to enable STP"
+
+	check_stp_state br-test 1 "stp_state with kernel mode"
+
+	log_test "stp_mode kernel forces kernel STP"
+}
+
+# stp_mode auto must keep the traditional behavior: in a netns
+# (non-init_net) bridge-stp is not called, so enabling STP falls back to
+# kernel mode (stp_state == 1).
+test_auto_mode()
+{
+	local brdev=br-test
+
+	RET=0
+
+	bridge_create "$brdev"
+
+	# No stp_mode is set here: auto is the default.
+	ip -n "$NS1" link set dev "$brdev" type bridge stp_state 1
+	check_err $? "Failed to enable STP"
+
+	# bridge-stp is skipped outside init_net, hence kernel STP is expected.
+	check_stp_state "$brdev" 1 "stp_state with auto mode in netns"
+
+	log_test "stp_mode auto preserves traditional behavior"
+}
+
+# Test that stp_mode and stp_state can be set in a single netlink
+# message. This is the intended atomic usage pattern.
+test_atomic_mode_and_state()
+{
+	RET=0
+
+	bridge_create br-test
+
+	# Set both stp_mode and stp_state in one command.
+	ip -n "$NS1" link set dev br-test type bridge stp_mode user stp_state 1
+	check_err $? "Failed to set stp_mode user and stp_state 1 atomically"
+
+	check_stp_state br-test 2 "stp_state after atomic set"
+
+	log_test "atomic stp_mode user + stp_state 1 in single message"
+}
+
+# Test that stp_mode persists across STP disable/enable cycles.
+test_mode_persistence()
+{
+	RET=0
+
+	bridge_create br-test
+
+	# Set user mode and enable STP.
+	ip -n "$NS1" link set dev br-test type bridge stp_mode user
+	ip -n "$NS1" link set dev br-test type bridge stp_state 1
+	check_err $? "Failed to enable STP with user mode"
+
+	# Disable STP.
+	ip -n "$NS1" link set dev br-test type bridge stp_state 0
+	check_err $? "Failed to disable STP"
+
+	# Verify mode is still user.
+	check_stp_mode br-test user "stp_mode after STP disable"
+
+	# Re-enable STP -- should use user mode again.
+	ip -n "$NS1" link set dev br-test type bridge stp_state 1
+	check_err $? "Failed to re-enable STP"
+
+	check_stp_state br-test 2 "stp_state after re-enable"
+
+	log_test "stp_mode persists across STP disable/enable cycles"
+}
+
+# Check iproute2 support before setting up resources: the bridge help text
+# must mention stp_mode, otherwise the whole test is skipped.
+if ! ip link add type bridge help 2>&1 | grep -q "stp_mode"; then
+	echo "SKIP: iproute2 too old, missing stp_mode support"
+	exit "$ksft_skip"
+fi
+
+# Make sure cleanup runs however the script exits.
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+
+# Report the status accumulated while the tests ran.
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/bridge_vlan_dump.sh b/tools/testing/selftests/net/bridge_vlan_dump.sh
new file mode 100755
index 000000000000..ad66731d2a6f
--- /dev/null
+++ b/tools/testing/selftests/net/bridge_vlan_dump.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bridge VLAN range grouping. VLANs are collapsed into a range entry in
+# the dump if they have the same per-VLAN options. These tests verify that
+# VLANs with different per-VLAN option values are not grouped together.
+
+# shellcheck disable=SC1091,SC2034,SC2154,SC2317
+source lib.sh
+
+ALL_TESTS="
+	vlan_range_neigh_suppress
+	vlan_range_mcast_max_groups
+	vlan_range_mcast_n_groups
+	vlan_range_mcast_enabled
+"
+
+# Create the test topology inside a fresh namespace: a VLAN-filtering bridge
+# br0 with per-VLAN multicast snooping enabled and no default PVID, plus a
+# dummy port dummy0 enslaved to it.
+setup_prepare()
+{
+	local -a ns_ip
+
+	setup_ns NS
+	defer cleanup_all_ns
+
+	ns_ip=(ip -n "$NS")
+
+	"${ns_ip[@]}" link add name br0 type bridge vlan_filtering 1 \
+		vlan_default_pvid 0 mcast_snooping 1 mcast_vlan_snooping 1
+	"${ns_ip[@]}" link set dev br0 up
+
+	"${ns_ip[@]}" link add name dummy0 type dummy
+	"${ns_ip[@]}" link set dev dummy0 master br0
+	"${ns_ip[@]}" link set dev dummy0 up
+}
+
+# neigh_suppress: VLANs 10 and 11 must be dumped as separate entries while
+# the option differs, and as the range "10-11" once both values match.
+vlan_range_neigh_suppress()
+{
+	local -a vlan_tool=(bridge -n "$NS" vlan)
+	local -a vlan_dump=(bridge -n "$NS" -d vlan show dev dummy0)
+	local vid
+
+	RET=0
+
+	# Two adjacent VIDs on the port; each removal is deferred right after
+	# the corresponding add.
+	for vid in 10 11; do
+		"${vlan_tool[@]}" add vid "$vid" dev dummy0
+		defer bridge -n "$NS" vlan del vid "$vid" dev dummy0
+	done
+
+	# Give the two VLANs different neigh_suppress settings.
+	"${vlan_tool[@]}" set vid 10 dev dummy0 neigh_suppress on
+	check_err $? "Failed to set neigh_suppress for VLAN 10"
+
+	"${vlan_tool[@]}" set vid 11 dev dummy0 neigh_suppress off
+	check_err $? "Failed to set neigh_suppress for VLAN 11"
+
+	# No range entry may appear; both VIDs must be listed individually.
+	"${vlan_dump[@]}" | grep -q "10-11"
+	check_fail $? "VLANs with different neigh_suppress incorrectly grouped"
+
+	for vid in 10 11; do
+		"${vlan_dump[@]}" | grep -Eq "^\S+\s+${vid}$|^\s+${vid}$"
+		check_err $? "VLAN $vid individual entry not found"
+	done
+
+	# Align VLAN 11 with VLAN 10; the dump must now show the range.
+	"${vlan_tool[@]}" set vid 11 dev dummy0 neigh_suppress on
+	check_err $? "Failed to set neigh_suppress for VLAN 11"
+
+	"${vlan_dump[@]}" | grep -q "10-11"
+	check_err $? "VLANs with same neigh_suppress not grouped"
+
+	log_test "VLAN range grouping with neigh_suppress"
+}
+
+# mcast_max_groups: VLANs 10 and 11 must be dumped as separate entries while
+# their limits differ, and as the range "10-11" once both limits match.
+vlan_range_mcast_max_groups()
+{
+	local -a vlan_tool=(bridge -n "$NS" vlan)
+	local -a vlan_dump=(bridge -n "$NS" -d vlan show dev dummy0)
+	local vid
+
+	RET=0
+
+	# Two adjacent VIDs on the port; each removal is deferred right after
+	# the corresponding add.
+	for vid in 10 11; do
+		"${vlan_tool[@]}" add vid "$vid" dev dummy0
+		defer bridge -n "$NS" vlan del vid "$vid" dev dummy0
+	done
+
+	# Give the two VLANs different mcast_max_groups limits.
+	"${vlan_tool[@]}" set vid 10 dev dummy0 mcast_max_groups 100
+	check_err $? "Failed to set mcast_max_groups for VLAN 10"
+
+	"${vlan_tool[@]}" set vid 11 dev dummy0 mcast_max_groups 200
+	check_err $? "Failed to set mcast_max_groups for VLAN 11"
+
+	# No range entry may appear; both VIDs must be listed individually.
+	"${vlan_dump[@]}" | grep -q "10-11"
+	check_fail $? "VLANs with different mcast_max_groups incorrectly grouped"
+
+	for vid in 10 11; do
+		"${vlan_dump[@]}" | grep -Eq "^\S+\s+${vid}$|^\s+${vid}$"
+		check_err $? "VLAN $vid individual entry not found"
+	done
+
+	# Align VLAN 11 with VLAN 10; the dump must now show the range.
+	"${vlan_tool[@]}" set vid 11 dev dummy0 mcast_max_groups 100
+	check_err $? "Failed to set mcast_max_groups for VLAN 11"
+
+	"${vlan_dump[@]}" | grep -q "10-11"
+	check_err $? "VLANs with same mcast_max_groups not grouped"
+
+	log_test "VLAN range grouping with mcast_max_groups"
+}
+
+# mcast_n_groups: VLANs 10 and 11 must be dumped as separate entries while
+# they hold a different number of MDB entries, and as the range "10-11" once
+# both hold the same number.
+vlan_range_mcast_n_groups()
+{
+	local -a mdb_tool=(bridge -n "$NS" mdb)
+	local -a vlan_dump=(bridge -n "$NS" -d vlan show dev dummy0)
+	local -a grp1=(dev br0 port dummy0 grp 239.1.1.1)
+	local -a grp2=(dev br0 port dummy0 grp 239.1.1.2)
+	local vid
+
+	RET=0
+
+	# Two adjacent VIDs on the port; each removal is deferred right after
+	# the corresponding add.
+	for vid in 10 11; do
+		bridge -n "$NS" vlan add vid "$vid" dev dummy0
+		defer bridge -n "$NS" vlan del vid "$vid" dev dummy0
+	done
+
+	# VLAN 10 gets two groups, VLAN 11 only one.
+	"${mdb_tool[@]}" add "${grp1[@]}" vid 10
+	check_err $? "Failed to add mdb entry to VLAN 10"
+	defer bridge -n "$NS" mdb del "${grp1[@]}" vid 10
+
+	"${mdb_tool[@]}" add "${grp2[@]}" vid 10
+	check_err $? "Failed to add second mdb entry to VLAN 10"
+	defer bridge -n "$NS" mdb del "${grp2[@]}" vid 10
+
+	"${mdb_tool[@]}" add "${grp1[@]}" vid 11
+	check_err $? "Failed to add mdb entry to VLAN 11"
+	defer bridge -n "$NS" mdb del "${grp1[@]}" vid 11
+
+	# No range entry may appear; both VIDs must be listed individually.
+	"${vlan_dump[@]}" | grep -q "10-11"
+	check_fail $? "VLANs with different mcast_n_groups incorrectly grouped"
+
+	for vid in 10 11; do
+		"${vlan_dump[@]}" | grep -Eq "^\S+\s+${vid}$|^\s+${vid}$"
+		check_err $? "VLAN $vid individual entry not found"
+	done
+
+	# Bring VLAN 11 up to VLAN 10's group count; the range must appear.
+	"${mdb_tool[@]}" add "${grp2[@]}" vid 11
+	check_err $? "Failed to add second mdb entry to VLAN 11"
+	defer bridge -n "$NS" mdb del "${grp2[@]}" vid 11
+
+	"${vlan_dump[@]}" | grep -q "10-11"
+	check_err $? "VLANs with same mcast_n_groups not grouped"
+
+	log_test "VLAN range grouping with mcast_n_groups"
+}
+
+# mcast_enabled: port VLANs 10 and 11 must be dumped as separate entries
+# while per-VLAN mcast_snooping differs on the bridge, and as the range
+# "10-11" once both bridge VLANs have the same setting.
+vlan_range_mcast_enabled()
+{
+	RET=0
+
+	# Add two new consecutive VLANs for range grouping test
+	bridge -n "$NS" vlan add vid 10 dev br0 self
+	defer bridge -n "$NS" vlan del vid 10 dev br0 self
+
+	bridge -n "$NS" vlan add vid 11 dev br0 self
+	defer bridge -n "$NS" vlan del vid 11 dev br0 self
+
+	bridge -n "$NS" vlan add vid 10 dev dummy0
+	defer bridge -n "$NS" vlan del vid 10 dev dummy0
+
+	bridge -n "$NS" vlan add vid 11 dev dummy0
+	defer bridge -n "$NS" vlan del vid 11 dev dummy0
+
+	# Configure different mcast_snooping for bridge VLANs
+	# Port VLANs inherit BR_VLFLAG_MCAST_ENABLED from bridge VLANs
+	# Each set is checked so that a rejected option is reported as such
+	# instead of surfacing later as a misleading grouping failure.
+	bridge -n "$NS" vlan global set dev br0 vid 10 mcast_snooping 1
+	check_err $? "Failed to set mcast_snooping for VLAN 10"
+
+	bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 0
+	check_err $? "Failed to set mcast_snooping for VLAN 11"
+
+	# Verify port VLANs are not grouped due to different mcast_enabled
+	bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+	check_fail $? "VLANs with different mcast_enabled incorrectly grouped"
+
+	bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+10$|^\s+10$"
+	check_err $? "VLAN 10 individual entry not found"
+
+	bridge -n "$NS" -d vlan show dev dummy0 | grep -Eq "^\S+\s+11$|^\s+11$"
+	check_err $? "VLAN 11 individual entry not found"
+
+	# Configure same mcast_snooping and verify range grouping
+	bridge -n "$NS" vlan global set dev br0 vid 11 mcast_snooping 1
+	check_err $? "Failed to set mcast_snooping for VLAN 11"
+
+	bridge -n "$NS" -d vlan show dev dummy0 | grep -q "10-11"
+	check_err $? "VLANs with same mcast_enabled not grouped"
+
+	log_test "VLAN range grouping with mcast_enabled"
+}
+
+# Verify the newest tested option is supported
+# (the "bridge vlan" help text is searched for the neigh_suppress keyword;
+# without it the whole script is skipped, not failed).
+if ! bridge vlan help 2>&1 | grep -q "neigh_suppress"; then
+	echo "SKIP: iproute2 too old, missing per-VLAN neighbor suppression support"
+	exit "$ksft_skip"
+fi
+
+# Run the deferred cleanups (registered with "defer" in setup_prepare and in
+# the tests) when the script exits.
+trap defer_scopes_cleanup EXIT
+setup_prepare
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh
new file mode 100755
index 000000000000..334a7eca8a80
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_ether_dst.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Author: Brett A C Sheffield <bacs@librecast.net>
+# Author: Oscar Maes <oscmaes92@gmail.com>
+#
+# Ensure destination ethernet field is correctly set for
+# broadcast packets
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+GW_IP4="192.168.0.2"
+
+setup() {
+	setup_ns CLIENT_NS SERVER_NS
+
+	ip -net "${SERVER_NS}" link add link1 type veth \
+		peer name link0 netns "${CLIENT_NS}"
+
+	ip -net "${CLIENT_NS}" link set link0 up
+	ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0
+
+	ip -net "${SERVER_NS}" link set link1 up
+
+	ip -net "${CLIENT_NS}" route add default via "${GW_IP4}"
+	ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 00:11:22:33:44:55
+}
+
+cleanup() {
+	rm -f "${CAPFILE}" "${OUTPUT}"
+	ip -net "${SERVER_NS}" link del link1
+	cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+# Send one broadcast ping from the client namespace while tcpdump captures
+# on link0, then check that the captured frame's destination MAC is the
+# Ethernet broadcast address.
+#
+# Sets the globals CAPFILE and OUTPUT (removed by cleanup) as well as pid and
+# ether_dst. Returns ksft_pass or ksft_fail.
+test_broadcast_ether_dst() {
+	local rc=0
+	# -u only generates names; the files are created later by tcpdump and
+	# by the shell redirection below.
+	CAPFILE=$(mktemp -u cap.XXXXXXXXXX)
+	OUTPUT=$(mktemp -u out.XXXXXXXXXX)
+
+	echo "Testing ethernet broadcast destination"
+
+	# start tcpdump listening for icmp
+	# tcpdump will exit after receiving a single packet
+	# timeout will kill tcpdump if it is still running after 2s
+	timeout 2s ip netns exec "${CLIENT_NS}" \
+		tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" &
+	pid=$!
+	# Do not send before tcpdump reports "listening" in its output.
+	# NOTE(review): slowwait is presumably a lib.sh poll helper with a
+	# 1 second limit here -- confirm.
+	slowwait 1 grep -qs "listening" "${OUTPUT}"
+
+	# send broadcast ping
+	ip netns exec "${CLIENT_NS}" \
+		ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null
+
+	# wait for tcpdump to exit after receiving the packet
+	wait "${pid}"
+
+	# compare ethernet destination field to ff:ff:ff:ff:ff:ff
+	# (third field of the "tcpdump -e" line, with the trailing comma removed)
+	ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \
+			awk '{sub(/,/,"",$3); print $3}')
+	if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then
+		echo "[ OK ]"
+		rc="${ksft_pass}"
+	else
+		echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \
+			"got ${ether_dst}"
+		rc="${ksft_fail}"
+	fi
+
+	return "${rc}"
+}
+
+# tcpdump is needed both for the capture and for decoding it; skip if it is
+# not an executable found on PATH.
+if [ ! -x "$(command -v tcpdump)" ]; then
+	echo "SKIP: Could not run test without tcpdump tool"
+	exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+test_broadcast_ether_dst
+
+# The script's exit status is the return value of the test function.
+exit $?
diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh
new file mode 100755
index 000000000000..726eb5d25839
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_pmtu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Ensures broadcast route MTU is respected
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="192.168.0.1/24"
+CLIENT_BROADCAST_ADDRESS="192.168.0.255"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="192.168.0.2/24"
+
+# Create client and server namespaces joined by a veth pair, with a 9000
+# byte MTU on the client link and 1500 on the server link, then re-install
+# the client's broadcast route with "mtu 1500" and let the server answer
+# broadcast echo requests.
+#
+# The return status is that of the final sysctl call.
+setup() {
+	local bcast_route
+
+	ip netns add "${CLIENT_NS}"
+	ip netns add "${SERVER_NS}"
+
+	ip -net "${SERVER_NS}" link add link1 type veth \
+		peer name link0 netns "${CLIENT_NS}"
+
+	# Client side: jumbo MTU.
+	ip -net "${CLIENT_NS}" link set link0 up
+	ip -net "${CLIENT_NS}" link set link0 mtu 9000
+	ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0
+
+	# Server side: standard MTU.
+	ip -net "${SERVER_NS}" link set link1 up
+	ip -net "${SERVER_NS}" link set link1 mtu 1500
+	ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1
+
+	# Split the broadcast route listing (first line) into words so it can
+	# be deleted and added back with an explicit MTU.
+	bcast_route=$(ip -net "${CLIENT_NS}" route show table local type broadcast)
+	read -r -a CLIENT_BROADCAST_ENTRY <<< "${bcast_route}"
+	ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}"
+	ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500
+
+	ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0
+}
+
+cleanup() {
+	ip -net "${SERVER_NS}" link del link1
+	ip netns del "${CLIENT_NS}"
+	ip netns del "${SERVER_NS}"
+}
+
+trap cleanup EXIT
+
+setup &&
+	echo "Testing for broadcast route MTU" &&
+	ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1
+
+exit $?
+
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
index 7db292ec4884..5ec1c85c1623 100755
--- a/tools/testing/selftests/net/busy_poll_test.sh
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
-source net_helper.sh
+source lib.sh
 
 NSIM_SV_ID=$((256 + RANDOM % 256))
 NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
@@ -27,6 +27,8 @@ NAPI_DEFER_HARD_IRQS=100
 GRO_FLUSH_TIMEOUT=50000
 SUSPEND_TIMEOUT=20000000
 
+NAPI_THREADED_MODE_BUSY_POLL=2
+
 setup_ns()
 {
 	set -e
@@ -62,6 +64,9 @@ cleanup_ns()
 test_busypoll()
 {
 	suspend_value=${1:-0}
+	napi_threaded_value=${2:-0}
+	prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL}
+
 	tmp_file=$(mktemp)
 	out_file=$(mktemp)
 
@@ -73,10 +78,11 @@ test_busypoll()
 					     -b${SERVER_IP}        \
 					     -m${MAX_EVENTS}       \
 					     -u${BUSY_POLL_USECS}  \
-					     -P${PREFER_BUSY_POLL} \
+					     -P${prefer_busy_poll_value} \
 					     -g${BUSY_POLL_BUDGET} \
 					     -i${NSIM_SV_IFIDX}    \
 					     -s${suspend_value}    \
+					     -t${napi_threaded_value} \
 					     -o${out_file}&
 
 	wait_local_port_listen nssv ${SERVER_PORT} tcp
@@ -109,6 +115,15 @@ test_busypoll_with_suspend()
 	return $?
 }
 
+test_busypoll_with_napi_threaded()
+{
+	local suspend_timeout=0 prefer_busy_poll=0
+
+	# Threaded NAPI busy poll only: suspend timeout and prefer busy poll off.
+	test_busypoll "$suspend_timeout" "$NAPI_THREADED_MODE_BUSY_POLL" \
+		"$prefer_busy_poll"
+}
+
 ###
 ### Code start
 ###
@@ -154,6 +169,13 @@ if [ $? -ne 0 ]; then
 	exit 1
 fi
 
+test_busypoll_with_napi_threaded
+if [ $? -ne 0 ]; then
+	echo "test_busypoll_with_napi_threaded failed"
+	cleanup_ns
+	exit 1
+fi
+
 echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
 
 echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c
index 04c7ff577bb8..3a81f9c94795 100644
--- a/tools/testing/selftests/net/busy_poller.c
+++ b/tools/testing/selftests/net/busy_poller.c
@@ -65,15 +65,16 @@ static uint32_t cfg_busy_poll_usecs;
 static uint16_t cfg_busy_poll_budget;
 static uint8_t cfg_prefer_busy_poll;
 
-/* IRQ params */
+/* NAPI params */
 static uint32_t cfg_defer_hard_irqs;
 static uint64_t cfg_gro_flush_timeout;
 static uint64_t cfg_irq_suspend_timeout;
+static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED;
 
 static void usage(const char *filepath)
 {
 	error(1, 0,
-	      "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>",
+	      "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>",
 	      filepath);
 }
 
@@ -86,7 +87,7 @@ static void parse_opts(int argc, char **argv)
 	if (argc <= 1)
 		usage(argv[0]);
 
-	while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
+	while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) {
 		/* most options take integer values, except o and b, so reduce
 		 * code duplication a bit for the common case by calling
 		 * strtoull here and leave bounds checking and casting per
@@ -168,6 +169,12 @@ static void parse_opts(int argc, char **argv)
 
 			cfg_ifindex = (int)tmp;
 			break;
+		case 't':
+			if (tmp > 2)
+				error(1, ERANGE, "napi threaded poll value must be 0-2");
+
+			cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp;
+			break;
 		}
 	}
 
@@ -247,6 +254,9 @@ static void setup_queue(void)
 	netdev_napi_set_req_set_irq_suspend_timeout(set_req,
 						    cfg_irq_suspend_timeout);
 
+	if (cfg_napi_threaded_poll)
+		netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll);
+
 	if (netdev_napi_set(ys, set_req))
 		error(1, 0, "can't set NAPI params: %s\n", yerr.msg);
 
diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore
new file mode 100644
index 000000000000..764a53fc837f
--- /dev/null
+++ b/tools/testing/selftests/net/can/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_raw_filter
diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile
new file mode 100644
index 000000000000..5b82e60a03e7
--- /dev/null
+++ b/tools/testing/selftests/net/can/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for the CAN selftests (tools/testing/selftests/net/can).
+
+# Relative path from this directory up to the top of the kernel tree.
+top_srcdir = ../../../../..
+
+# Warnings, optimisation and debug info, plus header search paths: the
+# tree's usr/include and whatever $(KHDR_INCLUDES) supplies.
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+
+# Shell wrapper that is run as the test.
+TEST_PROGS := test_raw_filter.sh
+
+# Binary built from test_raw_filter.c and executed by the wrapper above.
+TEST_GEN_FILES := test_raw_filter
+
+# NOTE(review): lib.mk is presumably the shared kselftest build logic that
+# consumes the variables above -- confirm.
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config
new file mode 100644
index 000000000000..188f79796670
--- /dev/null
+++ b/tools/testing/selftests/net/can/config
@@ -0,0 +1,3 @@
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VCAN=m
diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c
new file mode 100644
index 000000000000..bb8ae8854273
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/*
+ * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <linux/if.h>
+
+#include <linux/can.h>
+#include <linux/can/raw.h>
+
+#include "kselftest_harness.h"
+
+#define ID 0x123
+
+char CANIF[IFNAMSIZ];
+
+/*
+ * Send four one-byte CAN frames with identifier ID on @sock, one for every
+ * combination of the EFF and RTR flags, in the order: no flag, RTR, EFF,
+ * EFF | RTR. The single payload byte carries @testcase so the receiver can
+ * match frames to the running test case.
+ *
+ * Returns 0 on success, 1 if any write() fails (after printing the error).
+ */
+static int send_can_frames(int sock, int testcase)
+{
+	static const canid_t flag_combos[] = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	};
+	struct can_frame frame;
+	size_t i;
+
+	/*
+	 * Clear the whole frame first: the complete struct is handed to the
+	 * kernel, including padding, reserved bytes and the unused payload.
+	 */
+	memset(&frame, 0, sizeof(frame));
+	frame.can_dlc = 1;
+	frame.data[0] = testcase;
+
+	for (i = 0; i < sizeof(flag_combos) / sizeof(flag_combos[0]); i++) {
+		frame.can_id = ID | flag_combos[i];
+		if (write(sock, &frame, sizeof(frame)) < 0) {
+			perror("write");
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/* Per-test state: the CAN_RAW socket used for both sending and receiving. */
+FIXTURE(can_filters) {
+	int sock;
+};
+
+/*
+ * Open a CAN_RAW socket, enable reception of the socket's own frames and
+ * bind it to the interface named in CANIF. The socket is stored in
+ * self->sock for the test and the teardown.
+ */
+FIXTURE_SETUP(can_filters)
+{
+	struct sockaddr_can addr;
+	struct ifreq ifr;
+	int recv_own_msgs = 1;
+	int s, ret;
+
+	s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+	ASSERT_GE(s, 0)
+		TH_LOG("failed to create CAN_RAW socket: %d", errno);
+
+	strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name));
+	ret = ioctl(s, SIOCGIFINDEX, &ifr);
+	ASSERT_GE(ret, 0)
+		TH_LOG("failed SIOCGIFINDEX: %d", errno);
+
+	/* Do not hand uninitialised address bytes to bind(). */
+	memset(&addr, 0, sizeof(addr));
+	addr.can_family = AF_CAN;
+	addr.can_ifindex = ifr.ifr_ifindex;
+
+	/*
+	 * The test reads back on the same socket it sends on, so this option
+	 * is mandatory: fail here rather than later with "too few frames".
+	 */
+	ret = setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+			 &recv_own_msgs, sizeof(recv_own_msgs));
+	ASSERT_EQ(ret, 0)
+		TH_LOG("failed to set CAN_RAW_RECV_OWN_MSGS: %d", errno);
+
+	ret = bind(s, (struct sockaddr *)&addr, sizeof(addr));
+	ASSERT_EQ(ret, 0)
+		TH_LOG("failed bind socket: %d", errno);
+
+	self->sock = s;
+}
+
+/* Release the socket opened by the fixture setup. */
+FIXTURE_TEARDOWN(can_filters)
+{
+	close(self->sock);
+}
+
+/* Per-variant parameters: the filter to install and the frames expected back. */
+FIXTURE_VARIANT(can_filters) {
+	int testcase;		/* tag placed in data[0] of every sent frame */
+	canid_t id;		/* value for can_filter.can_id */
+	canid_t mask;		/* value for can_filter.can_mask */
+	int exp_num_rx;		/* number of frames expected to pass the filter */
+	canid_t exp_flags[];	/* expected flag bits per received frame, in order */
+};
+
+/* Receive all frames when filtering for the ID in standard frame format */
+FIXTURE_VARIANT_ADD(can_filters, base) {
+	.testcase = 1,
+	.id = ID,
+	.mask = CAN_SFF_MASK,
+	.exp_num_rx = 4,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Ignore EFF flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_eff) {
+	.testcase = 2,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_SFF_MASK,
+	.exp_num_rx = 4,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Ignore RTR flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_rtr) {
+	.testcase = 3,
+	.id = ID | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK,
+	.exp_num_rx = 4,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_effrtr) {
+	.testcase = 4,
+	.id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK,
+	.exp_num_rx = 4,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only SFF frames when expecting no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff) {
+	.testcase = 5,
+	.id = ID,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) {
+	.testcase = 6,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) {
+	.testcase = 7,
+	.id = ID | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		0,
+		CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag,
+ * ignoring RTR flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) {
+	.testcase = 8,
+	.id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		CAN_EFF_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive no remote frames when filtering for no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr) {
+	.testcase = 9,
+	.id = ID,
+	.mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		0,
+		CAN_EFF_FLAG,
+	},
+};
+
+/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) {
+	.testcase = 10,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		0,
+		CAN_EFF_FLAG,
+	},
+};
+
+/* Receive only remote frames when filter includes RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) {
+	.testcase = 11,
+	.id = ID | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only remote frames when filter includes RTR flag, ignoring EFF
+ * flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) {
+	.testcase = 12,
+	.id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+	.exp_num_rx = 2,
+	.exp_flags = {
+		CAN_RTR_FLAG,
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only SFF data frame when filtering for no flags */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) {
+	.testcase = 13,
+	.id = ID,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		0,
+	},
+};
+
+/* Receive only EFF data frame when filtering for EFF but no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) {
+	.testcase = 14,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		CAN_EFF_FLAG,
+	},
+};
+
+/* Receive only SFF remote frame when filtering for RTR but no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) {
+	.testcase = 15,
+	.id = ID | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only EFF remote frame when filtering for EFF and RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) {
+	.testcase = 16,
+	.id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		CAN_EFF_FLAG | CAN_RTR_FLAG,
+	},
+};
+
+/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag
+ * but based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff) {
+	.testcase = 17,
+	.id = ID,
+	.mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		0,
+	},
+};
+
+/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but
+ * based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff_eff) {
+	.testcase = 18,
+	.id = ID | CAN_EFF_FLAG,
+	.mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+	.exp_num_rx = 1,
+	.exp_flags = {
+		CAN_EFF_FLAG,
+	},
+};
+
+/* This test verifies that the raw CAN filters work, by checking if only frames
+ * with the expected set of flags are received. For each test case, the given
+ * filter (id and mask) is added and four CAN frames are sent with every
+ * combination of set/unset EFF/RTR flags.
+ *
+ * The receive loop runs exp_num_rx + 1 times: the first exp_num_rx rounds
+ * must each deliver a frame with the expected flags, the final round must
+ * time out, proving that no additional frame passed the filter.
+ */
+TEST_F(can_filters, test_filter)
+{
+	struct can_filter rfilter;
+	int ret;
+
+	rfilter.can_id = variant->id;
+	rfilter.can_mask = variant->mask;
+	/* Without the filter in place every later check would be meaningless. */
+	ret = setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER,
+			 &rfilter, sizeof(rfilter));
+	ASSERT_EQ(ret, 0)
+		TH_LOG("failed to set CAN_RAW_FILTER: %d", errno);
+
+	TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X",
+		rfilter.can_id, rfilter.can_mask);
+
+	ret = send_can_frames(self->sock, variant->testcase);
+	ASSERT_EQ(ret, 0)
+		TH_LOG("failed to send CAN frames");
+
+	for (int i = 0; i <= variant->exp_num_rx; i++) {
+		struct can_frame frame;
+		struct timeval tv = {
+			.tv_sec = 0,
+			.tv_usec = 50000, /* 50ms timeout */
+		};
+		fd_set rdfs;
+
+		FD_ZERO(&rdfs);
+		FD_SET(self->sock, &rdfs);
+
+		ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv);
+		ASSERT_GE(ret, 0)
+			TH_LOG("failed select for frame %d, err: %d", i, errno);
+
+		ret = FD_ISSET(self->sock, &rdfs);
+		if (i == variant->exp_num_rx) {
+			/* Extra round: nothing more may be readable. */
+			ASSERT_EQ(ret, 0)
+				TH_LOG("too many frames received");
+		} else {
+			ASSERT_NE(ret, 0)
+				TH_LOG("too few frames received");
+
+			/* A partial frame would leave fields below undefined. */
+			ret = read(self->sock, &frame, sizeof(frame));
+			ASSERT_EQ(ret, (int)sizeof(frame))
+				TH_LOG("failed to read frame %d, ret: %d, err: %d",
+				       i, ret, errno);
+
+			TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i);
+
+			ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK)
+				TH_LOG("received wrong can_id");
+			ASSERT_EQ(variant->testcase, frame.data[0])
+				TH_LOG("received wrong test case");
+
+			/* Only the bits outside CAN_ERR_MASK are compared. */
+			ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK,
+				  variant->exp_flags[i])
+				TH_LOG("received unexpected flags");
+		}
+	}
+}
+
+/*
+ * Entry point: take the interface name from the CANIF environment variable,
+ * store it in the global CANIF buffer and hand over to the kselftest
+ * harness. Fails immediately if the variable is not set.
+ */
+int main(int argc, char **argv)
+{
+	const char *env_if = getenv("CANIF");
+
+	if (env_if) {
+		/*
+		 * CANIF has static storage, so its last byte stays zero and
+		 * the (possibly truncated) copy is always terminated.
+		 */
+		strncpy(CANIF, env_if, sizeof(CANIF) - 1);
+
+		return test_harness_run(argc, argv);
+	}
+
+	printf("CANIF environment variable must contain the test interface\n");
+	return KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh
new file mode 100755
index 000000000000..276d6c06ac95
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	test_raw_filter
+"
+
+net_dir=$(dirname $0)/..
+source $net_dir/lib.sh
+
+export CANIF=${CANIF:-"vcan0"}
+BITRATE=${BITRATE:-500000}
+
+setup()
+{
+	if [[ $CANIF == vcan* ]]; then
+		ip link add name $CANIF type vcan || exit $ksft_skip
+	else
+		ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip
+	fi
+	ip link set dev $CANIF up
+	pwd
+}
+
+cleanup()
+{
+	ip link set dev $CANIF down
+	if [[ $CANIF == vcan* ]]; then
+		ip link delete $CANIF
+	fi
+}
+
+# Run the test_raw_filter binary and record its exit status as the result.
+test_raw_filter()
+{
+	local filter_rc
+
+	./test_raw_filter
+	filter_rc=$?
+
+	check_err $filter_rc
+	log_test "test_raw_filter"
+}
+
+trap cleanup EXIT
+setup
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh
new file mode 100755
index 000000000000..b55680e081ad
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_ip.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+TMPF=$(mktemp --suffix ".pcap")
+
+# EXIT handler: remove the temporary pcap file and the test namespace.
+# NOTE(review): NS is still unset if the script exits before setup_ns (the
+# tcpdump SKIP path) -- presumably cleanup_ns copes with no argument; confirm.
+cleanup()
+{
+    rm -f $TMPF
+    cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+# The capture steps below rely on tcpdump's --immediate-mode option; skip
+# when the help output does not mention it.
+tcpdump -h | grep immediate-mode >> /dev/null
+if [ $? -ne 0 ]; then
+    echo "SKIP - tcpdump with --immediate-mode option required"
+    exit $ksft_skip
+fi
+
+# Namespaces
+setup_ns NS
+NSEXE="ip netns exec $NS"
+
+# Widen the ping group range in the namespace (used by the ICMP cases).
+$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+# A single dummy device carries both the IPv4 and the IPv6 address.
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Test
+# Counters updated by check_result and reported in the summary.
+BAD=0
+TOTAL=0
+
+check_result() {
+    ((TOTAL++))
+    if [ $1 -ne $2 ]; then
+	echo "  Case $3 returned $1, expected $2"
+	((BAD++))
+    fi
+}
+
+# IPV6_DONTFRAG
+for ovr in setsock cmsg both diff; do
+    for df in 0 1; do
+	for p in u U i r; do
+	    [ $p == "u" ] && prot=UDP
+	    [ $p == "U" ] && prot=UDP
+	    [ $p == "i" ] && prot=ICMP
+	    [ $p == "r" ] && prot=RAW
+
+	    [ $ovr == "setsock" ] && m="-F $df"
+	    [ $ovr == "cmsg" ]    && m="-f $df"
+	    [ $ovr == "both" ]    && m="-F $df -f $df"
+	    [ $ovr == "diff" ]    && m="-F $((1 - df)) -f $df"
+
+	    $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
+	    check_result $? $df "DONTFRAG $prot $ovr"
+	done
+    done
+done
+
+# IP_TOS + IPV6_TCLASS
+
+# test_dscp <-4|-6> <target address> <tcpdump keyword>
+#
+# Exercise setting the TOS / traffic class through cmsg_sender for every
+# override mode and protocol. A policy rule sends packets marked with $TOS to
+# a table holding only a prohibit route, so:
+#   - sending with $TOS2 must succeed and show up in the capture as
+#     "<keyword> $TOS2";
+#   - sending with $TOS must be rejected (exit code 1);
+#   - for IPv4, $TOS with ECN bits set must be rejected as well.
+# Results are accumulated through check_result.
+test_dscp() {
+    local -r IPVER=$1
+    local -r TGT=$2
+    local -r MATCH=$3
+
+    local -r TOS=0x10
+    local -r TOS2=0x20
+    local -r ECN=0x3
+
+    # Anything marked with $TOS is looked up in table 300 and prohibited.
+    ip $IPVER -netns $NS rule add tos $TOS lookup 300
+    ip $IPVER -netns $NS route add table 300 prohibit any
+
+    for ovr in setsock cmsg both diff; do
+	for p in u U i r; do
+	    [ $p == "u" ] && prot=UDP
+	    [ $p == "U" ] && prot=UDP
+	    [ $p == "i" ] && prot=ICMP
+	    [ $p == "r" ] && prot=RAW
+
+	    # $m ends with the option whose value is appended on the command
+	    # line; in "both"/"diff" the preceding -C value is overridden by -c.
+	    [ $ovr == "setsock" ] && m="-C"
+	    [ $ovr == "cmsg" ]    && m="-c"
+	    [ $ovr == "both" ]    && m="-C $((TOS2)) -c"
+	    [ $ovr == "diff" ]    && m="-C $((TOS )) -c"
+
+	    # Capture in the background; tcpdump exits by itself after 4 packets.
+	    $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+	    BG=$!
+	    sleep 0.05
+
+	    $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234
+	    check_result $? 0 "$MATCH $prot $ovr - pass"
+
+	    # Keep sending until the capture process has gone away.
+	    while [ -d /proc/$BG ]; do
+	        $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234
+	    done
+
+	    tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $TOS2" >> /dev/null
+	    check_result $? 0 "$MATCH $prot $ovr - packet data"
+	    rm $TMPF
+
+	    # Swap the socket-level value for the rejection cases below.
+	    [ $ovr == "both" ]    && m="-C $((TOS )) -c"
+	    [ $ovr == "diff" ]    && m="-C $((TOS2)) -c"
+
+	    # Match prohibit rule: expect failure
+	    $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS)) -s $TGT 1234
+	    check_result $? 1 "$MATCH $prot $ovr - rejection"
+
+	    # Match prohibit rule: IPv4 masks ECN: expect failure
+	    if [[ "$IPVER" == "-4" ]]; then
+		$NSEXE ./cmsg_sender $IPVER -p $p $m "$((TOS | ECN))" -s $TGT 1234
+		check_result $? 1 "$MATCH $prot $ovr - rejection (ECN)"
+	    fi
+	done
+    done
+}
+
+test_dscp -4 $TGT4 tos
+test_dscp -6 $TGT6 class
+
+# IP_TTL + IPV6_HOPLIMIT
+# test_ttl_hoplimit <-4|-6> <target address> <tcpdump keyword>
+#
+# Exercise setting the TTL / hop limit through cmsg_sender for every override
+# mode and protocol: the send must succeed and the capture must contain
+# "<keyword> $LIM". Results are accumulated through check_result.
+test_ttl_hoplimit() {
+    local -r IPVER=$1
+    local -r TGT=$2
+    local -r MATCH=$3
+
+    local -r LIM=4
+
+    for ovr in setsock cmsg both diff; do
+	for p in u U i r; do
+	    [ $p == "u" ] && prot=UDP
+	    [ $p == "U" ] && prot=UDP
+	    [ $p == "i" ] && prot=ICMP
+	    [ $p == "r" ] && prot=RAW
+
+	    # $m ends with the option whose value ($LIM) is appended on the
+	    # command line; "diff" sets a different socket-level value first.
+	    [ $ovr == "setsock" ] && m="-L"
+	    [ $ovr == "cmsg" ]    && m="-l"
+	    [ $ovr == "both" ]    && m="-L $LIM -l"
+	    [ $ovr == "diff" ]    && m="-L $((LIM + 1)) -l"
+
+	    # Capture in the background; tcpdump exits by itself after 4 packets.
+	    $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+	    BG=$!
+	    sleep 0.05
+
+	    $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234
+	    check_result $? 0 "$MATCH $prot $ovr - pass"
+
+	    # Keep sending until the capture process has gone away.
+	    while [ -d /proc/$BG ]; do
+		$NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234
+	    done
+
+	    # The trailing [^0-9] keeps "4" from matching e.g. "40" or "41".
+	    tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $LIM[^0-9]" >> /dev/null
+	    check_result $? 0 "$MATCH $prot $ovr - packet data"
+	    rm $TMPF
+	done
+    done
+}
+
+test_ttl_hoplimit -4 $TGT4 ttl
+test_ttl_hoplimit -6 $TGT6 hlim
+
+# IPV6 exthdr
+for p in u U i r; do
+    # Very basic "does it crash" test
+    for h in h d r; do
+	$NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
+	check_result $? 0 "ExtHdr $prot $ovr - pass"
+    done
+done
+
+# Summary
+# Exit 0 when no case failed, otherwise report the failure ratio and exit 1.
+if [ $BAD -eq 0 ]; then
+    echo "OK"
+    exit 0
+fi
+
+echo "FAIL - $BAD/$TOTAL cases failed"
+exit 1
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
deleted file mode 100755
index 8bc23fb4c82b..000000000000
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source lib.sh
-
-IP6=2001:db8:1::1/64
-TGT6=2001:db8:1::2
-TMPF=$(mktemp --suffix ".pcap")
-
-cleanup()
-{
-    rm -f $TMPF
-    cleanup_ns $NS
-}
-
-trap cleanup EXIT
-
-tcpdump -h | grep immediate-mode >> /dev/null
-if [ $? -ne 0 ]; then
-    echo "SKIP - tcpdump with --immediate-mode option required"
-    exit $ksft_skip
-fi
-
-# Namespaces
-setup_ns NS
-NSEXE="ip netns exec $NS"
-
-$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
-
-# Connectivity
-ip -netns $NS link add type dummy
-ip -netns $NS link set dev dummy0 up
-ip -netns $NS addr add $IP6 dev dummy0
-
-# Test
-BAD=0
-TOTAL=0
-
-check_result() {
-    ((TOTAL++))
-    if [ $1 -ne $2 ]; then
-	echo "  Case $3 returned $1, expected $2"
-	((BAD++))
-    fi
-}
-
-# IPV6_DONTFRAG
-for ovr in setsock cmsg both diff; do
-    for df in 0 1; do
-	for p in u i r; do
-	    [ $p == "u" ] && prot=UDP
-	    [ $p == "i" ] && prot=ICMP
-	    [ $p == "r" ] && prot=RAW
-
-	    [ $ovr == "setsock" ] && m="-F $df"
-	    [ $ovr == "cmsg" ]    && m="-f $df"
-	    [ $ovr == "both" ]    && m="-F $df -f $df"
-	    [ $ovr == "diff" ]    && m="-F $((1 - df)) -f $df"
-
-	    $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
-	    check_result $? $df "DONTFRAG $prot $ovr"
-	done
-    done
-done
-
-# IPV6_TCLASS
-TOS=0x10
-TOS2=0x20
-
-ip -6 -netns $NS rule add tos $TOS lookup 300
-ip -6 -netns $NS route add table 300 prohibit any
-
-for ovr in setsock cmsg both diff; do
-    for p in u i r; do
-	[ $p == "u" ] && prot=UDP
-	[ $p == "i" ] && prot=ICMP
-	[ $p == "r" ] && prot=RAW
-
-	[ $ovr == "setsock" ] && m="-C"
-	[ $ovr == "cmsg" ]    && m="-c"
-	[ $ovr == "both" ]    && m="-C $((TOS2)) -c"
-	[ $ovr == "diff" ]    && m="-C $((TOS )) -c"
-
-	$NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
-	BG=$!
-	sleep 0.05
-
-	$NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
-	check_result $? 0 "TCLASS $prot $ovr - pass"
-
-	while [ -d /proc/$BG ]; do
-	    $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
-	done
-
-	tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
-	check_result $? 0 "TCLASS $prot $ovr - packet data"
-	rm $TMPF
-
-	[ $ovr == "both" ]    && m="-C $((TOS )) -c"
-	[ $ovr == "diff" ]    && m="-C $((TOS2)) -c"
-
-	$NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234
-	check_result $? 1 "TCLASS $prot $ovr - rejection"
-    done
-done
-
-# IPV6_HOPLIMIT
-LIM=4
-
-for ovr in setsock cmsg both diff; do
-    for p in u i r; do
-	[ $p == "u" ] && prot=UDP
-	[ $p == "i" ] && prot=ICMP
-	[ $p == "r" ] && prot=RAW
-
-	[ $ovr == "setsock" ] && m="-L"
-	[ $ovr == "cmsg" ]    && m="-l"
-	[ $ovr == "both" ]    && m="-L $LIM -l"
-	[ $ovr == "diff" ]    && m="-L $((LIM + 1)) -l"
-
-	$NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
-	BG=$!
-	sleep 0.05
-
-	$NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
-	check_result $? 0 "HOPLIMIT $prot $ovr - pass"
-
-	while [ -d /proc/$BG ]; do
-	    $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
-	done
-
-	tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
-	check_result $? 0 "HOPLIMIT $prot $ovr - packet data"
-	rm $TMPF
-    done
-done
-
-# IPV6 exthdr
-for p in u i r; do
-    # Very basic "does it crash" test
-    for h in h d r; do
-	$NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
-	check_result $? 0 "ExtHdr $prot $ovr - pass"
-    done
-done
-
-# Summary
-if [ $BAD -ne 0 ]; then
-    echo "FAIL - $BAD/$TOTAL cases failed"
-    exit 1
-else
-    echo "OK"
-    exit 0
-fi
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index bc314382e4e1..67a72b1a2f3d 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -16,7 +16,7 @@
 #include <linux/udp.h>
 #include <sys/socket.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 enum {
 	ERN_SUCCESS = 0,
@@ -33,6 +33,7 @@ enum {
 	ERN_RECVERR,
 	ERN_CMSG_RD,
 	ERN_CMSG_RCV,
+	ERN_SEND_MORE,
 };
 
 struct option_cmsg_u32 {
@@ -46,6 +47,7 @@ struct options {
 	const char *service;
 	unsigned int size;
 	unsigned int num_pkt;
+	bool msg_more;
 	struct {
 		unsigned int mark;
 		unsigned int dontfrag;
@@ -72,7 +74,7 @@ struct options {
 		struct option_cmsg_u32 tclass;
 		struct option_cmsg_u32 hlimit;
 		struct option_cmsg_u32 exthdr;
-	} v6;
+	} cmsg;
 } opt = {
 	.size = 13,
 	.num_pkt = 1,
@@ -94,7 +96,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\t\t-S      send() size\n"
 	       "\t\t-4/-6   Force IPv4 / IPv6 only\n"
 	       "\t\t-p prot Socket protocol\n"
-	       "\t\t        (u = UDP (default); i = ICMP; r = RAW)\n"
+	       "\t\t        (u = UDP (default); i = ICMP; r = RAW;\n"
+	       "\t\t         U = UDP with MSG_MORE)\n"
 	       "\n"
 	       "\t\t-m val  Set SO_MARK with given value\n"
 	       "\t\t-M val  Set SO_MARK via setsockopt\n"
@@ -104,13 +107,13 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
 	       "\t\t-t      Enable time stamp reporting\n"
 	       "\t\t-f val  Set don't fragment via cmsg\n"
 	       "\t\t-F val  Set don't fragment via setsockopt\n"
-	       "\t\t-c val  Set TCLASS via cmsg\n"
-	       "\t\t-C val  Set TCLASS via setsockopt\n"
-	       "\t\t-l val  Set HOPLIMIT via cmsg\n"
-	       "\t\t-L val  Set HOPLIMIT via setsockopt\n"
+	       "\t\t-c val  Set TOS/TCLASS via cmsg\n"
+	       "\t\t-C val  Set TOS/TCLASS via setsockopt\n"
+	       "\t\t-l val  Set TTL/HOPLIMIT via cmsg\n"
+	       "\t\t-L val  Set TTL/HOPLIMIT via setsockopt\n"
 	       "\t\t-H type Add an IPv6 header option\n"
-	       "\t\t        (h = HOP; d = DST; r = RTDST)"
-	       "");
+	       "\t\t        (h = HOP; d = DST; r = RTDST)\n"
+	       "\n");
 	exit(ERN_HELP);
 }
 
@@ -133,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[])
 			opt.sock.family = AF_INET6;
 			break;
 		case 'p':
-			if (*optarg == 'u' || *optarg == 'U') {
+			if (*optarg == 'u') {
 				opt.sock.proto = IPPROTO_UDP;
+			} else if (*optarg == 'U') {
+				opt.sock.proto = IPPROTO_UDP;
+				opt.msg_more = true;
 			} else if (*optarg == 'i' || *optarg == 'I') {
 				opt.sock.proto = IPPROTO_ICMP;
 			} else if (*optarg == 'r') {
@@ -169,37 +175,37 @@ static void cs_parse_args(int argc, char *argv[])
 			opt.ts.ena = true;
 			break;
 		case 'f':
-			opt.v6.dontfrag.ena = true;
-			opt.v6.dontfrag.val = atoi(optarg);
+			opt.cmsg.dontfrag.ena = true;
+			opt.cmsg.dontfrag.val = atoi(optarg);
 			break;
 		case 'F':
 			opt.sockopt.dontfrag = atoi(optarg);
 			break;
 		case 'c':
-			opt.v6.tclass.ena = true;
-			opt.v6.tclass.val = atoi(optarg);
+			opt.cmsg.tclass.ena = true;
+			opt.cmsg.tclass.val = atoi(optarg);
 			break;
 		case 'C':
 			opt.sockopt.tclass = atoi(optarg);
 			break;
 		case 'l':
-			opt.v6.hlimit.ena = true;
-			opt.v6.hlimit.val = atoi(optarg);
+			opt.cmsg.hlimit.ena = true;
+			opt.cmsg.hlimit.val = atoi(optarg);
 			break;
 		case 'L':
 			opt.sockopt.hlimit = atoi(optarg);
 			break;
 		case 'H':
-			opt.v6.exthdr.ena = true;
+			opt.cmsg.exthdr.ena = true;
 			switch (optarg[0]) {
 			case 'h':
-				opt.v6.exthdr.val = IPV6_HOPOPTS;
+				opt.cmsg.exthdr.val = IPV6_HOPOPTS;
 				break;
 			case 'd':
-				opt.v6.exthdr.val = IPV6_DSTOPTS;
+				opt.cmsg.exthdr.val = IPV6_DSTOPTS;
 				break;
 			case 'r':
-				opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS;
+				opt.cmsg.exthdr.val = IPV6_RTHDRDSTOPTS;
 				break;
 			default:
 				printf("Error: hdr type: %s\n", optarg);
@@ -261,12 +267,20 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 			  SOL_SOCKET, SO_MARK, &opt.mark);
 	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
 			  SOL_SOCKET, SO_PRIORITY, &opt.priority);
-	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
-			  SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
-	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
-			  SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
-	ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
-			  SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
+
+	if (opt.sock.family == AF_INET) {
+		ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+				  SOL_IP, IP_TOS, &opt.cmsg.tclass);
+		ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+				  SOL_IP, IP_TTL, &opt.cmsg.hlimit);
+	} else {
+		ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+				  SOL_IPV6, IPV6_DONTFRAG, &opt.cmsg.dontfrag);
+		ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+				  SOL_IPV6, IPV6_TCLASS, &opt.cmsg.tclass);
+		ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+				  SOL_IPV6, IPV6_HOPLIMIT, &opt.cmsg.hlimit);
+	}
 
 	if (opt.txtime.ena) {
 		__u64 txtime;
@@ -297,14 +311,14 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
 		*(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED |
 					    SOF_TIMESTAMPING_TX_SOFTWARE;
 	}
-	if (opt.v6.exthdr.ena) {
+	if (opt.cmsg.exthdr.ena) {
 		cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
 		cmsg_len += CMSG_SPACE(8);
 		if (cbuf_sz < cmsg_len)
 			error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
 
 		cmsg->cmsg_level = SOL_IPV6;
-		cmsg->cmsg_type = opt.v6.exthdr.val;
+		cmsg->cmsg_type = opt.cmsg.exthdr.val;
 		cmsg->cmsg_len = CMSG_LEN(8);
 		*(__u64 *)CMSG_DATA(cmsg) = 0;
 	}
@@ -405,23 +419,35 @@ static void ca_set_sockopts(int fd)
 	    setsockopt(fd, SOL_SOCKET, SO_MARK,
 		       &opt.sockopt.mark, sizeof(opt.sockopt.mark)))
 		error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
-	if (opt.sockopt.dontfrag &&
-	    setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
-		       &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
-		error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
-	if (opt.sockopt.tclass &&
-	    setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
-		       &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
-		error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
-	if (opt.sockopt.hlimit &&
-	    setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
-		       &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
-		error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
 	if (opt.sockopt.priority &&
 	    setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
 		       &opt.sockopt.priority, sizeof(opt.sockopt.priority)))
 		error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
 
+	if (opt.sock.family == AF_INET) {
+		if (opt.sockopt.tclass &&
+		    setsockopt(fd, SOL_IP, IP_TOS,
+			       &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+			error(ERN_SOCKOPT, errno, "setsockopt IP_TOS");
+		if (opt.sockopt.hlimit &&
+		    setsockopt(fd, SOL_IP, IP_TTL,
+			       &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+			error(ERN_SOCKOPT, errno, "setsockopt IP_TTL");
+	} else {
+		if (opt.sockopt.dontfrag &&
+		    setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
+			       &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
+			error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
+		if (opt.sockopt.tclass &&
+		    setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
+			       &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+			error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
+		if (opt.sockopt.hlimit &&
+		    setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
+			       &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+			error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+	}
+
 	if (opt.txtime.ena) {
 		struct sock_txtime so_txtime = {
 			.clockid = CLOCK_MONOTONIC,
@@ -465,7 +491,8 @@ int main(int argc, char *argv[])
 	if (err) {
 		fprintf(stderr, "Can't resolve address [%s]:%s\n",
 			opt.host, opt.service);
-		return ERN_SOCK_CREATE;
+		err = ERN_SOCK_CREATE;
+		goto err_free_buff;
 	}
 
 	if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP)
@@ -474,8 +501,8 @@ int main(int argc, char *argv[])
 	fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto);
 	if (fd < 0) {
 		fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
-		freeaddrinfo(ai);
-		return ERN_RESOLVE;
+		err = ERN_RESOLVE;
+		goto err_free_info;
 	}
 
 	if (opt.sock.proto == IPPROTO_ICMP) {
@@ -511,7 +538,7 @@ int main(int argc, char *argv[])
 	cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
 
 	for (i = 0; i < opt.num_pkt; i++) {
-		err = sendmsg(fd, &msg, 0);
+		err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0);
 		if (err < 0) {
 			if (!opt.silent_send)
 				fprintf(stderr, "send failed: %s\n", strerror(errno));
@@ -522,6 +549,14 @@ int main(int argc, char *argv[])
 			err = ERN_SEND_SHORT;
 			goto err_out;
 		}
+		if (opt.msg_more) {
+			err = write(fd, NULL, 0);
+			if (err < 0) {
+				fprintf(stderr, "send more: %s\n", strerror(errno));
+				err = ERN_SEND_MORE;
+				goto err_out;
+			}
+		}
 	}
 	err = ERN_SUCCESS;
 
@@ -540,6 +575,9 @@ int main(int argc, char *argv[])
 
 err_out:
 	close(fd);
+err_free_info:
 	freeaddrinfo(ai);
+err_free_buff:
+	free(buf);
 	return err;
 }
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 5b9baf708950..2a390cae41bf 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,109 +1,135 @@
-CONFIG_USER_NS=y
-CONFIG_NET_NS=y
+CONFIG_AMT=m
+CONFIG_BAREUDP=m
 CONFIG_BONDING=m
 CONFIG_BPF_SYSCALL=y
-CONFIG_TEST_BPF=m
-CONFIG_NUMA=y
-CONFIG_RPS=y
-CONFIG_SYSFS=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_NET_VRF=y
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_IPV6=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_VETH=y
-CONFIG_NET_IPVTI=y
-CONFIG_IPV6_VTI=y
-CONFIG_DUMMY=y
-CONFIG_BRIDGE_VLAN_FILTERING=y
 CONFIG_BRIDGE=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VXCAN=m
+CONFIG_CRYPTO_ARIA=y
 CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_VLAN_8021Q=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SM4_GENERIC=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_BTF_MODULES=n
+CONFIG_DUMMY=y
 CONFIG_GENEVE=m
 CONFIG_IFB=y
 CONFIG_INET_DIAG=y
 CONFIG_INET_ESP=y
 CONFIG_INET_ESP_OFFLOAD=y
-CONFIG_NET_FOU=y
-CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_NETFILTER=y
-CONFIG_NETFILTER_ADVANCED=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_IPV6_MROUTE=y
-CONFIG_IPV6_SIT=y
-CONFIG_IP_DCCP=m
-CONFIG_NF_NAT=m
+CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
 CONFIG_IP6_NF_NAT=m
 CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
 CONFIG_IP_NF_NAT=m
 CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
 CONFIG_IPV6_GRE=m
+CONFIG_IPV6_ILA=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
 CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_VTI=y
+CONFIG_IPVLAN=m
+CONFIG_IPVTAP=m
+CONFIG_KALLSYMS=y
+CONFIG_L2TP=m
 CONFIG_L2TP_ETH=m
 CONFIG_L2TP_IP=m
-CONFIG_L2TP=m
 CONFIG_L2TP_V3=y
 CONFIG_MACSEC=m
 CONFIG_MACVLAN=y
 CONFIG_MACVTAP=y
 CONFIG_MPLS=y
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_MPLS_ROUTING=m
 CONFIG_MPTCP=y
-CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_IPV6=y
-CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_NAT=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_NET_ACT_CT=m
 CONFIG_NET_ACT_GACT=m
+CONFIG_NET_ACT_MIRRED=m
 CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_CLS_BASIC=m
 CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
 CONFIG_NET_CLS_MATCHALL=m
 CONFIG_NET_CLS_U32=m
-CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NETDEVSIM=m
+CONFIG_NET_DROP_MONITOR=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
 CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPIP=y
+CONFIG_NET_IPVTI=y
+CONFIG_NETKIT=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_FQ=m
 CONFIG_NET_SCH_FQ_CODEL=m
 CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_FQ=m
-CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_NETEM=y
 CONFIG_NET_SCH_PRIO=m
-CONFIG_NFT_COMPAT=m
+CONFIG_NET_VRF=y
+CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_OVS=y
 CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_NAT=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_NAT=m
+CONFIG_NUMA=y
 CONFIG_OPENVSWITCH=m
 CONFIG_OPENVSWITCH_GENEVE=m
 CONFIG_OPENVSWITCH_GRE=m
 CONFIG_OPENVSWITCH_VXLAN=m
+CONFIG_PROC_SYSCTL=y
 CONFIG_PSAMPLE=m
+CONFIG_RPS=y
+CONFIG_SYSFS=y
+CONFIG_TAP=m
 CONFIG_TCP_MD5SIG=y
 CONFIG_TEST_BLACKHOLE_DEV=m
-CONFIG_KALLSYMS=y
+CONFIG_TEST_BPF=m
 CONFIG_TLS=m
 CONFIG_TRACEPOINTS=y
-CONFIG_NET_DROP_MONITOR=m
-CONFIG_NETDEVSIM=m
-CONFIG_MPLS_ROUTING=m
-CONFIG_MPLS_IPTUNNEL=m
-CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_CLS_FLOWER=m
-CONFIG_NET_ACT_TUNNEL_KEY=m
-CONFIG_NET_ACT_MIRRED=m
-CONFIG_BAREUDP=m
-CONFIG_IPV6_IOAM6_LWTUNNEL=y
-CONFIG_CRYPTO_SM4_GENERIC=y
-CONFIG_AMT=m
 CONFIG_TUN=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
 CONFIG_VXLAN=m
-CONFIG_IP_SCTP=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_CRYPTO_ARIA=y
 CONFIG_XFRM_INTERFACE=m
 CONFIG_XFRM_USER=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
-CONFIG_IP6_NF_MATCH_RPFILTER=m
diff --git a/tools/testing/selftests/net/double_udp_encap.sh b/tools/testing/selftests/net/double_udp_encap.sh
new file mode 100755
index 000000000000..9aaf97cdf141
--- /dev/null
+++ b/tools/testing/selftests/net/double_udp_encap.sh
@@ -0,0 +1,393 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+# shellcheck disable=SC2155 # prefer RO variable over return value from cmd
+readonly CLI="$(dirname "$(readlink -f "$0")")/../../../net/ynl/pyynl/cli.py"
+
+readonly SRC=1
+readonly DST=2
+
+readonly NET_V4=192.168.1.
+readonly NET_V6=2001:db8::
+readonly OL1_NET_V4=172.16.1.
+readonly OL1_NET_V6=2001:db8:1::
+readonly OL2_NET_V4=172.16.2.
+readonly OL2_NET_V6=2001:db8:2::
+
+trap cleanup_all_ns EXIT
+
+# shellcheck disable=SC2329 # can't figure out usage trough a variable
+is_ipv6() {
+	if [[ $1 =~ .*:.* ]]; then
+		return 0
+	fi
+	return 1
+}
+
+# shellcheck disable=SC2329 # can't figure out usage through a variable
+create_gnv_endpoint() {
+	local -r netns=$1	# namespace the device is created in
+	local -r bm_rem_addr=$2	# remote endpoint address, IPv4 or IPv6
+	local -r gnv_dev=$3	# name of the geneve device to create
+	local -r gnv_id=$4	# value of the geneve "id" attribute
+	local opts=$5	# optional extra attributes, as '"key":val[,...]'
+	local gnv_json
+	local rem
+	# the attribute carrying the remote address depends on its family
+	if is_ipv6 "$bm_rem_addr"; then
+		rem=remote6
+	else
+		rem=remote
+	fi
+
+	# add ynl opt separator, if needed
+	[ -n "$opts" ] && opts=", $opts"
+	# create the link with a rt-link "newlink" request via the ynl CLI, then bring it up
+	gnv_json="{ \"id\": $gnv_id, \"$rem\": \"$bm_rem_addr\"$opts }"
+	ip netns exec "$netns" "$CLI" --family rt-link --create --excl \
+	   --do newlink  --json "{\"ifname\": \"$gnv_dev\",
+	       \"linkinfo\": {\"kind\":\"geneve\",
+	       \"data\": $gnv_json } }" > /dev/null
+	ip -n "$netns" link set dev "$gnv_dev" up
+}
+
+# shellcheck disable=SC2329 # can't figure out usage through a variable
+create_vxlan_endpoint() {
+	local -r netns=$1	# namespace the device is created in
+	local -r bm_rem_addr=$2	# remote endpoint address
+	local -r vxlan_dev=$3	# name of the vxlan device to create
+	local -r vxlan_id=$4	# vxlan id
+	local -r opts_str=$5	# comma separated '"key":val' list, may be empty
+	local oldifs
+	local -a opts
+	local opt
+
+	# convert the '"key":val' options to ip-link arguments; only "port" is translated
+	oldifs=$IFS
+	IFS=','
+	for opt in $opts_str; do
+		local pattern='"port":'
+
+		[ -n "$opt" ] || continue
+		# '"port":N' becomes the two words: dstport N
+		opts+=("${opt/$pattern*/dstport}" "${opt/$pattern/}")
+	done
+	IFS=$oldifs
+	[ ${#opts[@]} -gt 0 ] || opts+=("dstport" "4789")	# no option given: use dstport 4789
+
+	ip -n "$netns" link add "$vxlan_dev" type vxlan id "$vxlan_id" \
+	   remote "$bm_rem_addr" "${opts[@]}"
+	ip -n "$netns" link set dev "$vxlan_dev" up
+}
+
+create_ns() {
+	local nested_opt='"port":6082'	# nested endpoints always get port 6082 (plus $2, if any)
+	local create_endpoint
+	local options="$1"	# options for the first-level tunnel endpoints
+	local feature
+	local dev
+	local id
+	local ns
+	# $1, $2: optional endpoint options; $ENCAP and $FAMILY are read from the caller
+	RET=0
+
+	#  +-------------+    +-------------+
+	#  | NS_SRC      |    | NS_DST      |
+	#  |             |    |             |
+	#  |   gnv_nst1  |    |  gnv_nst2   |
+	#  |   +         |    |         +   |
+	#  |   |         |    |         |   |
+	#  |   +         |    |         +   |
+	#  |  gnv1       |    |        gnv2 |
+	#  |   +         |    |         +   |
+	#  |   |         |    |         |   |
+	#  |   + veth1 +--------+ veth2 +   |
+	#  |             |    |             |
+	#  +-------------+    +-------------+
+
+	setup_ns NS_SRC NS_DST
+
+	# concatenate caller provided options and default one
+	[ -n "$2" ] && nested_opt="$nested_opt,$2"
+
+	ip link add name "veth$SRC" netns "$NS_SRC" type veth \
+	   peer name "veth$DST" netns "$NS_DST"
+	case "$ENCAP" in
+	vxlan)
+		create_endpoint=create_vxlan_endpoint
+		dev=vx
+		;;
+	geneve)
+		create_endpoint=create_gnv_endpoint
+		dev=gnv
+		;;
+	esac
+	# id is the side index: it picks the veth, the tunnel devices and the host addresses
+	id=1
+	for ns in "${NS_LIST[@]}"; do
+		ip -n "$ns" link set dev "veth$id" up
+
+		# ensure the sender can do large write just after 3whs
+		ip netns exec "$ns" \
+		   sysctl -qw net.ipv4.tcp_wmem="4096 4194304 4194304"
+
+		# note that 3 - $SRC == $DST and 3 - $DST == $SRC
+		if [ $FAMILY = "4" ]; then
+			ip -n "$ns" addr add dev "veth$id" "$NET_V4$id/24"
+			$create_endpoint "$ns" "$NET_V4$((3 - id))" \
+			   "$dev$id" 4 "$options"
+			ip -n "$ns" addr add dev "$dev$id" "$OL1_NET_V4$id/24"
+
+			# nested tunnel devices
+			# pmtu can't be propagated to upper layer devices;
+			# need manual adjust
+			$create_endpoint "$ns" "$OL1_NET_V4$((3 - id))" \
+			   "$dev"_nst"$id" 40 "$nested_opt"
+			ip -n "$ns" addr add dev "$dev"_nst"$id" \
+			   "$OL2_NET_V4$id/24"
+			ip -n "$ns" link set dev "$dev"_nst"$id" mtu 1392
+		else
+			ip -n "$ns" addr add dev "veth$id" "$NET_V6$id/64" \
+			   nodad
+			$create_endpoint "$ns" "$NET_V6$((3 - id))" \
+			   "$dev"6"$id" 6 "$options"
+			ip -n "$ns" addr add dev "$dev"6"$id" \
+			   "$OL1_NET_V6$id/64" nodad
+			# nested tunnel devices, with a manually adjusted mtu as for IPv4
+			$create_endpoint "$ns" "$OL1_NET_V6$((3 - id))" \
+			   "$dev"6_nst"$id" 60 "$nested_opt"
+			ip -n "$ns" addr add dev "$dev"6_nst"$id" \
+			   "$OL2_NET_V6$id/64" nodad
+			ip -n "$ns" link set dev "$dev"6_nst"$id" mtu 1352
+		fi
+		id=$((id+1))
+	done
+
+	# enable GRO heuristic on the veth peer and ensure UDP L4 over tunnel is
+	# actually segmented
+	for feature in tso tx-udp_tnl-segmentation; do
+		ip netns exec "$NS_SRC" ethtool -K "veth$SRC" \
+		   "$feature" off 2>/dev/null
+	done
+}
+
+create_ns_gso() {
+	# Build the topology with create_ns, then enable GSO-partial and UDP
+	# tunnel segmentation on the sender's first-level tunnel device.
+	local tun_dev=vx
+
+	create_ns "$@"
+	[ "$ENCAP" = "geneve" ] && tun_dev=gnv
+	if [ "$FAMILY" = "6" ]; then
+		tun_dev="${tun_dev}6"
+	fi
+	ip netns exec "$NS_SRC" ethtool -K "$tun_dev$SRC" \
+	   tx-gso-partial on \
+	   tx-udp_tnl-segmentation on \
+	   tx-udp_tnl-csum-segmentation on
+}
+
+create_ns_gso_gro() {
+	create_ns_gso "$@"	# topology plus sender-side tunnel offloads; args passed through
+	ip netns exec "$NS_DST" ethtool -K "veth$DST" gro on	# GRO on the receiver veth
+	ip netns exec "$NS_SRC" ethtool -K "veth$SRC" tx off >/dev/null 2>&1	# output and errors discarded
+}
+
+run_test() {
+	local -r dst=$NET$DST	# receiver address on the nested overlay
+	local -r msg=$1	# test name handed to log_test
+	local -r total_size=$2	# size given to the sender (-s)
+	local -r encappkts=$3	# expected count on the receiver's INPUT rule
+	local inner_proto_offset=0
+	local inner_maclen=14
+	local rx_family="-4"
+	local ipt=iptables
+	local bpf_filter
+	local -a rx_args
+	local wire_pkts	# expected count on the sender's OUTPUT rule ($4 if given)
+	local rcvpkts
+	local encl=8
+	local dport
+	local pkts
+	local snd
+	# reads FAMILY, ENCAP, GSO_SIZE, NET, USE_HINT and INHERIT set by the caller
+	if [ $FAMILY = "6" ]; then
+		ipt=ip6tables
+	else
+		# rx program does not support '-6' and implies ipv6 usage by
+		# default
+		rx_args=("$rx_family")
+	fi
+
+	# The receiver can only check fixed-size packets
+	pkts=$((total_size / GSO_SIZE))
+	if [ -n "$4" ]; then
+		wire_pkts=$4
+	elif [ $((total_size % GSO_SIZE)) -eq 0 ]; then
+		wire_pkts=1
+		rx_args+=("-l" "$GSO_SIZE")
+	else
+		wire_pkts=2
+		pkts=$((pkts + 1))
+	fi
+	# UDP destination port of the outermost tunnel, matched by the counting rules
+	if [ "$ENCAP" = "geneve" ]; then
+		dport=6081
+	else
+		dport=4789
+	fi
+
+	# Either:
+	# - IPv4, nested tunnel carries UDP over IPv4, with dport 6082,
+	#   innermost is TCP over IPv4 on port 8000
+	# - IPv6, nested tunnel carries UDP over IPv6, with dport 6082,
+	#   innermost is TCP over IPv6 on port 8000
+	# The nested tunnel port is 6082 and the nested encap len is 8
+	# regardless of the encap type (no geneve opts).
+	# In inherit protocol mode there is no nested mac hdr and the nested
+	# l3 protocol type field belongs to the geneve hdr.
+	[ "$USE_HINT" = true ] && encl=16
+	[ "$INHERIT" = true ] && inner_maclen=0
+	[ "$INHERIT" = true ] && inner_proto_offset=-4
+	local inner=$((inner_maclen+encl))
+	local proto=$((inner_maclen+encl+inner_proto_offset))
+	bpf_filter=$(nfbpf_compile "(ip &&
+		ip[$((40+encl))] == 0x08 && ip[$((41+encl))] == 0x00 &&
+		ip[$((51+encl))] == 0x11 &&
+		ip[$((64+encl))] == 0x17 && ip[$((65+encl))] == 0xc2 &&
+		ip[$((76+proto))] == 0x08 && ip[$((77+proto))] == 0x00 &&
+		ip[$((87+inner))] == 0x6 &&
+		ip[$((100+inner))] == 0x1f && ip[$((101+inner))] == 0x40) ||
+		(ip6 &&
+		ip6[$((60+encl))] == 0x86 && ip6[$((61+encl))] == 0xdd &&
+		ip6[$((68+encl))] == 0x11 &&
+		ip6[$((104+encl))] == 0x17 && ip6[$((105+encl))] == 0xc2 &&
+		ip6[$((116+proto))] == 0x86 && ip6[$((117+proto))] == 0xdd &&
+		ip6[$((124+inner))] == 0x6 &&
+		ip6[$((160+inner))] == 0x1f && ip6[$((161+inner))] == 0x40)")
+
+	# ignore short packets, to avoid arp/mld induced noise
+	ip netns exec "$NS_SRC" "$ipt" -A OUTPUT -p udp --dport "$dport" \
+	   -m length --length 600:65535 -m bpf --bytecode "$bpf_filter"
+	ip netns exec "$NS_DST" "$ipt" -A INPUT -p udp --dport "$dport" \
+	   -m length --length 600:65535 -m bpf --bytecode "$bpf_filter"
+	ip netns exec "$NS_DST" ./udpgso_bench_rx -C 2000 -t -R 100 \
+	   -n "$pkts" "${rx_args[@]}" &
+	local pid=$!
+	wait_local_port_listen "$NS_DST" 8000 tcp
+	ip netns exec "$NS_SRC" ./udpgso_bench_tx -"$FAMILY" -t -M 1 \
+	   -s "$total_size" -D "$dst"
+	local ret=$?
+	check_err "$ret" "client failure exit code $ret"
+	wait "$pid"
+	ret=$?
+	check_err "$ret" "server failure exit code $ret"
+	# the rule counters tell how many matching packets each side has seen
+	snd=$(ip netns exec "$NS_SRC" "$ipt"-save -c |
+	    grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//')
+
+	[ "$snd" = "$wire_pkts" ]
+	# shellcheck disable=SC2319 # known false positive
+	check_err $? "send $snd packets on the lowest link, expected $wire_pkts"
+
+	rcvpkts=$(ip netns exec "$NS_DST" "$ipt"-save -c | \
+	   grep "dport $dport" | sed -e 's/\[//' -e 's/:.*//')
+
+	[ "$rcvpkts" = "$encappkts" ]
+	check_err $? "received $rcvpkts $ENCAP packets, expected $encappkts"
+	log_test "$msg"
+}
+
+run_tests() {	# run_test arguments below: name, total size, encappkts [, wire_pkts]
+	for FAMILY in 4 6; do
+		NET=$OL2_NET_V4
+		GSO_SIZE=1340 # 1392 - 20 - 32 (1392 is the nested device mtu)
+
+		if [ $FAMILY = 6 ]; then
+			NET=$OL2_NET_V6
+			GSO_SIZE=1280 # 1352 - 40 - 32 (1352 is the nested device mtu)
+		fi
+
+		echo "IPv$FAMILY"
+		# start each family from the defaults: no GRO hint, no protocol inherit
+		unset USE_HINT
+		unset INHERIT
+
+		# "geneve" must be last encap in list, so that later
+		# test cases will run on it
+		for ENCAP in "vxlan" "geneve"; do
+			create_ns
+			run_test "No GSO - $ENCAP" $((GSO_SIZE * 4)) 4 4
+			cleanup_all_ns
+
+			create_ns_gso
+			run_test "GSO without GRO - $ENCAP" $((GSO_SIZE * 4)) \
+			   4 1
+			cleanup_all_ns
+
+			# IPv4 only test
+			[ $FAMILY = "4" ] || continue
+			create_ns_gso
+			ip netns exec "$NS_SRC" \
+			   sysctl -qw net.ipv4.ip_no_pmtu_disc=1
+			run_test "GSO disable due to no fixedid - $ENCAP" \
+			   $((GSO_SIZE * 4)) 4 4
+			cleanup_all_ns
+		done
+
+		# GRO tests imply/require geneve encap, the only one providing
+		# GRO hints
+		create_ns_gso_gro
+		run_test "double tunnel GRO, no hints" $((GSO_SIZE * 4)) 4
+		cleanup_all_ns
+
+		# hint option is expected for all the following tests in the RX
+		# path
+		USE_HINT=true
+		create_ns_gso_gro \
+		   '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \
+		   '"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1'
+		run_test "double tunnel GRO" $((GSO_SIZE * 4)) 1
+		cleanup_all_ns
+
+		create_ns_gso_gro '"gro-hint":1,"udp-csum":1' '"udp-csum":1'
+		run_test "double tunnel GRO - csum complete" $((GSO_SIZE * 4))\
+		   1
+		cleanup_all_ns
+
+		create_ns_gso_gro '"gro-hint":1' \
+		   '"udp-csum":0,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1'
+		run_test "double tunnel GRO - no nested csum" \
+		   $((GSO_SIZE * 4)) 1
+		cleanup_all_ns
+
+		create_ns_gso_gro \
+		   '"gro-hint":1,"udp-zero-csum6-tx":1,"udp-zero-csum6-rx":1' \
+		   '"udp-csum":1'
+		run_test "double tunnel GRO - nested csum, outer 0-csum, skip"\
+		   $((GSO_SIZE * 4)) 4
+		cleanup_all_ns
+		# INHERIT makes run_test drop the nested mac header from its offsets
+		INHERIT=true
+		create_ns_gso_gro '"gro-hint":1,"udp-csum":1' \
+		   '"udp-csum":1,"inner-proto-inherit":1'
+		run_test "double tunnel GRO - nested inherit proto" \
+		   $((GSO_SIZE * 4)) 1
+		cleanup_all_ns
+		unset INHERIT
+		# total size is not a multiple of GSO_SIZE: the last segment is short
+		create_ns_gso_gro '"gro-hint":1'
+		run_test "double tunnel GRO - short last pkt" \
+		   $((GSO_SIZE * 4 + GSO_SIZE / 2)) 2
+		cleanup_all_ns
+	done
+}
+
+require_command nfbpf_compile
+require_command jq
+
+# tcp retransmissions will break the accounting
+xfail_on_slow run_tests
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c
index 16e457c2f877..adf8dd0b5e0b 100644
--- a/tools/testing/selftests/net/epoll_busy_poll.c
+++ b/tools/testing/selftests/net/epoll_busy_poll.c
@@ -23,7 +23,7 @@
 #include <sys/ioctl.h>
 #include <sys/socket.h>
 
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 /* if the headers haven't been updated, we need to define some things */
 #if !defined(EPOLL_IOC_TYPE)
diff --git a/tools/testing/selftests/net/fcnal-ipv4.sh b/tools/testing/selftests/net/fcnal-ipv4.sh
new file mode 100755
index 000000000000..82f9c867c3e8
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-ipv4.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./fcnal-test.sh -t ipv4	# exit status is fcnal-test.sh's; -t presumably selects the "ipv4" test group - confirm
diff --git a/tools/testing/selftests/net/fcnal-ipv6.sh b/tools/testing/selftests/net/fcnal-ipv6.sh
new file mode 100755
index 000000000000..ab1fc7aa3caf
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-ipv6.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./fcnal-test.sh -t ipv6	# exit status is fcnal-test.sh's; -t presumably selects the "ipv6" test group - confirm
diff --git a/tools/testing/selftests/net/fcnal-other.sh b/tools/testing/selftests/net/fcnal-other.sh
new file mode 100755
index 000000000000..a840cf80b32e
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-other.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./fcnal-test.sh -t other	# exit status is fcnal-test.sh's; -t presumably selects the "other" test group - confirm
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 899dbad0104b..890c3f8e51bb 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -189,7 +189,7 @@ show_hint()
 kill_procs()
 {
 	killall nettest ping ping6 >/dev/null 2>&1
-	sleep 1
+	slowwait 2 sh -c 'test -z "$(pgrep '"'^(nettest|ping|ping6)$'"')"'
 }
 
 set_ping_group()
@@ -424,6 +424,8 @@ create_ns()
 	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
 	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
 	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.accept_dad=0
 }
 
 # create veth pair to connect namespaces and apply addresses.
@@ -875,7 +877,7 @@ ipv4_tcp_md5_novrf()
 	# basic use case
 	log_start
 	run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: Single address config"
 
@@ -883,7 +885,7 @@ ipv4_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: Server no config, client uses password"
 
@@ -891,7 +893,7 @@ ipv4_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: Client uses wrong password"
 
@@ -899,7 +901,7 @@ ipv4_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: Client address does not match address configured with password"
 
@@ -910,7 +912,7 @@ ipv4_tcp_md5_novrf()
 	# client in prefix
 	log_start
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest  -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: Prefix config"
 
@@ -918,7 +920,7 @@ ipv4_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: Prefix config, client uses wrong password"
 
@@ -926,7 +928,7 @@ ipv4_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
 }
@@ -943,7 +945,7 @@ ipv4_tcp_md5()
 	# basic use case
 	log_start
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Single address config"
 
@@ -951,7 +953,7 @@ ipv4_tcp_md5()
 	log_start
 	show_hint "Should timeout since server does not have MD5 auth"
 	run_cmd nettest -s -I ${VRF} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Server no config, client uses password"
 
@@ -959,7 +961,7 @@ ipv4_tcp_md5()
 	log_start
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Client uses wrong password"
 
@@ -967,7 +969,7 @@ ipv4_tcp_md5()
 	log_start
 	show_hint "Should timeout since server config differs from client"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
 
@@ -978,7 +980,7 @@ ipv4_tcp_md5()
 	# client in prefix
 	log_start
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest  -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Prefix config"
 
@@ -986,7 +988,7 @@ ipv4_tcp_md5()
 	log_start
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
 
@@ -994,7 +996,7 @@ ipv4_tcp_md5()
 	log_start
 	show_hint "Should timeout since client address is outside of prefix"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
 
@@ -1005,14 +1007,14 @@ ipv4_tcp_md5()
 	log_start
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest  -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
 
 	log_start
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest  -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
 
@@ -1020,7 +1022,7 @@ ipv4_tcp_md5()
 	show_hint "Should timeout since client in default VRF uses VRF password"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
 
@@ -1028,21 +1030,21 @@ ipv4_tcp_md5()
 	show_hint "Should timeout since client in VRF uses default VRF password"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
 
 	log_start
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest  -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
 
 	log_start
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest  -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
 
@@ -1050,7 +1052,7 @@ ipv4_tcp_md5()
 	show_hint "Should timeout since client in default VRF uses VRF password"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
 
@@ -1058,7 +1060,7 @@ ipv4_tcp_md5()
 	show_hint "Should timeout since client in VRF uses default VRF password"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
 	run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
 
@@ -1082,14 +1084,14 @@ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex()
 	log_start
 	show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection"
 
 	log_start
 	show_hint "Binding both the socket and the key is not required but it works"
 	run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection"
 }
@@ -1103,25 +1105,25 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0()
 
 	log_start
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection"
 
 	log_start
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection"
 	log_start
 
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection"
 
 	log_start
 	run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection"
 
@@ -1193,7 +1195,7 @@ ipv4_tcp_novrf()
 	do
 		log_start
 		run_cmd nettest -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 	done
@@ -1201,7 +1203,7 @@ ipv4_tcp_novrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -s -I ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${a}
 	log_test_addr ${a} $? 0 "Device server"
 
@@ -1221,13 +1223,13 @@ ipv4_tcp_novrf()
 	do
 		log_start
 		run_cmd_nsb nettest -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 tcp
 		run_cmd nettest -r ${a} -0 ${NSA_IP}
 		log_test_addr ${a} $? 0 "Client"
 
 		log_start
 		run_cmd_nsb nettest -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 tcp
 		run_cmd nettest -r ${a} -d ${NSA_DEV}
 		log_test_addr ${a} $? 0 "Client, device bind"
 
@@ -1249,7 +1251,7 @@ ipv4_tcp_novrf()
 	do
 		log_start
 		run_cmd nettest -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -r ${a} -0 ${a} -1 ${a}
 		log_test_addr ${a} $? 0 "Global server, local connection"
 	done
@@ -1257,7 +1259,7 @@ ipv4_tcp_novrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -s -I ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
 
@@ -1266,7 +1268,7 @@ ipv4_tcp_novrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
 		run_cmd nettest -s -I ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -r ${a}
 		log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
 	done
@@ -1274,7 +1276,7 @@ ipv4_tcp_novrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV}
 	log_test_addr ${a} $? 0 "Global server, device client, local connection"
 
@@ -1283,7 +1285,7 @@ ipv4_tcp_novrf()
 		log_start
 		show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
 		run_cmd nettest -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -r ${a} -d ${NSA_DEV}
 		log_test_addr ${a} $? 1 "Global server, device client, local connection"
 	done
@@ -1291,7 +1293,7 @@ ipv4_tcp_novrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest  -d ${NSA_DEV} -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local connection"
 
@@ -1323,19 +1325,19 @@ ipv4_tcp_vrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
 		run_cmd nettest -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 1 "Global server"
 
 		log_start
 		run_cmd nettest -s -I ${VRF} -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 
 		log_start
 		run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 0 "Device server"
 
@@ -1352,7 +1354,7 @@ ipv4_tcp_vrf()
 	log_start
 	show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
 	run_cmd nettest -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -r ${a} -d ${NSA_DEV}
 	log_test_addr ${a} $? 1 "Global server, local connection"
 
@@ -1374,14 +1376,14 @@ ipv4_tcp_vrf()
 		log_start
 		show_hint "client socket should be bound to VRF"
 		run_cmd nettest -s -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 
 		log_start
 		show_hint "client socket should be bound to VRF"
 		run_cmd nettest -s -I ${VRF} -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 
@@ -1396,7 +1398,7 @@ ipv4_tcp_vrf()
 	log_start
 	show_hint "client socket should be bound to device"
 	run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -r ${a}
 	log_test_addr ${a} $? 0 "Device server"
 
@@ -1406,7 +1408,7 @@ ipv4_tcp_vrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since client is not bound to VRF"
 		run_cmd nettest -s -I ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -r ${a}
 		log_test_addr ${a} $? 1 "Global server, local connection"
 	done
@@ -1418,13 +1420,13 @@ ipv4_tcp_vrf()
 	do
 		log_start
 		run_cmd_nsb nettest -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 tcp
 		run_cmd nettest -r ${a} -d ${VRF}
 		log_test_addr ${a} $? 0 "Client, VRF bind"
 
 		log_start
 		run_cmd_nsb nettest -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 tcp
 		run_cmd nettest -r ${a} -d ${NSA_DEV}
 		log_test_addr ${a} $? 0 "Client, device bind"
 
@@ -1443,7 +1445,7 @@ ipv4_tcp_vrf()
 	do
 		log_start
 		run_cmd nettest -s -I ${VRF} -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
 		log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
 	done
@@ -1451,26 +1453,26 @@ ipv4_tcp_vrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -s -I ${VRF} -3 ${VRF} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local connection"
 
 	log_start
 	show_hint "Should fail 'No route to host' since client is out of VRF scope"
 	run_cmd nettest -s -I ${VRF} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -r ${a}
 	log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
 
 	log_start
 	run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
 
 	log_start
 	run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local connection"
 }
@@ -1509,7 +1511,7 @@ ipv4_udp_novrf()
 	do
 		log_start
 		run_cmd nettest -D -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 
@@ -1522,7 +1524,7 @@ ipv4_udp_novrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd_nsb nettest -D -r ${a}
 	log_test_addr ${a} $? 0 "Device server"
 
@@ -1533,31 +1535,31 @@ ipv4_udp_novrf()
 	do
 		log_start
 		run_cmd_nsb nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 udp
 		run_cmd nettest -D -r ${a} -0 ${NSA_IP}
 		log_test_addr ${a} $? 0 "Client"
 
 		log_start
 		run_cmd_nsb nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 udp
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP}
 		log_test_addr ${a} $? 0 "Client, device bind"
 
 		log_start
 		run_cmd_nsb nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 udp
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP}
 		log_test_addr ${a} $? 0 "Client, device send via cmsg"
 
 		log_start
 		run_cmd_nsb nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 udp
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP}
 		log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF"
 
 		log_start
 		run_cmd_nsb nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 udp
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U
 		log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()"
 
@@ -1580,7 +1582,7 @@ ipv4_udp_novrf()
 	do
 		log_start
 		run_cmd nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -D -r ${a} -0 ${a} -1 ${a}
 		log_test_addr ${a} $? 0 "Global server, local connection"
 	done
@@ -1588,7 +1590,7 @@ ipv4_udp_novrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -r ${a}
 	log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
 
@@ -1597,7 +1599,7 @@ ipv4_udp_novrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since address is out of device scope"
 		run_cmd nettest -s -D -I ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -D -r ${a}
 		log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
 	done
@@ -1605,25 +1607,25 @@ ipv4_udp_novrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -s -D &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device client, local connection"
 
 	log_start
 	run_cmd nettest -s -D &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${NSA_DEV} -C -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection"
 
 	log_start
 	run_cmd nettest -s -D &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${NSA_DEV} -S -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection"
 
 	log_start
 	run_cmd nettest -s -D &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U
 	log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
 
@@ -1636,28 +1638,28 @@ ipv4_udp_novrf()
 		log_start
 		show_hint "Should fail since addresses on loopback are out of device scope"
 		run_cmd nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV}
 		log_test_addr ${a} $? 2 "Global server, device client, local connection"
 
 		log_start
 		show_hint "Should fail since addresses on loopback are out of device scope"
 		run_cmd nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C
 		log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection"
 
 		log_start
 		show_hint "Should fail since addresses on loopback are out of device scope"
 		run_cmd nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S
 		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
 
 		log_start
 		show_hint "Should fail since addresses on loopback are out of device scope"
 		run_cmd nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U
 		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
 
@@ -1667,7 +1669,7 @@ ipv4_udp_novrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local conn"
 
@@ -1709,19 +1711,19 @@ ipv4_udp_vrf()
 		log_start
 		show_hint "Fails because ingress is in a VRF and global server is disabled"
 		run_cmd nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 1 "Global server"
 
 		log_start
 		run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 
 		log_start
 		run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "Enslaved device server"
 
@@ -1733,7 +1735,7 @@ ipv4_udp_vrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since global server is out of scope"
 		run_cmd nettest -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -D -d ${VRF} -r ${a}
 		log_test_addr ${a} $? 1 "Global server, VRF client, local connection"
 	done
@@ -1741,26 +1743,26 @@ ipv4_udp_vrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
 
 	log_start
 	run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection"
 
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
 
 	log_start
 	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
 
@@ -1775,19 +1777,19 @@ ipv4_udp_vrf()
 	do
 		log_start
 		run_cmd nettest -D -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 
 		log_start
 		run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 
 		log_start
 		run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -D -r ${a}
 		log_test_addr ${a} $? 0 "Enslaved device server"
 
@@ -1802,13 +1804,13 @@ ipv4_udp_vrf()
 	#
 	log_start
 	run_cmd_nsb nettest -D -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 udp
 	run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP}
 	log_test $? 0 "VRF client"
 
 	log_start
 	run_cmd_nsb nettest -D -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 udp
 	run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP}
 	log_test $? 0 "Enslaved device client"
 
@@ -1829,31 +1831,31 @@ ipv4_udp_vrf()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest -D -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
 
 	log_start
 	run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
 
 	log_start
 	run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local conn"
 
 	log_start
 	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
 
 	log_start
 	run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
 
@@ -1861,7 +1863,7 @@ ipv4_udp_vrf()
 	do
 		log_start
 		run_cmd nettest -D -s -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -D -d ${VRF} -r ${a}
 		log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
 	done
@@ -1870,7 +1872,7 @@ ipv4_udp_vrf()
 	do
 		log_start
 		run_cmd nettest -s -D -I ${VRF} -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -D -d ${VRF} -r ${a}
 		log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
 	done
@@ -2093,7 +2095,7 @@ ipv4_rt()
 	do
 		log_start
 		run_cmd nettest ${varg} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest ${varg} -r ${a} &
 		sleep 3
 		run_cmd ip link del ${VRF}
@@ -2107,7 +2109,7 @@ ipv4_rt()
 	do
 		log_start
 		run_cmd nettest ${varg} -s -I ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest ${varg} -r ${a} &
 		sleep 3
 		run_cmd ip link del ${VRF}
@@ -2120,7 +2122,7 @@ ipv4_rt()
 	a=${NSA_IP}
 	log_start
 	run_cmd nettest ${varg} -s -I ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest ${varg} -r ${a} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -2134,7 +2136,7 @@ ipv4_rt()
 	#
 	log_start
 	run_cmd_nsb nettest ${varg} -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 tcp
 	run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -2145,7 +2147,7 @@ ipv4_rt()
 
 	log_start
 	run_cmd_nsb nettest ${varg} -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 tcp
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -2161,7 +2163,7 @@ ipv4_rt()
 	do
 		log_start
 		run_cmd nettest ${varg} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest ${varg} -d ${VRF} -r ${a} &
 		sleep 3
 		run_cmd ip link del ${VRF}
@@ -2175,7 +2177,7 @@ ipv4_rt()
 	do
 		log_start
 		run_cmd nettest ${varg} -I ${VRF} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest ${varg} -d ${VRF} -r ${a} &
 		sleep 3
 		run_cmd ip link del ${VRF}
@@ -2189,7 +2191,7 @@ ipv4_rt()
 	log_start
 
 	run_cmd nettest ${varg} -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -2200,7 +2202,7 @@ ipv4_rt()
 
 	log_start
 	run_cmd nettest ${varg} -I ${VRF} -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -2211,7 +2213,7 @@ ipv4_rt()
 
 	log_start
 	run_cmd nettest ${varg} -I ${NSA_DEV} -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -2325,6 +2327,13 @@ ipv6_ping_novrf()
 		log_test_addr ${a} $? 2 "ping local, device bind"
 	done
 
+	for a in ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSA_DEV} ${NSA_IP6}
+	do
+		log_start
+		run_cmd ${ping6} -c1 -w1 -I ::1 ${a}
+		log_test_addr ${a} $? 0 "ping local, from localhost"
+	done
+
 	#
 	# ip rule blocks address
 	#
@@ -2561,7 +2570,7 @@ ipv6_tcp_md5_novrf()
 	# basic use case
 	log_start
 	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: Single address config"
 
@@ -2569,7 +2578,7 @@ ipv6_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -6 -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: Server no config, client uses password"
 
@@ -2577,7 +2586,7 @@ ipv6_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: Client uses wrong password"
 
@@ -2585,7 +2594,7 @@ ipv6_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: Client address does not match address configured with password"
 
@@ -2596,7 +2605,7 @@ ipv6_tcp_md5_novrf()
 	# client in prefix
 	log_start
 	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: Prefix config"
 
@@ -2604,7 +2613,7 @@ ipv6_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: Prefix config, client uses wrong password"
 
@@ -2612,7 +2621,7 @@ ipv6_tcp_md5_novrf()
 	log_start
 	show_hint "Should timeout due to MD5 mismatch"
 	run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
 }
@@ -2629,7 +2638,7 @@ ipv6_tcp_md5()
 	# basic use case
 	log_start
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Single address config"
 
@@ -2637,7 +2646,7 @@ ipv6_tcp_md5()
 	log_start
 	show_hint "Should timeout since server does not have MD5 auth"
 	run_cmd nettest -6 -s -I ${VRF} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Server no config, client uses password"
 
@@ -2645,7 +2654,7 @@ ipv6_tcp_md5()
 	log_start
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Client uses wrong password"
 
@@ -2653,7 +2662,7 @@ ipv6_tcp_md5()
 	log_start
 	show_hint "Should timeout since server config differs from client"
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
 
@@ -2664,7 +2673,7 @@ ipv6_tcp_md5()
 	# client in prefix
 	log_start
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Prefix config"
 
@@ -2672,7 +2681,7 @@ ipv6_tcp_md5()
 	log_start
 	show_hint "Should timeout since client uses wrong password"
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
 
@@ -2680,7 +2689,7 @@ ipv6_tcp_md5()
 	log_start
 	show_hint "Should timeout since client address is outside of prefix"
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
 
@@ -2691,14 +2700,14 @@ ipv6_tcp_md5()
 	log_start
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
 
 	log_start
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
 
@@ -2706,7 +2715,7 @@ ipv6_tcp_md5()
 	show_hint "Should timeout since client in default VRF uses VRF password"
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
 
@@ -2714,21 +2723,21 @@ ipv6_tcp_md5()
 	show_hint "Should timeout since client in VRF uses default VRF password"
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
 
 	log_start
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
 
 	log_start
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
 
@@ -2736,7 +2745,7 @@ ipv6_tcp_md5()
 	show_hint "Should timeout since client in default VRF uses VRF password"
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
 	log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
 
@@ -2744,7 +2753,7 @@ ipv6_tcp_md5()
 	show_hint "Should timeout since client in VRF uses default VRF password"
 	run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
 	run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
 	log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
 
@@ -2772,7 +2781,7 @@ ipv6_tcp_novrf()
 	do
 		log_start
 		run_cmd nettest -6 -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 	done
@@ -2793,7 +2802,7 @@ ipv6_tcp_novrf()
 	do
 		log_start
 		run_cmd_nsb nettest -6 -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 tcp
 		run_cmd nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "Client"
 	done
@@ -2802,7 +2811,7 @@ ipv6_tcp_novrf()
 	do
 		log_start
 		run_cmd_nsb nettest -6 -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 tcp
 		run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
 		log_test_addr ${a} $? 0 "Client, device bind"
 	done
@@ -2822,7 +2831,7 @@ ipv6_tcp_novrf()
 	do
 		log_start
 		run_cmd nettest -6 -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "Global server, local connection"
 	done
@@ -2830,7 +2839,7 @@ ipv6_tcp_novrf()
 	a=${NSA_IP6}
 	log_start
 	run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -6 -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
 
@@ -2839,7 +2848,7 @@ ipv6_tcp_novrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
 		run_cmd nettest -6 -s -I ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -6 -r ${a}
 		log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
 	done
@@ -2847,7 +2856,7 @@ ipv6_tcp_novrf()
 	a=${NSA_IP6}
 	log_start
 	run_cmd nettest -6 -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
 	log_test_addr ${a} $? 0 "Global server, device client, local connection"
 
@@ -2856,7 +2865,7 @@ ipv6_tcp_novrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
 		run_cmd nettest -6 -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
 		log_test_addr ${a} $? 1 "Global server, device client, local connection"
 	done
@@ -2865,7 +2874,7 @@ ipv6_tcp_novrf()
 	do
 		log_start
 		run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -6  -d ${NSA_DEV} -r ${a}
 		log_test_addr ${a} $? 0 "Device server, device client, local conn"
 	done
@@ -2898,7 +2907,7 @@ ipv6_tcp_vrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
 		run_cmd nettest -6 -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 1 "Global server"
 	done
@@ -2907,7 +2916,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 	done
@@ -2916,7 +2925,7 @@ ipv6_tcp_vrf()
 	a=${NSA_LINKIP6}%${NSB_DEV}
 	log_start
 	run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${a}
 	log_test_addr ${a} $? 0 "VRF server"
 
@@ -2924,7 +2933,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "Device server"
 	done
@@ -2943,7 +2952,7 @@ ipv6_tcp_vrf()
 	log_start
 	show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
 	run_cmd nettest -6 -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
 	log_test_addr ${a} $? 1 "Global server, local connection"
 
@@ -2964,7 +2973,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -s -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 	done
@@ -2973,7 +2982,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 	done
@@ -2982,13 +2991,13 @@ ipv6_tcp_vrf()
 	a=${NSA_LINKIP6}%${NSB_DEV}
 	log_start
 	run_cmd nettest -6 -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${a}
 	log_test_addr ${a} $? 0 "Global server"
 
 	log_start
 	run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd_nsb nettest -6 -r ${a}
 	log_test_addr ${a} $? 0 "VRF server"
 
@@ -2996,7 +3005,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 0 "Device server"
 	done
@@ -3016,7 +3025,7 @@ ipv6_tcp_vrf()
 		log_start
 		show_hint "Fails 'Connection refused' since client is not in VRF"
 		run_cmd nettest -6 -s -I ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -6 -r ${a}
 		log_test_addr ${a} $? 1 "Global server, local connection"
 	done
@@ -3029,7 +3038,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		run_cmd_nsb nettest -6 -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 tcp
 		run_cmd nettest -6 -r ${a} -d ${VRF}
 		log_test_addr ${a} $? 0 "Client, VRF bind"
 	done
@@ -3038,7 +3047,7 @@ ipv6_tcp_vrf()
 	log_start
 	show_hint "Fails since VRF device does not allow linklocal addresses"
 	run_cmd_nsb nettest -6 -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 tcp
 	run_cmd nettest -6 -r ${a} -d ${VRF}
 	log_test_addr ${a} $? 1 "Client, VRF bind"
 
@@ -3046,7 +3055,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		run_cmd_nsb nettest -6 -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 tcp
 		run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
 		log_test_addr ${a} $? 0 "Client, device bind"
 	done
@@ -3071,7 +3080,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
 		log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
 	done
@@ -3079,7 +3088,7 @@ ipv6_tcp_vrf()
 	a=${NSA_IP6}
 	log_start
 	run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local connection"
 
@@ -3087,13 +3096,13 @@ ipv6_tcp_vrf()
 	log_start
 	show_hint "Should fail since unbound client is out of VRF scope"
 	run_cmd nettest -6 -s -I ${VRF} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -6 -r ${a}
 	log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
 
 	log_start
 	run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
 
@@ -3101,7 +3110,7 @@ ipv6_tcp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
 		log_test_addr ${a} $? 0 "Device server, device client, local connection"
 	done
@@ -3141,13 +3150,13 @@ ipv6_udp_novrf()
 	do
 		log_start
 		run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 
 		log_start
 		run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Device server"
 	done
@@ -3155,7 +3164,7 @@ ipv6_udp_novrf()
 	a=${NSA_LO_IP6}
 	log_start
 	run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd_nsb nettest -6 -D -r ${a}
 	log_test_addr ${a} $? 0 "Global server"
 
@@ -3165,7 +3174,7 @@ ipv6_udp_novrf()
 	#log_start
 	#show_hint "Should fail since loopback address is out of scope"
 	#run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-	#sleep 1
+	#wait_local_port_listen ${NSA} 12345 udp
 	#run_cmd_nsb nettest -6 -D -r ${a}
 	#log_test_addr ${a} $? 1 "Device server"
 
@@ -3185,25 +3194,25 @@ ipv6_udp_novrf()
 	do
 		log_start
 		run_cmd_nsb nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 udp
 		run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6}
 		log_test_addr ${a} $? 0 "Client"
 
 		log_start
 		run_cmd_nsb nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 udp
 		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6}
 		log_test_addr ${a} $? 0 "Client, device bind"
 
 		log_start
 		run_cmd_nsb nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 udp
 		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6}
 		log_test_addr ${a} $? 0 "Client, device send via cmsg"
 
 		log_start
 		run_cmd_nsb nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSB} 12345 udp
 		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6}
 		log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF"
 
@@ -3225,7 +3234,7 @@ ipv6_udp_novrf()
 	do
 		log_start
 		run_cmd nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a}
 		log_test_addr ${a} $? 0 "Global server, local connection"
 	done
@@ -3233,7 +3242,7 @@ ipv6_udp_novrf()
 	a=${NSA_IP6}
 	log_start
 	run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -r ${a}
 	log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
 
@@ -3242,7 +3251,7 @@ ipv6_udp_novrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since address is out of device scope"
 		run_cmd nettest -6 -s -D -I ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 1 "Device server, local connection"
 	done
@@ -3250,19 +3259,19 @@ ipv6_udp_novrf()
 	a=${NSA_IP6}
 	log_start
 	run_cmd nettest -6 -s -D &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device client, local connection"
 
 	log_start
 	run_cmd nettest -6 -s -D &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection"
 
 	log_start
 	run_cmd nettest -6 -s -D &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device client via IPV6_UNICAST_IF, local connection"
 
@@ -3271,28 +3280,28 @@ ipv6_udp_novrf()
 		log_start
 		show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
 		run_cmd nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV}
 		log_test_addr ${a} $? 1 "Global server, device client, local connection"
 
 		log_start
 		show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
 		run_cmd nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C
 		log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection"
 
 		log_start
 		show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
 		run_cmd nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S
 		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
 
 		log_start
 		show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
 		run_cmd nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U
 		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
 	done
@@ -3300,7 +3309,7 @@ ipv6_udp_novrf()
 	a=${NSA_IP6}
 	log_start
 	run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local conn"
 
@@ -3314,7 +3323,7 @@ ipv6_udp_novrf()
 	run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV}
 	log_start
 	run_cmd nettest -6 -s -D &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd_nsb nettest -6 -D -r ${NSA_IP6}
 	log_test $? 0 "UDP in - LLA to GUA"
 
@@ -3338,7 +3347,7 @@ ipv6_udp_vrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since global server is disabled"
 		run_cmd nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 1 "Global server"
 	done
@@ -3347,7 +3356,7 @@ ipv6_udp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 	done
@@ -3356,7 +3365,7 @@ ipv6_udp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Enslaved device server"
 	done
@@ -3378,7 +3387,7 @@ ipv6_udp_vrf()
 		log_start
 		show_hint "Should fail 'Connection refused' since global server is disabled"
 		run_cmd nettest -6 -D -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -6 -D -d ${VRF} -r ${a}
 		log_test_addr ${a} $? 1 "Global server, VRF client, local conn"
 	done
@@ -3387,7 +3396,7 @@ ipv6_udp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -D -I ${VRF} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd nettest -6 -D -d ${VRF} -r ${a}
 		log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
 	done
@@ -3396,25 +3405,25 @@ ipv6_udp_vrf()
 	log_start
 	show_hint "Should fail 'Connection refused' since global server is disabled"
 	run_cmd nettest -6 -D -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 1 "Global server, device client, local conn"
 
 	log_start
 	run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local conn"
 
 	log_start
 	run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
 
 	log_start
 	run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
 
@@ -3429,7 +3438,7 @@ ipv6_udp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Global server"
 	done
@@ -3438,7 +3447,7 @@ ipv6_udp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "VRF server"
 	done
@@ -3447,7 +3456,7 @@ ipv6_udp_vrf()
 	do
 		log_start
 		run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 udp
 		run_cmd_nsb nettest -6 -D -r ${a}
 		log_test_addr ${a} $? 0 "Enslaved device server"
 	done
@@ -3465,7 +3474,7 @@ ipv6_udp_vrf()
 	#
 	log_start
 	run_cmd_nsb nettest -6 -D -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 udp
 	run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6}
 	log_test $? 0 "VRF client"
 
@@ -3476,7 +3485,7 @@ ipv6_udp_vrf()
 
 	log_start
 	run_cmd_nsb nettest -6 -D -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6}
 	log_test $? 0 "Enslaved device client"
 
@@ -3491,13 +3500,13 @@ ipv6_udp_vrf()
 	a=${NSA_IP6}
 	log_start
 	run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
 
 	#log_start
 	run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
 
@@ -3505,13 +3514,13 @@ ipv6_udp_vrf()
 	a=${VRF_IP6}
 	log_start
 	run_cmd nettest -6 -D -s -3 ${VRF} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
 
 	log_start
 	run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
 
@@ -3527,25 +3536,25 @@ ipv6_udp_vrf()
 	a=${NSA_IP6}
 	log_start
 	run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device client, local conn"
 
 	log_start
 	run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "VRF server, device client, local conn"
 
 	log_start
 	run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${VRF} -r ${a}
 	log_test_addr ${a} $? 0 "Device server, VRF client, local conn"
 
 	log_start
 	run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
 	log_test_addr ${a} $? 0 "Device server, device client, local conn"
 
@@ -3557,7 +3566,7 @@ ipv6_udp_vrf()
 	# link local addresses
 	log_start
 	run_cmd nettest -6 -D -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6}
 	log_test $? 0 "Global server, linklocal IP"
 
@@ -3568,7 +3577,7 @@ ipv6_udp_vrf()
 
 	log_start
 	run_cmd_nsb nettest -6 -D -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6}
 	log_test $? 0 "Enslaved device client, linklocal IP"
 
@@ -3579,7 +3588,7 @@ ipv6_udp_vrf()
 
 	log_start
 	run_cmd nettest -6 -D -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6}
 	log_test $? 0 "Enslaved device client, local conn - linklocal IP"
 
@@ -3592,7 +3601,7 @@ ipv6_udp_vrf()
 	run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV}
 	log_start
 	run_cmd nettest -6 -s -D &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 udp
 	run_cmd_nsb nettest -6 -D -r ${NSA_IP6}
 	log_test $? 0 "UDP in - LLA to GUA"
 
@@ -3667,7 +3676,7 @@ ipv6_addr_bind_novrf()
 	# when it really should not
 	a=${NSA_LO_IP6}
 	log_start
-	show_hint "Tecnically should fail since address is not on device but kernel allows"
+	show_hint "Technically should fail since address is not on device but kernel allows"
 	run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
 	log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address"
 }
@@ -3724,7 +3733,7 @@ ipv6_addr_bind_vrf()
 	# passes when it really should not
 	a=${VRF_IP6}
 	log_start
-	show_hint "Tecnically should fail since address is not on device but kernel allows"
+	show_hint "Technically should fail since address is not on device but kernel allows"
 	run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
 	log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind"
 
@@ -3771,7 +3780,7 @@ ipv6_rt()
 	do
 		log_start
 		run_cmd nettest ${varg} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest ${varg} -r ${a} &
 		sleep 3
 		run_cmd ip link del ${VRF}
@@ -3785,7 +3794,7 @@ ipv6_rt()
 	do
 		log_start
 		run_cmd nettest ${varg} -I ${VRF} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest ${varg} -r ${a} &
 		sleep 3
 		run_cmd ip link del ${VRF}
@@ -3799,7 +3808,7 @@ ipv6_rt()
 	do
 		log_start
 		run_cmd nettest ${varg} -I ${NSA_DEV} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest ${varg} -r ${a} &
 		sleep 3
 		run_cmd ip link del ${VRF}
@@ -3814,7 +3823,7 @@ ipv6_rt()
 	#
 	log_start
 	run_cmd_nsb nettest ${varg} -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 tcp
 	run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -3825,7 +3834,7 @@ ipv6_rt()
 
 	log_start
 	run_cmd_nsb nettest ${varg} -s &
-	sleep 1
+	wait_local_port_listen ${NSB} 12345 tcp
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -3842,7 +3851,7 @@ ipv6_rt()
 	do
 		log_start
 		run_cmd nettest ${varg} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest ${varg} -d ${VRF} -r ${a} &
 		sleep 3
 		run_cmd ip link del ${VRF}
@@ -3856,7 +3865,7 @@ ipv6_rt()
 	do
 		log_start
 		run_cmd nettest ${varg} -I ${VRF} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd nettest ${varg} -d ${VRF} -r ${a} &
 		sleep 3
 		run_cmd ip link del ${VRF}
@@ -3869,7 +3878,7 @@ ipv6_rt()
 	a=${NSA_IP6}
 	log_start
 	run_cmd nettest ${varg} -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -3880,7 +3889,7 @@ ipv6_rt()
 
 	log_start
 	run_cmd nettest ${varg} -I ${VRF} -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -3891,7 +3900,7 @@ ipv6_rt()
 
 	log_start
 	run_cmd nettest ${varg} -I ${NSA_DEV} -s &
-	sleep 1
+	wait_local_port_listen ${NSA} 12345 tcp
 	run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
 	sleep 3
 	run_cmd ip link del ${VRF}
@@ -3950,7 +3959,7 @@ netfilter_tcp_reset()
 	do
 		log_start
 		run_cmd nettest -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -r ${a}
 		log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx"
 	done
@@ -3968,7 +3977,7 @@ netfilter_icmp()
 	do
 		log_start
 		run_cmd nettest ${arg} -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest ${arg} -r ${a}
 		log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach"
 	done
@@ -4007,7 +4016,7 @@ netfilter_tcp6_reset()
 	do
 		log_start
 		run_cmd nettest -6 -s &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -6 -r ${a}
 		log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx"
 	done
@@ -4025,7 +4034,7 @@ netfilter_icmp6()
 	do
 		log_start
 		run_cmd nettest -6 -s ${arg} &
-		sleep 1
+		wait_local_port_listen ${NSA} 12345 tcp
 		run_cmd_nsb nettest -6 ${arg} -r ${a}
 		log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach"
 	done
@@ -4221,12 +4230,12 @@ use_case_snat_on_vrf()
 	run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
 
 	run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
-	sleep 1
+	wait_local_port_listen ${NSB} ${port} tcp
 	run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
 	log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
 
 	run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
-	sleep 1
+	wait_local_port_listen ${NSB} ${port} tcp
 	run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
 	log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
 
@@ -4272,6 +4281,7 @@ EOF
 TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter"
 TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter"
 TESTS_OTHER="use_cases"
+# note: each TEST_ group needs a dedicated runner, e.g. fcnal-ipv4.sh
 
 PAUSE_ON_FAIL=no
 PAUSE=no
@@ -4302,6 +4312,8 @@ elif [ "$TESTS" = "ipv4" ]; then
 	TESTS="$TESTS_IPV4"
 elif [ "$TESTS" = "ipv6" ]; then
 	TESTS="$TESTS_IPV6"
+elif [ "$TESTS" = "other" ]; then
+	TESTS="$TESTS_OTHER"
 fi
 
 check_gen_prog "nettest"
diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh
index d5e3abb8658c..9931a1e36e3d 100755
--- a/tools/testing/selftests/net/fdb_flush.sh
+++ b/tools/testing/selftests/net/fdb_flush.sh
@@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes()
 	$IP link del dev vx10
 	$IP link add name vx10 type vxlan dstport "$VXPORT" external
 
-	# For multicat FDB entries, the VXLAN driver stores a linked list of
+	# For multicast FDB entries, the VXLAN driver stores a linked list of
 	# remotes for a given key. Verify that only the expected remotes are
 	# flushed.
 	multicast_fdb_entries_add
diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh
index c159230c9b62..0b8a2465dd04 100755
--- a/tools/testing/selftests/net/fdb_notify.sh
+++ b/tools/testing/selftests/net/fdb_notify.sh
@@ -40,16 +40,16 @@ do_test_dup()
 
 test_dup_bridge()
 {
-	ip_link_add br up type bridge vlan_filtering 1
+	adf_ip_link_add br up type bridge vlan_filtering 1
 	do_test_dup add "bridge" dev br self
 	do_test_dup del "bridge" dev br self
 }
 
 test_dup_vxlan_self()
 {
-	ip_link_add br up type bridge vlan_filtering 1
-	ip_link_add vx up type vxlan id 2000 dstport 4789
-	ip_link_set_master vx br
+	adf_ip_link_add br up type bridge vlan_filtering 1
+	adf_ip_link_add vx up type vxlan id 2000 dstport 4789
+	adf_ip_link_set_master vx br
 
 	do_test_dup add "vxlan" dev vx self dst 192.0.2.1
 	do_test_dup del "vxlan" dev vx self dst 192.0.2.1
@@ -57,9 +57,9 @@ test_dup_vxlan_self()
 
 test_dup_vxlan_master()
 {
-	ip_link_add br up type bridge vlan_filtering 1
-	ip_link_add vx up type vxlan id 2000 dstport 4789
-	ip_link_set_master vx br
+	adf_ip_link_add br up type bridge vlan_filtering 1
+	adf_ip_link_add vx up type vxlan id 2000 dstport 4789
+	adf_ip_link_set_master vx br
 
 	do_test_dup add "vxlan master" dev vx master
 	do_test_dup del "vxlan master" dev vx master
@@ -67,8 +67,8 @@ test_dup_vxlan_master()
 
 test_dup_macvlan_self()
 {
-	ip_link_add dd up type dummy
-	ip_link_add mv up link dd type macvlan mode passthru
+	adf_ip_link_add dd up type dummy
+	adf_ip_link_add mv up link dd type macvlan mode passthru
 
 	do_test_dup add "macvlan self" dev mv self
 	do_test_dup del "macvlan self" dev mv self
@@ -76,10 +76,10 @@ test_dup_macvlan_self()
 
 test_dup_macvlan_master()
 {
-	ip_link_add br up type bridge vlan_filtering 1
-	ip_link_add dd up type dummy
-	ip_link_add mv up link dd type macvlan mode passthru
-	ip_link_set_master mv br
+	adf_ip_link_add br up type bridge vlan_filtering 1
+	adf_ip_link_add dd up type dummy
+	adf_ip_link_add mv up link dd type macvlan mode passthru
+	adf_ip_link_set_master mv br
 
 	do_test_dup add "macvlan master" dev mv self
 	do_test_dup del "macvlan master" dev mv self
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index ec2d6ceb1f08..e0d45292a298 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -72,7 +72,8 @@ declare -A TEST_NET4IN6IN6
 TEST_NET4IN6[1]=10.1.1.254
 TEST_NET4IN6[2]=10.2.1.254
 
-# mcast address
+# mcast addresses
+MCAST4=233.252.0.1
 MCAST6=ff02::1
 
 VRF=lisa
@@ -120,7 +121,7 @@ log_subsection()
 
 run_cmd()
 {
-	local cmd="$*"
+	local cmd="$1"
 	local out
 	local rc
 
@@ -145,7 +146,7 @@ get_linklocal()
 	local pfx
 	local addr
 
-	addr=$(${pfx} ip -6 -br addr show dev ${dev} | \
+	addr=$(${pfx} ${IP} -6 -br addr show dev ${dev} | \
 	awk '{
 		for (i = 3; i <= NF; ++i) {
 			if ($i ~ /^fe80/)
@@ -173,58 +174,48 @@ setup()
 
 	set -e
 
-	# create namespace
-	setup_ns PEER_NS
+	# create namespaces
+	setup_ns ns1
+	IP="ip -netns $ns1"
+	setup_ns ns2
 
 	# add vrf table
-	ip li add ${VRF} type vrf table ${VRF_TABLE}
-	ip li set ${VRF} up
-	ip ro add table ${VRF_TABLE} unreachable default metric 8192
-	ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
+	${IP} li add ${VRF} type vrf table ${VRF_TABLE}
+	${IP} li set ${VRF} up
+	${IP} ro add table ${VRF_TABLE} unreachable default metric 8192
+	${IP} -6 ro add table ${VRF_TABLE} unreachable default metric 8192
 
 	# create test interfaces
-	ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
-	ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
-	ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
-	ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+	${IP} li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+	${IP} li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+	${IP} li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+	${IP} li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
 
 	# enslave vrf interfaces
 	for n in 5 7; do
-		ip li set ${NETIFS[p${n}]} vrf ${VRF}
+		${IP} li set ${NETIFS[p${n}]} vrf ${VRF}
 	done
 
 	# add addresses
 	for n in 1 3 5 7; do
-		ip li set ${NETIFS[p${n}]} up
-		ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
-		ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+		${IP} li set ${NETIFS[p${n}]} up
+		${IP} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+		${IP} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
 	done
 
 	# move peer interfaces to namespace and add addresses
 	for n in 2 4 6 8; do
-		ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
-		ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
-		ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+		${IP} li set ${NETIFS[p${n}]} netns ${ns2} up
+		ip -netns $ns2 addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+		ip -netns $ns2 addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
 	done
 
-	ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
-	ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
+	${IP} -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+	${IP} -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
 
 	set +e
 }
 
-cleanup()
-{
-	# make sure we start from a clean slate
-	cleanup_ns ${PEER_NS} 2>/dev/null
-	for n in 1 3 5 7; do
-		ip link del ${NETIFS[p${n}]} 2>/dev/null
-	done
-	ip link del ${VRF} 2>/dev/null
-	ip ro flush table ${VRF_TABLE}
-	ip -6 ro flush table ${VRF_TABLE}
-}
-
 ################################################################################
 # IPv4 tests
 #
@@ -241,7 +232,7 @@ run_ip()
 	# dev arg may be empty
 	[ -n "${dev}" ] && dev="dev ${dev}"
 
-	run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+	run_cmd "${IP} ro add table ${table} ${prefix}/32 via ${gw} ${dev} onlink"
 	log_test $? ${exp_rc} "${desc}"
 }
 
@@ -257,8 +248,8 @@ run_ip_mpath()
 	# dev arg may be empty
 	[ -n "${dev}" ] && dev="dev ${dev}"
 
-	run_cmd ip ro add table "${table}" "${prefix}"/32 \
-		nexthop via ${nh1} nexthop via ${nh2}
+	run_cmd "${IP} ro add table ${table} ${prefix}/32 \
+		nexthop via ${nh1} nexthop via ${nh2}"
 	log_test $? ${exp_rc} "${desc}"
 }
 
@@ -270,11 +261,15 @@ valid_onlink_ipv4()
 
 	run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
 	run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+	run_ip 254 ${TEST_NET4[1]}.9 ${CONGW[1]} ${NETIFS[p3]} 0 \
+		"nexthop device mismatch"
 
 	log_subsection "VRF ${VRF}"
 
 	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
 	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.10 ${CONGW[3]} ${NETIFS[p7]} 0 \
+		"nexthop device mismatch"
 
 	log_subsection "VRF device, PBR table"
 
@@ -310,17 +305,15 @@ invalid_onlink_ipv4()
 {
 	run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
 		"Invalid gw - local unicast address"
+	run_ip 254 ${TEST_NET4[1]}.12 ${MCAST4} ${NETIFS[p1]} 2 \
+		"Invalid gw - multicast address"
 
 	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
 		"Invalid gw - local unicast address, VRF"
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.12 ${MCAST4} ${NETIFS[p5]} 2 \
+		"Invalid gw - multicast address, VRF"
 
 	run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
-
-	run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
-		"Gateway resolves to wrong nexthop device"
-
-	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
-		"Gateway resolves to wrong nexthop device - VRF"
 }
 
 ################################################################################
@@ -339,7 +332,7 @@ run_ip6()
 	# dev arg may be empty
 	[ -n "${dev}" ] && dev="dev ${dev}"
 
-	run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+	run_cmd "${IP} -6 ro add table ${table} ${prefix}/128 via ${gw} ${dev} onlink"
 	log_test $? ${exp_rc} "${desc}"
 }
 
@@ -353,8 +346,8 @@ run_ip6_mpath()
 	local exp_rc="$6"
 	local desc="$7"
 
-	run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \
-		nexthop via ${nh1} nexthop via ${nh2}
+	run_cmd "${IP} -6 ro add table ${table} ${prefix}/128 ${opts} \
+		nexthop via ${nh1} nexthop via ${nh2}"
 	log_test $? ${exp_rc} "${desc}"
 }
 
@@ -367,12 +360,16 @@ valid_onlink_ipv6()
 	run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
 	run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
 	run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+	run_ip6 254 ${TEST_NET6[1]}::a ${V6ADDRS[p1]/::*}::64 ${NETIFS[p3]} 0 \
+		"nexthop device mismatch"
 
 	log_subsection "VRF ${VRF}"
 
 	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
 	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
 	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::b ${V6ADDRS[p5]/::*}::64 \
+		${NETIFS[p7]} 0 "nexthop device mismatch"
 
 	log_subsection "VRF device, PBR table"
 
@@ -438,13 +435,6 @@ invalid_onlink_ipv6()
 
 	run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
 		"No nexthop device given"
-
-	# default VRF validation is done against LOCAL table
-	# run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
-	#	"Gateway resolves to wrong nexthop device"
-
-	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
-		"Gateway resolves to wrong nexthop device - VRF"
 }
 
 run_onlink_tests()
@@ -491,10 +481,9 @@ do
 	esac
 done
 
-cleanup
 setup
 run_onlink_tests
-cleanup
+cleanup_ns ${ns1} ${ns2}
 
 if [ "$TESTS" != "none" ]; then
 	printf "\nTests passed: %3d\n" ${nsuccess}
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 77c83d9508d3..6eb7f95e70e1 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -76,11 +76,13 @@ log_test()
 		printf "TEST: %-60s  [ OK ]\n" "${msg}"
 		nsuccess=$((nsuccess+1))
 	else
-		ret=1
-		nfail=$((nfail+1))
 		if [[ $rc -eq $ksft_skip ]]; then
+			[[ $ret -eq 0 ]] && ret=$ksft_skip
+			nskip=$((nskip+1))
 			printf "TEST: %-60s  [SKIP]\n" "${msg}"
 		else
+			ret=1
+			nfail=$((nfail+1))
 			printf "TEST: %-60s  [FAIL]\n" "${msg}"
 		fi
 
@@ -465,8 +467,8 @@ ipv6_fdb_grp_fcnal()
 	log_test $? 0 "Get Fdb nexthop group by id"
 
 	# fdb nexthop group can only contain fdb nexthops
-	run_cmd "$IP nexthop add id 63 via 2001:db8:91::4"
-	run_cmd "$IP nexthop add id 64 via 2001:db8:91::5"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::4 dev veth1"
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::5 dev veth1"
 	run_cmd "$IP nexthop add id 103 group 63/64 fdb"
 	log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
 
@@ -492,6 +494,26 @@ ipv6_fdb_grp_fcnal()
 	run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb"
 	log_test $? 2 "Fdb Nexthop with encap"
 
+	# Replace FDB nexthop to non-FDB and vice versa
+	run_cmd "$IP nexthop add id 70 via 2001:db8:91::2 fdb"
+	run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1"
+	log_test $? 0 "Replace FDB nexthop to non-FDB nexthop"
+	run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 fdb"
+	log_test $? 0 "Replace non-FDB nexthop to FDB nexthop"
+
+	# Replace FDB nexthop address while in a group
+	run_cmd "$IP nexthop add id 71 group 70 fdb"
+	run_cmd "$IP nexthop replace id 70 via 2001:db8:91::3 fdb"
+	log_test $? 0 "Replace FDB nexthop address while in a group"
+
+	# Cannot replace FDB nexthop to non-FDB and vice versa while in a group
+	run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1"
+	log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group"
+	run_cmd "$IP nexthop add id 72 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 73 group 72"
+	run_cmd "$IP nexthop replace id 72 via 2001:db8:91::2 fdb"
+	log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group"
+
 	run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
 	run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
 	log_test $? 0 "Fdb mac add with nexthop group"
@@ -545,15 +567,15 @@ ipv4_fdb_grp_fcnal()
 	log_test $? 0 "Get Fdb nexthop group by id"
 
 	# fdb nexthop group can only contain fdb nexthops
-	run_cmd "$IP nexthop add id 14 via 172.16.1.2"
-	run_cmd "$IP nexthop add id 15 via 172.16.1.3"
+	run_cmd "$IP nexthop add id 14 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1"
 	run_cmd "$IP nexthop add id 103 group 14/15 fdb"
 	log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
 
 	# Non fdb nexthop group can not contain fdb nexthops
 	run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb"
 	run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb"
-	run_cmd "$IP nexthop add id 104 group 14/15"
+	run_cmd "$IP nexthop add id 104 group 16/17"
 	log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
 
 	# fdb nexthop cannot have blackhole
@@ -572,6 +594,26 @@ ipv4_fdb_grp_fcnal()
 	run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb"
 	log_test $? 2 "Fdb Nexthop with encap"
 
+	# Replace FDB nexthop to non-FDB and vice versa
+	run_cmd "$IP nexthop add id 18 via 172.16.1.2 fdb"
+	run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1"
+	log_test $? 0 "Replace FDB nexthop to non-FDB nexthop"
+	run_cmd "$IP nexthop replace id 18 via 172.16.1.2 fdb"
+	log_test $? 0 "Replace non-FDB nexthop to FDB nexthop"
+
+	# Replace FDB nexthop address while in a group
+	run_cmd "$IP nexthop add id 19 group 18 fdb"
+	run_cmd "$IP nexthop replace id 18 via 172.16.1.3 fdb"
+	log_test $? 0 "Replace FDB nexthop address while in a group"
+
+	# Cannot replace FDB nexthop to non-FDB and vice versa while in a group
+	run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1"
+	log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group"
+	run_cmd "$IP nexthop add id 20 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 21 group 20"
+	run_cmd "$IP nexthop replace id 20 via 172.16.1.2 fdb"
+	log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group"
+
 	run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
 	run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
 	log_test $? 0 "Fdb mac add with nexthop group"
@@ -580,7 +622,7 @@ ipv4_fdb_grp_fcnal()
 	run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self"
 	log_test $? 255 "Fdb mac add with nexthop"
 
-	run_cmd "$IP ro add 172.16.0.0/22 nhid 15"
+	run_cmd "$IP ro add 172.16.0.0/22 nhid 16"
 	log_test $? 2 "Route add with fdb nexthop"
 
 	run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
@@ -741,7 +783,7 @@ ipv6_fcnal()
 	run_cmd "$IP nexthop add id 52 via 2001:db8:92::3"
 	log_test $? 2 "Create nexthop - gw only"
 
-	# gw is not reachable throught given dev
+	# gw is not reachable through given dev
 	run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1"
 	log_test $? 2 "Create nexthop - invalid gw+dev combination"
 
@@ -758,6 +800,14 @@ ipv6_fcnal()
 	set +e
 	check_nexthop "dev veth1" ""
 	log_test $? 0 "Nexthops removed on admin down"
+
+	# error routes should be deleted when their nexthop is deleted
+	run_cmd "$IP li set dev veth1 up"
+	run_cmd "$IP -6 nexthop add id 58 dev veth1"
+	run_cmd "$IP ro add blackhole 2001:db8:101::1/128 nhid 58"
+	run_cmd "$IP nexthop del id 58"
+	check_route6 "2001:db8:101::1" ""
+	log_test $? 0 "Error route removed on nexthop deletion"
 }
 
 ipv6_grp_refs()
@@ -1417,6 +1467,13 @@ ipv4_fcnal()
 
 	run_cmd "$IP ro del 172.16.102.0/24"
 	log_test $? 0 "Delete route when not specifying nexthop attributes"
+
+	# error routes should be deleted when their nexthop is deleted
+	run_cmd "$IP nexthop add id 23 dev veth1"
+	run_cmd "$IP ro add blackhole 172.16.102.100/32 nhid 23"
+	run_cmd "$IP nexthop del id 23"
+	check_route "172.16.102.100" ""
+	log_test $? 0 "Error route removed on nexthop deletion"
 }
 
 ipv4_grp_fcnal()
@@ -1615,6 +1672,17 @@ ipv4_withv6_fcnal()
 
 	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1"
 	log_test $? 2 "IPv4 route with invalid IPv6 gateway"
+
+	# Test IPv4 route with loopback IPv6 nexthop
+	# Regression test: loopback IPv6 nexthop was misclassified as reject
+	# route, skipping nhc_pcpu_rth_output allocation, causing panic when
+	# an IPv4 route references it and triggers __mkroute_output().
+	run_cmd "$IP -6 nexthop add id 20 dev lo"
+	run_cmd "$IP ro add 172.20.20.0/24 nhid 20"
+	run_cmd "ip netns exec $me ping -c1 -W1 172.20.20.1"
+	log_test $? 1 "IPv4 route with loopback IPv6 nexthop (no crash)"
+	run_cmd "$IP ro del 172.20.20.0/24"
+	run_cmd "$IP nexthop del id 20"
 }
 
 ipv4_fcnal_runtime()
@@ -2528,6 +2596,7 @@ done
 if [ "$TESTS" != "none" ]; then
 	printf "\nTests passed: %3d\n" ${nsuccess}
 	printf "Tests failed: %3d\n"   ${nfail}
+	printf "Tests skipped: %2d\n"  ${nskip}
 fi
 
 exit $ret
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 847936363a12..5fbdd2a0b537 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -256,6 +256,24 @@ fib_rule6_test()
 		fib_rule6_test_match_n_redirect "$match" "$match" \
 			"$getnomatch" "sport and dport redirect to table" \
 			"sport and dport no redirect to table"
+
+		match="sport 100-200 dport 300-400"
+		getmatch="sport 100 dport 400"
+		getnomatch="sport 100 dport 401"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" \
+			"sport and dport range redirect to table" \
+			"sport and dport range no redirect to table"
+	fi
+
+	ip rule help 2>&1 | grep sport | grep -q MASK
+	if [ $? -eq 0 ]; then
+		match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+		getmatch="sport 0x0f11 dport 0x220f"
+		getnomatch="sport 0x1f11 dport 0x221f"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "sport and dport masked redirect to table" \
+			"sport and dport masked no redirect to table"
 	fi
 
 	fib_check_iproute_support "ipproto" "ipproto"
@@ -292,6 +310,25 @@ fib_rule6_test()
 			"iif dscp no redirect to table"
 	fi
 
+	ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+	if [ $? -eq 0 ]; then
+		match="dscp 0x0f/0x0f"
+		tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+		tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+		getmatch="tos $tosmatch"
+		getnomatch="tos $tosnomatch"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "dscp masked redirect to table" \
+			"dscp masked no redirect to table"
+
+		match="dscp 0x0f/0x0f"
+		getmatch="from $SRC_IP6 iif $DEV tos $tosmatch"
+		getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "iif dscp masked redirect to table" \
+			"iif dscp masked no redirect to table"
+	fi
+
 	fib_check_iproute_support "flowlabel" "flowlabel"
 	if [ $? -eq 0 ]; then
 		match="flowlabel 0xfffff"
@@ -322,6 +359,23 @@ fib_rule6_test()
 			"$getnomatch" "iif flowlabel masked redirect to table" \
 			"iif flowlabel masked no redirect to table"
 	fi
+
+	$IP link show dev $DEV | grep -q vrf0
+	if [ $? -eq 0 ]; then
+		match="oif vrf0"
+		getmatch="oif $DEV"
+		getnomatch="oif lo"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "VRF oif redirect to table" \
+			"VRF oif no redirect to table"
+
+		match="from $SRC_IP6 iif vrf0"
+		getmatch="from $SRC_IP6 iif $DEV"
+		getnomatch="from $SRC_IP6 iif lo"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "VRF iif redirect to table" \
+			"VRF iif no redirect to table"
+	fi
 }
 
 fib_rule6_vrf_test()
@@ -462,10 +516,7 @@ fib_rule4_test()
 	fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
 		"oif redirect to table" "oif no redirect to table"
 
-	# Enable forwarding and disable rp_filter as all the addresses are in
-	# the same subnet and egress device == ingress device.
 	ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
-	ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
 	match="from $SRC_IP iif $DEV"
 	getnomatch="from $SRC_IP iif lo"
 	fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
@@ -525,6 +576,24 @@ fib_rule4_test()
 		fib_rule4_test_match_n_redirect "$match" "$match" \
 			"$getnomatch" "sport and dport redirect to table" \
 			"sport and dport no redirect to table"
+
+		match="sport 100-200 dport 300-400"
+		getmatch="sport 100 dport 400"
+		getnomatch="sport 100 dport 401"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" \
+			"sport and dport range redirect to table" \
+			"sport and dport range no redirect to table"
+	fi
+
+	ip rule help 2>&1 | grep sport | grep -q MASK
+	if [ $? -eq 0 ]; then
+		match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+		getmatch="sport 0x0f11 dport 0x220f"
+		getnomatch="sport 0x1f11 dport 0x221f"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "sport and dport masked redirect to table" \
+			"sport and dport masked no redirect to table"
 	fi
 
 	fib_check_iproute_support "ipproto" "ipproto"
@@ -561,6 +630,42 @@ fib_rule4_test()
 			"$getnomatch" "iif dscp redirect to table" \
 			"iif dscp no redirect to table"
 	fi
+
+	ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+	if [ $? -eq 0 ]; then
+		match="dscp 0x0f/0x0f"
+		tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+		tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+		getmatch="tos $tosmatch"
+		getnomatch="tos $tosnomatch"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "dscp masked redirect to table" \
+			"dscp masked no redirect to table"
+
+		match="dscp 0x0f/0x0f"
+		getmatch="from $SRC_IP iif $DEV tos $tosmatch"
+		getnomatch="from $SRC_IP iif $DEV tos $tosnomatch"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "iif dscp masked redirect to table" \
+			"iif dscp masked no redirect to table"
+	fi
+
+	$IP link show dev $DEV | grep -q vrf0
+	if [ $? -eq 0 ]; then
+		match="oif vrf0"
+		getmatch="oif $DEV"
+		getnomatch="oif lo"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "VRF oif redirect to table" \
+			"VRF oif no redirect to table"
+
+		match="from $SRC_IP iif vrf0"
+		getmatch="from $SRC_IP iif $DEV"
+		getnomatch="from $SRC_IP iif lo"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+			"$getnomatch" "VRF iif redirect to table" \
+			"VRF iif no redirect to table"
+	fi
 }
 
 fib_rule4_vrf_test()
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 3ea6f886a210..af64f93bb2e1 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -11,7 +11,8 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
        ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
        ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
        ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
-       ipv4_mpath_list ipv6_mpath_list"
+       ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \
+       ipv4_mpath_balance_preferred fib6_ra_to_static"
 
 VERBOSE=0
 PAUSE_ON_FAIL=no
@@ -544,7 +545,7 @@ fib4_nexthop()
 fib6_nexthop()
 {
 	local lldummy=$(get_linklocal dummy0)
-	local llv1=$(get_linklocal dummy0)
+	local llv1=$(get_linklocal veth1)
 
 	if [ -z "$lldummy" ]; then
 		echo "Failed to get linklocal address for dummy0"
@@ -867,6 +868,64 @@ fib6_gc_test()
 	check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l)
 	log_test $ret 0 "ipv6 route garbage collection (replace with permanent)"
 
+	# Delete dummy_10 and remove all routes
+	$IP link del dev dummy_10
+
+	# rd6 is required for the next test. (ipv6toolkit)
+	if [ ! -x "$(command -v rd6)" ]; then
+	    echo "SKIP: rd6 not found."
+	    set +e
+	    cleanup &> /dev/null
+	    return
+	fi
+
+	setup_ns ns2
+	$IP link add veth1 type veth peer veth2 netns $ns2
+	$IP link set veth1 up
+	ip -netns $ns2 link set veth2 up
+	$IP addr add fe80:dead::1/64 dev veth1
+	ip -netns $ns2 addr add fe80:dead::2/64 dev veth2
+
+	# Add NTF_ROUTER neighbour to prevent rt6_age_examine_exception()
+	# from removing not-yet-expired exceptions.
+	ip -netns $ns2 link set veth2 address 00:11:22:33:44:55
+	$IP neigh add fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router
+
+	$NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_redirects=1
+	$NS_EXEC sysctl -wq net.ipv6.conf.veth1.forwarding=0
+
+	# Temporary routes
+	for i in $(seq 1 5); do
+	    # Expire route after $EXPIRE seconds
+	    $IP -6 route add 2001:10::$i \
+		via fe80:dead::2 dev veth1 expires $EXPIRE
+
+	    ip netns exec $ns2 rd6 -i veth2 \
+		-s fe80:dead::2 -d fe80:dead::1 \
+		-r 2001:10::$i -t fe80:dead::3 -p ICMP6
+	done
+
+	check_rt_num 5 $($IP -6 route list | grep expires | grep 2001:10:: | wc -l)
+
+	# Promote to permanent routes by "append" (w/o NLM_F_EXCL and NLM_F_REPLACE)
+	for i in $(seq 1 5); do
+	    # -EEXIST, but the temporary route becomes the permanent route.
+	    $IP -6 route append 2001:10::$i \
+		via fe80:dead::2 dev veth1 2>/dev/null || true
+	done
+
+	check_rt_num 5 $($IP -6 route list | grep -v expires | grep 2001:10:: | wc -l)
+	check_rt_num 5 $($IP -6 route list cache | grep 2001:10:: | wc -l)
+
+	# Trigger GC instead of waiting $GC_WAIT_TIME.
+	# rt6_nh_dump_exceptions() just skips expired exceptions.
+	$NS_EXEC sysctl -wq net.ipv6.route.flush=1
+	check_rt_num 0 $($IP -6 route list cache | grep 2001:10:: | wc -l)
+	log_test $ret 0 "ipv6 route garbage collection (promote to permanent routes)"
+
+	$IP neigh del fe80:dead::3 lladdr 00:11:22:33:44:55 dev veth1 router
+	$IP link del veth1
+
 	# ra6 is required for the next test. (ipv6toolkit)
 	if [ ! -x "$(command -v ra6)" ]; then
 	    echo "SKIP: ra6 not found."
@@ -875,9 +934,6 @@ fib6_gc_test()
 	    return
 	fi
 
-	# Delete dummy_10 and remove all routes
-	$IP link del dev dummy_10
-
 	# Create a pair of veth devices to send a RA message from one
 	# device to another.
 	$IP link add veth1 type veth peer name veth2
@@ -1085,6 +1141,35 @@ route_setup()
 	set +e
 }
 
+forwarding_cleanup()
+{
+	cleanup_ns $ns3
+
+	route_cleanup
+}
+
+# extend route_setup with an ns3 reachable through ns2 over both devices
+forwarding_setup()
+{
+	forwarding_cleanup
+
+	route_setup
+
+	setup_ns ns3
+
+	ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2
+	ip -netns $ns3 link set veth5 up
+	ip -netns $ns2 link set veth6 up
+
+	ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24
+	ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24
+	ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2
+
+	ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad
+	ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad
+	ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2
+}
+
 # assumption is that basic add of a single path route works
 # otherwise just adding an address on an interface is broken
 ipv6_rt_add()
@@ -1447,6 +1532,85 @@ ipv6_route_metrics_test()
 	route_cleanup
 }
 
+fib6_ra_to_static()
+{
+	setup
+
+	echo
+	echo "Fib6 route promotion from RA-learned to static test"
+	set -e
+
+	# ra6 is required for the test. (ipv6toolkit)
+	if [ ! -x "$(command -v ra6)" ]; then
+	    echo "SKIP: ra6 not found."
+	    set +e
+	    cleanup &> /dev/null
+	    return
+	fi
+
+	# Create a pair of veth devices to send a RA message from one
+	# device to another.
+	$IP link add veth1 type veth peer name veth2
+	$IP link set dev veth1 up
+	$IP link set dev veth2 up
+	$IP -6 address add 2001:10::1/64 dev veth1 nodad
+	$IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+	# Make veth1 ready to receive RA messages.
+	$NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+	# Send a RA message with a prefix from veth2.
+	$NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+
+	# Wait for the RA message.
+	sleep 1
+
+	# systemd may mess up the test. Make sure that
+	# systemd-networkd.service and systemd-networkd.socket are stopped.
+	check_rt_num_clean 2 $($IP -6 route list|grep expires|wc -l) || return
+
+	# Configure static address on the same prefix
+	$IP -6 address add 2001:12::dead/64 dev veth1 nodad
+
+	# On-link route won't expire anymore, default route still owned by RA
+	check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+	# Send a second RA message with a prefix from veth2.
+	$NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+	sleep 1
+
+	# Expire is not back, on-link route is still static
+	check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+	$IP -6 address del 2001:12::dead/64 dev veth1 nodad
+
+	# Expire is back, on-link route is now owned by RA again
+	check_rt_num 2 $($IP -6 route list |grep expires|wc -l)
+
+	log_test $ret 0 "ipv6 promote RA route to static"
+
+	# Prepare for RA route with gateway
+	$NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra_rt_info_max_plen=64
+
+	# Add initial route to cause ECMP merging
+	$IP -6 route add 2001:12::/64 via fe80::dead:beef dev veth1
+
+	$NS_EXEC ra6 -i veth2 -d 2001:10::1 -R 2001:12::/64#1#120
+
+	# Routes are not merged as RA routes are not eligible for ECMP
+	check_rt_num 2 "$($IP -6 route list | grep -c "2001:12::/64 via")"
+
+	$IP -6 route append 2001:12::/64 via fe80::dead:feeb dev veth1
+
+	check_rt_num 2 "$($IP -6 route list | grep -c "nexthop via")"
+
+	log_test "$ret" 0 "ipv6 RA route with nexthop do not merge into ECMP with static"
+
+	set +e
+
+	cleanup &> /dev/null
+}
+
 # add route for a prefix, flushing any existing routes first
 # expected to be the first step of a test
 add_route()
@@ -2531,9 +2695,6 @@ ipv4_mpath_list_test()
 	run_cmd "ip -n $ns2 route add 203.0.113.0/24
 		nexthop via 172.16.201.2 nexthop via 172.16.202.2"
 	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
-	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
-	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
-	run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
 	set +e
 
 	local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
@@ -2600,6 +2761,160 @@ ipv6_mpath_list_test()
 	route_cleanup
 }
 
+tc_set_flower_counter__saddr_syn() {
+	tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2"
+}
+
+ip_mpath_balance_dep_check()
+{
+	if [ ! -x "$(command -v socat)" ]; then
+		echo "socat command not found. Skipping test"
+		return 1
+	fi
+
+	if [ ! -x "$(command -v jq)" ]; then
+		echo "jq command not found. Skipping test"
+		return 1
+	fi
+}
+
+ip_mpath_balance() {
+	local -r ipver=$1
+	local -r daddr=$2
+	local -r num_conn=20
+
+	for i in $(seq 1 $num_conn); do
+		ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null &
+		sleep 0.02
+		echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000
+	done
+
+	local -r syn0="$(tc_get_flower_counter $ns1 veth1)"
+	local -r syn1="$(tc_get_flower_counter $ns1 veth3)"
+	local -r syns=$((syn0+syn1))
+
+	[ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)"
+
+	[[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]]
+}
+
+ipv4_mpath_balance_test()
+{
+	echo
+	echo "IPv4 multipath load balance test"
+
+	ip_mpath_balance_dep_check || return 1
+	forwarding_setup
+
+	$IP route add 172.16.105.1 \
+		nexthop via 172.16.101.2 \
+		nexthop via 172.16.103.2
+
+	ip netns exec $ns1 \
+		sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+
+	tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1
+	tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1
+
+	ip_mpath_balance -4 172.16.105.1
+
+	log_test $? 0 "IPv4 multipath loadbalance"
+
+	forwarding_cleanup
+}
+
+get_route_dev_src()
+{
+	local pfx="$1"
+	local src="$2"
+	local out
+
+	if out=$($IP -j route get "$pfx" from "$src" | jq -re ".[0].dev"); then
+		echo "$out"
+	fi
+}
+
+# Look up the route to each of 172.16.105.1..254 from source $1, counting the
+# egress devices. With a preferred device in $2, succeed only if every lookup
+# picked it; else only if all picked veth1 or veth3 and each was picked once.
+ipv4_mpath_preferred()
+{
+	local src_ip=$1
+	local pref_dev=$2
+	local dev routes i
+	local route0=0 route1=0 pref_route=0
+	local num_routes=254
+
+	for i in $(seq 1 $num_routes) ; do
+		dev=$(get_route_dev_src 172.16.105.$i $src_ip)
+		if [ "$dev" = "$pref_dev" ]; then
+			pref_route=$((pref_route+1))
+		elif [ "$dev" = "veth1" ]; then
+			route0=$((route0+1))
+		elif [ "$dev" = "veth3" ]; then
+			route1=$((route1+1))
+		fi
+	done
+
+	routes=$((route0+route1))
+
+	[ "$VERBOSE" = "1" ] && echo "multipath: routes seen: ($route0,$route1,$pref_route)"
+
+	if [ x"$pref_dev" = x"" ]; then
+		[[ $routes -ge $num_routes ]] && [[ $route0 -gt 0 ]] && [[ $route1 -gt 0 ]]
+	else
+		[[ $pref_route -ge $num_routes ]]
+	fi
+}
+
+ipv4_mpath_balance_preferred_test()
+{
+	echo
+	echo "IPv4 multipath load balance preferred route"
+
+	forwarding_setup
+
+	$IP route add 172.16.105.0/24 \
+		nexthop via 172.16.101.2 \
+		nexthop via 172.16.103.2
+
+	ipv4_mpath_preferred 172.16.101.1 veth1
+	log_test $? 0 "IPv4 multipath loadbalance from veth1"
+
+	ipv4_mpath_preferred 172.16.103.1 veth3
+	log_test $? 0 "IPv4 multipath loadbalance from veth3"
+
+	ipv4_mpath_preferred 198.51.100.1
+	log_test $? 0 "IPv4 multipath loadbalance from dummy"
+
+	forwarding_cleanup
+}
+
+ipv6_mpath_balance_test()
+{
+	echo
+	echo "IPv6 multipath load balance test"
+
+	ip_mpath_balance_dep_check || return 1
+	forwarding_setup
+
+	$IP route add 2001:db8:105::1\
+		nexthop via 2001:db8:101::2 \
+		nexthop via 2001:db8:103::2
+
+	ip netns exec $ns1 \
+		sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+	tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1
+	tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1
+
+	ip_mpath_balance -6 "[2001:db8:105::1]"
+
+	log_test $? 0 "IPv6 multipath loadbalance"
+
+	forwarding_cleanup
+}
+
 ################################################################################
 # usage
 
@@ -2683,6 +2998,10 @@ do
 	fib6_gc_test|ipv6_gc)		fib6_gc_test;;
 	ipv4_mpath_list)		ipv4_mpath_list_test;;
 	ipv6_mpath_list)		ipv6_mpath_list_test;;
+	ipv4_mpath_balance)		ipv4_mpath_balance_test;;
+	ipv6_mpath_balance)		ipv6_mpath_balance_test;;
+	ipv4_mpath_balance_preferred)	ipv4_mpath_balance_preferred_test;;
+	fib6_ra_to_static)		fib6_ra_to_static;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
index 2dea317f12e7..418ff96c52ef 100644
--- a/tools/testing/selftests/net/forwarding/.gitignore
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 forwarding.config
+ipmr
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 00bde7b6f39e..bbaf4d937dd8 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0+ OR MIT
 
-TEST_PROGS = bridge_fdb_learning_limit.sh \
+TEST_PROGS := \
+	bridge_activity_notify.sh \
+	bridge_fdb_learning_limit.sh \
+	bridge_fdb_local_vlan_0.sh \
 	bridge_igmp.sh \
 	bridge_locked_port.sh \
 	bridge_mdb.sh \
@@ -18,64 +21,64 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
 	gre_custom_multipath_hash.sh \
 	gre_inner_v4_multipath.sh \
 	gre_inner_v6_multipath.sh \
-	gre_multipath_nh_res.sh \
-	gre_multipath_nh.sh \
 	gre_multipath.sh \
+	gre_multipath_nh.sh \
+	gre_multipath_nh_res.sh \
 	ip6_forward_instats_vrf.sh \
 	ip6gre_custom_multipath_hash.sh \
+	ip6gre_flat.sh \
 	ip6gre_flat_key.sh \
 	ip6gre_flat_keys.sh \
-	ip6gre_flat.sh \
+	ip6gre_hier.sh \
 	ip6gre_hier_key.sh \
 	ip6gre_hier_keys.sh \
-	ip6gre_hier.sh \
 	ip6gre_inner_v4_multipath.sh \
 	ip6gre_inner_v6_multipath.sh \
+	ipip_flat_gre.sh \
 	ipip_flat_gre_key.sh \
 	ipip_flat_gre_keys.sh \
-	ipip_flat_gre.sh \
+	ipip_hier_gre.sh \
 	ipip_hier_gre_key.sh \
 	ipip_hier_gre_keys.sh \
-	ipip_hier_gre.sh \
 	lib_sh_test.sh \
 	local_termination.sh \
 	min_max_mtu.sh \
+	mirror_gre.sh \
 	mirror_gre_bound.sh \
 	mirror_gre_bridge_1d.sh \
 	mirror_gre_bridge_1d_vlan.sh \
-	mirror_gre_bridge_1q_lag.sh \
 	mirror_gre_bridge_1q.sh \
+	mirror_gre_bridge_1q_lag.sh \
 	mirror_gre_changes.sh \
 	mirror_gre_flower.sh \
 	mirror_gre_lag_lacp.sh \
 	mirror_gre_neigh.sh \
 	mirror_gre_nh.sh \
-	mirror_gre.sh \
-	mirror_gre_vlan_bridge_1q.sh \
 	mirror_gre_vlan.sh \
+	mirror_gre_vlan_bridge_1q.sh \
 	mirror_vlan.sh \
 	no_forwarding.sh \
 	pedit_dsfield.sh \
 	pedit_ip.sh \
 	pedit_l4port.sh \
-	q_in_vni_ipv6.sh \
 	q_in_vni.sh \
+	q_in_vni_ipv6.sh \
+	router.sh \
 	router_bridge.sh \
 	router_bridge_1d.sh \
 	router_bridge_1d_lag.sh \
 	router_bridge_lag.sh \
+	router_bridge_pvid_vlan_upper.sh \
 	router_bridge_vlan.sh \
 	router_bridge_vlan_upper.sh \
-	router_bridge_pvid_vlan_upper.sh \
 	router_bridge_vlan_upper_pvid.sh \
 	router_broadcast.sh \
-	router_mpath_nh_res.sh \
 	router_mpath_nh.sh \
+	router_mpath_nh_res.sh \
 	router_mpath_seed.sh \
 	router_multicast.sh \
 	router_multipath.sh \
 	router_nh.sh \
-	router.sh \
 	router_vid_1.sh \
 	sch_ets.sh \
 	sch_red.sh \
@@ -85,31 +88,34 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
 	skbedit_priority.sh \
 	tc_actions.sh \
 	tc_chains.sh \
-	tc_flower_router.sh \
 	tc_flower.sh \
-	tc_flower_l2_miss.sh \
 	tc_flower_cfm.sh \
+	tc_flower_l2_miss.sh \
 	tc_flower_port_range.sh \
+	tc_flower_router.sh \
 	tc_mpls_l2vpn.sh \
 	tc_police.sh \
 	tc_shblocks.sh \
 	tc_tunnel_key.sh \
 	tc_vlan_modify.sh \
-	vxlan_asymmetric_ipv6.sh \
 	vxlan_asymmetric.sh \
+	vxlan_asymmetric_ipv6.sh \
+	vxlan_bridge_1d.sh \
 	vxlan_bridge_1d_ipv6.sh \
-	vxlan_bridge_1d_port_8472_ipv6.sh \
 	vxlan_bridge_1d_port_8472.sh \
-	vxlan_bridge_1d.sh \
+	vxlan_bridge_1d_port_8472_ipv6.sh \
+	vxlan_bridge_1q.sh \
 	vxlan_bridge_1q_ipv6.sh \
-	vxlan_bridge_1q_port_8472_ipv6.sh \
+	vxlan_bridge_1q_mc_ul.sh \
 	vxlan_bridge_1q_port_8472.sh \
-	vxlan_bridge_1q.sh \
+	vxlan_bridge_1q_port_8472_ipv6.sh \
 	vxlan_reserved.sh \
+	vxlan_symmetric.sh \
 	vxlan_symmetric_ipv6.sh \
-	vxlan_symmetric.sh
+# end of TEST_PROGS
 
-TEST_FILES := devlink_lib.sh \
+TEST_FILES := \
+	devlink_lib.sh \
 	fib_offload_lib.sh \
 	forwarding.config.sample \
 	ip6gre_lib.sh \
@@ -124,10 +130,16 @@ TEST_FILES := devlink_lib.sh \
 	sch_ets_tests.sh \
 	sch_tbf_core.sh \
 	sch_tbf_etsprio.sh \
-	tc_common.sh
+	tc_common.sh \
+# end of TEST_FILES
+
+TEST_GEN_PROGS := \
+	ipmr \
+# end of TEST_GEN_PROGS
 
 TEST_INCLUDES := \
+	$(wildcard ../lib/sh/*.sh) \
 	../lib.sh \
-	$(wildcard ../lib/sh/*.sh)
+# end of TEST_INCLUDES
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index a652429bfd53..392a5a91ed37 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -6,7 +6,7 @@ to easily create and test complex environments.
 
 Unfortunately, these namespaces can not be used with actual switching
 ASICs, as their ports can not be migrated to other network namespaces
-(dev->netns_local) and most of them probably do not support the
+(dev->netns_immutable) and most of them probably do not support the
 L1-separation provided by namespaces.
 
 However, a similar kind of flexibility can be achieved by using VRFs and
@@ -57,6 +57,21 @@ o Code shall be checked using ShellCheck [1] prior to submission.
 
 1. https://www.shellcheck.net/
 
+Cleanups
+--------
+
+o lib.sh brings in defer.sh (by way of ../lib.sh) by default. Consider
+  making use of the defer primitive to schedule automatic cleanups. This
+  makes it harder to forget to remove a temporary netdevice, kill a running
+  process or perform other cleanup when the test script is interrupted.
+
+o When adding a helper that dirties the environment, but schedules all
+  necessary cleanups through defer, consider prefixing its name with adf_
+  for consistency with lib.sh and ../lib.sh helpers. This serves as an
+  immediately visible bit of documentation about the helper API.
+
+o Definitely do the above for any new code in lib.sh, if practical.
+
 Customization
 =============
 
diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh
new file mode 100755
index 000000000000..522a5b1b046c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+                          +------------------------+
+# | H1 (vrf)              |                          | H2 (vrf)               |
+# | 192.0.2.1/28          |                          | 192.0.2.2/28           |
+# |    + $h1              |                          |    + $h2               |
+# +----|------------------+                          +----|-------------------+
+#      |                                                  |
+# +----|--------------------------------------------------|-------------------+
+# | SW |                                                  |                   |
+# | +--|--------------------------------------------------|-----------------+ |
+# | |  + $swp1                   BR1 (802.1d)             + $swp2           | |
+# | |                                                                       | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	new_inactive_test
+	existing_active_test
+	norefresh_test
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	adf_simple_if_init "$h1" 192.0.2.1/28
+}
+
+h2_create()
+{
+	adf_simple_if_init "$h2" 192.0.2.2/28
+}
+
+switch_create()
+{
+	adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \
+		ageing_time "$LOW_AGEING_TIME"
+	adf_ip_link_set_up br1
+
+	adf_ip_link_set_master "$swp1" br1
+	adf_ip_link_set_up "$swp1"
+
+	adf_ip_link_set_master "$swp2" br1
+	adf_ip_link_set_up "$swp2"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	adf_vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+fdb_active_wait()
+{
+	local mac=$1; shift
+
+	bridge -d fdb get "$mac" br br1 | grep -q -v "inactive"
+}
+
+fdb_inactive_wait()
+{
+	local mac=$1; shift
+
+	bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+}
+
+new_inactive_test()
+{
+	local mac="00:11:22:33:44:55"
+
+	# Add a new FDB entry as static and inactive and check that it
+	# becomes active upon traffic.
+	RET=0
+
+	bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive
+	bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+	check_err $? "FDB entry not present as \"inactive\" when should"
+
+	$MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q
+
+	busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac"
+	check_err $? "FDB entry present as \"inactive\" when should not"
+
+	log_test "Transition from inactive to active"
+
+	bridge fdb del "$mac" dev "$swp1" master
+}
+
+existing_active_test()
+{
+	local mac="00:11:22:33:44:55"
+	local ageing_time
+
+	# Enable activity notifications on an existing dynamic FDB entry and
+	# check that it becomes inactive after the ageing time passed.
+	RET=0
+
+	bridge fdb add "$mac" dev "$swp1" master dynamic
+	bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh
+
+	bridge -d fdb get "$mac" br br1 | grep -q "activity_notify"
+	check_err $? "FDB entry not present as \"activity_notify\" when should"
+
+	bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+	check_fail $? "FDB entry present as \"inactive\" when should not"
+
+	ageing_time=$(bridge_ageing_time_get br1)
+	slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac"
+	check_err $? "FDB entry not present as \"inactive\" when should"
+
+	log_test "Transition from active to inactive"
+
+	bridge fdb del "$mac" dev "$swp1" master
+}
+
+norefresh_test()
+{
+	local mac="00:11:22:33:44:55"
+	local updated_time
+
+	# Check that the "updated" time is reset when replacing an FDB entry
+	# without the "norefresh" keyword and that it is not reset when
+	# replacing with the "norefresh" keyword.
+	RET=0
+
+	bridge fdb add "$mac" dev "$swp1" master static
+	sleep 1
+
+	bridge fdb replace "$mac" dev "$swp1" master static activity_notify
+	updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]')
+	if [[ $updated_time -ne 0 ]]; then
+		check_err 1 "\"updated\" time was not reset when should"
+	fi
+
+	sleep 1
+	bridge fdb replace "$mac" dev "$swp1" master static norefresh
+	updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]')
+	if [[ $updated_time -eq 0 ]]; then
+		check_err 1 "\"updated\" time was reset when should not"
+	fi
+
+	log_test "Resetting of \"updated\" time"
+
+	bridge fdb del "$mac" dev "$swp1" master
+}
+
+if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then
+	echo "SKIP: iproute2 too old, missing bridge FDB activity notification control"
+	exit "$ksft_skip"
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh
new file mode 100755
index 000000000000..694de8ba97e4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh
@@ -0,0 +1,387 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +-----------------------+ +-----------------------+
+# | H1 (vrf)              | | H2 (vrf)              | | H3 (vrf)              |
+# |    + $h1              | |    + $h2              | |    + $h3              |
+# |    | 192.0.2.1/28     | |    | 192.0.2.2/28     | |    | 192.0.2.18/28    |
+# |    | 2001:db8:1::1/64 | |    | 2001:db8:1::2/64 | |    | 2001:db8:2::2/64 |
+# |    |                  | |    |                  | |    |                  |
+# +----|------------------+ +----|------------------+ +----|------------------+
+#      |                         |                         |
+# +----|-------------------------|-------------------------|------------------+
+# | +--|-------------------------|------------------+      |                  |
+# | |  + $swp1                   + $swp2            |      + $swp3            |
+# | |                                               |        192.0.2.17/28    |
+# | |  BR1 (802.1q)                                 |        2001:db8:2::1/64 |
+# | |  192.0.2.3/28                                 |                         |
+# | |  2001:db8:1::3/64                             |                         |
+# | +-----------------------------------------------+                      SW |
+# +---------------------------------------------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+#shellcheck disable=SC2034 # ... and global variables
+
+ALL_TESTS="
+	test_d_no_sharing
+	test_d_sharing
+	test_q_no_sharing
+	test_q_sharing
+	test_addr_set
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+pMAC=00:11:22:33:44:55
+bMAC=00:11:22:33:44:66
+mMAC=00:11:22:33:44:77
+xMAC=00:11:22:33:44:88
+
+host_create()
+{
+	local h=$1; shift
+	local ipv4=$1; shift
+	local ipv6=$1; shift
+
+	adf_simple_if_init "$h" "$ipv4" "$ipv6"
+	adf_ip_route_add vrf "v$h" 192.0.2.16/28 nexthop via 192.0.2.3
+	adf_ip_route_add vrf "v$h" 2001:db8:2::/64 nexthop via 2001:db8:1::3
+}
+
+h3_create()
+{
+	adf_simple_if_init "$h3" 192.0.2.18/28 2001:db8:2::2/64
+	adf_ip_route_add vrf "v$h3" 192.0.2.0/28 nexthop via 192.0.2.17
+	adf_ip_route_add vrf "v$h3" 2001:db8:1::/64 nexthop via 2001:db8:2::1
+
+	tc qdisc add dev "$h3" clsact
+	defer tc qdisc del dev "$h3" clsact
+
+	tc filter add dev "$h3" ingress proto ip pref 104 \
+	   flower skip_hw ip_proto udp dst_port 4096 \
+	   action pass
+	defer tc filter del dev "$h3" ingress proto ip pref 104
+
+	tc qdisc add dev "$h2" clsact
+	defer tc qdisc del dev "$h2" clsact
+
+	tc filter add dev "$h2" ingress proto ip pref 104 \
+	   flower skip_hw ip_proto udp dst_port 4096 \
+	   action pass
+	defer tc filter del dev "$h2" ingress proto ip pref 104
+}
+
+switch_create()
+{
+	adf_ip_link_set_up "$swp1"
+
+	adf_ip_link_set_up "$swp2"
+
+	adf_ip_addr_add "$swp3" 192.0.2.17/28
+	adf_ip_addr_add "$swp3" 2001:db8:2::1/64
+	adf_ip_link_set_up "$swp3"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	adf_vrf_prepare
+	adf_forwarding_enable
+
+	host_create "$h1" 192.0.2.1/28 2001:db8:1::1/64
+	host_create "$h2" 192.0.2.2/28 2001:db8:1::2/64
+	h3_create
+
+	switch_create
+}
+
+adf_bridge_configure()
+{
+	local dev
+
+	adf_ip_addr_add br 192.0.2.3/28
+	adf_ip_addr_add br 2001:db8:1::3/64
+
+	adf_bridge_vlan_add dev br vid 1 pvid untagged self
+	adf_bridge_vlan_add dev br vid 2 self
+	adf_bridge_vlan_add dev br vid 3 self
+
+	for dev in "$swp1" "$swp2"; do
+		adf_ip_link_set_master "$dev" br
+		adf_bridge_vlan_add dev "$dev" vid 1 pvid untagged
+		adf_bridge_vlan_add dev "$dev" vid 2
+		adf_bridge_vlan_add dev "$dev" vid 3
+	done
+}
+
+adf_bridge_create()
+{
+	local mac
+
+	adf_ip_link_add br up type bridge vlan_default_pvid 0 "$@"
+	mac=$(mac_get br)
+	adf_bridge_configure
+	adf_ip_link_set_addr br "$mac"
+}
+
+check_fdb_local_vlan_0_support()
+{
+	if adf_ip_link_add XXbr up type bridge vlan_filtering 1 \
+			fdb_local_vlan_0 1 &>/dev/null; then
+		return 0	# probe bridge accepted fdb_local_vlan_0, option is usable
+	fi
+
+	log_test_skip "FDB sharing" \
+		      "iproute2 or the kernel do not support fdb_local_vlan_0"
+}
+
+check_mac_presence()
+{
+	local should_fail=$1; shift
+	local dev=$1; shift
+	local vlan=$1; shift
+	local mac
+
+	mac=$(mac_get "$dev")
+
+	if ((vlan == 0)); then
+		vlan=null
+	fi
+
+	bridge -j fdb show dev "$dev" |
+	    jq -e --arg mac "$mac" --argjson vlan "$vlan" \
+	       '.[] | select(.mac == $mac) | select(.vlan == $vlan)' > /dev/null
+	check_err_fail "$should_fail" $? "FDB dev $dev vid $vlan addr $mac exists"
+}
+
+do_sharing_test()
+{
+	local should_fail=$1; shift
+	local what=$1; shift
+	local dev
+
+	RET=0
+
+	for dev in "$swp1" "$swp2" br; do
+		check_mac_presence 0 "$dev" 0
+		check_mac_presence "$should_fail" "$dev" 1
+		check_mac_presence "$should_fail" "$dev" 2
+		check_mac_presence "$should_fail" "$dev" 3
+	done
+
+	log_test "$what"
+}
+
+do_end_to_end_test()
+{
+	local mac=$1; shift	# destination MAC of the generated packets
+	local what=$1; shift	# test name handed to log_test
+	local probe_dev=${1-$h3}; shift	# device whose pref 104 ingress counter is read; defaults to $h3
+	local expect=${1-10}; shift	# expected counter delta; defaults to 10
+
+	local t0
+	local t1
+	local dd
+
+	RET=0
+
+	# In mausezahn, use $mac as the destination MAC. When that is a local
+	# MAC in the MAC sharing context, it will cause an FDB miss on VLAN 1
+	# and prompt a second lookup in VLAN 0.
+
+	t0=$(tc_rule_stats_get "$probe_dev" 104 ingress)
+
+	$MZ "$h1" -c 10 -p 64 -a own -b "$mac" \
+		  -A 192.0.2.1 -B 192.0.2.18 -t udp "dp=4096,sp=2048" -q
+	sleep 1
+
+	t1=$(tc_rule_stats_get "$probe_dev" 104 ingress)
+	dd=$((t1 - t0))
+
+	((dd == expect))
+	check_err $? "Expected $expect packets on $probe_dev got $dd"
+
+	log_test "$what"
+}
+
+do_tests()
+{
+	local should_fail=$1; shift
+	local what=$1; shift
+	local swp1_mac
+	local br_mac
+
+	swp1_mac=$(mac_get "$swp1")
+	br_mac=$(mac_get br)
+
+	do_sharing_test "$should_fail" "$what"
+	do_end_to_end_test "$swp1_mac" "$what: end to end, $swp1 MAC"
+	do_end_to_end_test "$br_mac" "$what: end to end, br MAC"
+}
+
+bridge_standard()
+{
+	local vlan_filtering=$1; shift
+
+	if ((vlan_filtering)); then
+		echo 802.1q
+	else
+		echo 802.1d
+	fi
+}
+
+nonexistent_fdb_test()
+{
+	local vlan_filtering=$1; shift
+	local standard
+
+	standard=$(bridge_standard "$vlan_filtering")
+
+	# We expect flooding, so $h2 should get the traffic.
+	do_end_to_end_test "$xMAC" "$standard: Nonexistent FDB" "$h2"
+}
+
+misleading_fdb_test()
+{
+	local vlan_filtering=$1; shift
+	local standard
+
+	standard=$(bridge_standard "$vlan_filtering")
+
+	defer_scope_push
+		# Add an FDB entry on VLAN 0. The lookup on VLAN-aware bridge
+		# shouldn't pick this up even with fdb_local_vlan_0 enabled, so
+		# the traffic should be flooded. This all holds on
+		# vlan_filtering bridge, on non-vlan_filtering one the FDB entry
+		# is expected to be found as usual, no flooding takes place.
+		#
+		# Adding only on VLAN 0 is a bit tricky, because bridge is
+		# trying to be nice and interprets the request as if the FDB
+		# should be added on each VLAN.
+
+		bridge fdb add "$mMAC" dev "$swp1" master
+		bridge fdb del "$mMAC" dev "$swp1" vlan 1 master
+		bridge fdb del "$mMAC" dev "$swp1" vlan 2 master
+		bridge fdb del "$mMAC" dev "$swp1" vlan 3 master
+
+		local expect=$((vlan_filtering ? 10 : 0))
+		do_end_to_end_test "$mMAC" \
+				   "$standard: Lookup of non-local MAC on VLAN 0" \
+				   "$h2" "$expect"
+	defer_scope_pop
+}
+
+change_mac()
+{
+	local dev=$1; shift
+	local mac=$1; shift
+	local cur_mac
+
+	cur_mac=$(mac_get "$dev")
+
+	log_info "Change $dev MAC $cur_mac -> $mac"
+	adf_ip_link_set_addr "$dev" "$mac"
+	defer log_info "Change $dev MAC back"
+}
+
+do_test_no_sharing()
+{
+	local vlan_filtering=$1; shift
+	local standard
+
+	standard=$(bridge_standard "$vlan_filtering")
+
+	adf_bridge_create vlan_filtering "$vlan_filtering"
+	setup_wait
+
+	do_tests 0 "$standard, no FDB sharing"
+
+	change_mac "$swp1" "$pMAC"
+	change_mac br "$bMAC"
+
+	do_tests 0 "$standard, no FDB sharing after MAC change"
+
+	in_defer_scope check_fdb_local_vlan_0_support || return
+
+	log_info "Set fdb_local_vlan_0=1"
+	ip link set dev br type bridge fdb_local_vlan_0 1
+
+	do_tests 1 "$standard, fdb sharing after toggle"
+}
+
+do_test_sharing()
+{
+	local vlan_filtering=$1; shift
+	local standard
+
+	standard=$(bridge_standard "$vlan_filtering")
+
+	in_defer_scope check_fdb_local_vlan_0_support || return
+
+	adf_bridge_create vlan_filtering "$vlan_filtering" fdb_local_vlan_0 1
+	setup_wait
+
+	do_tests 1 "$standard, FDB sharing"
+
+	nonexistent_fdb_test "$vlan_filtering"
+	misleading_fdb_test "$vlan_filtering"
+
+	change_mac "$swp1" "$pMAC"
+	change_mac br "$bMAC"
+
+	do_tests 1 "$standard, FDB sharing after MAC change"
+
+	log_info "Set fdb_local_vlan_0=0"
+	ip link set dev br type bridge fdb_local_vlan_0 0
+
+	do_tests 0 "$standard, No FDB sharing after toggle"
+}
+
+test_d_no_sharing()
+{
+	do_test_no_sharing 0
+}
+
+test_d_sharing()
+{
+	do_test_sharing 0
+}
+
+test_q_no_sharing()
+{
+	do_test_no_sharing 1
+}
+
+test_q_sharing()
+{
+	do_test_sharing 1
+}
+
+adf_addr_set_bridge_create()
+{
+	adf_ip_link_add br up type bridge vlan_filtering 0
+	adf_ip_link_set_addr br "$(mac_get br)"
+	adf_bridge_configure
+}
+
+test_addr_set()
+{
+	adf_addr_set_bridge_create
+	setup_wait
+
+	do_end_to_end_test "$(mac_get br)" "NET_ADDR_SET: end to end, br MAC"
+}
+
+trap cleanup EXIT
+setup_prepare
+tests_run
+exit "$EXIT_STATUS"	# propagate EXIT_STATUS like the sibling forwarding tests do
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index e6a3e04fd83f..d4e7dd659354 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,10 +1,24 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
-	   v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
-	   v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
-	   v3exc_timeout_test v3star_ex_auto_add_test"
+ALL_TESTS="
+	v2reportleave_test
+	v3include_test
+	v3inc_allow_test
+	v3inc_is_include_test
+	v3inc_is_exclude_test
+	v3inc_to_exclude_test
+	v3exc_allow_test
+	v3exc_is_include_test
+	v3exc_is_exclude_test
+	v3exc_to_exclude_test
+	v3inc_block_test
+	v3exc_block_test
+	v3exc_timeout_test
+	v3star_ex_auto_add_test
+	v2per_vlan_snooping_port_stp_test
+	v2per_vlan_snooping_vlan_stp_test
+"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="239.10.10.10"
@@ -554,6 +568,64 @@ v3star_ex_auto_add_test()
 	v3cleanup $swp2 $TEST_GROUP
 }
 
+v2per_vlan_snooping_stp_test()
+{
+	local is_port=$1	# 1: toggle the port STP state, otherwise the per-VLAN state
+
+	local msg="port"
+	[[ $is_port -ne 1 ]] && msg="vlan"
+
+	ip link set br0 up type bridge vlan_filtering 1 \
+					mcast_igmp_version 2 \
+					mcast_snooping 1 \
+					mcast_vlan_snooping 1 \
+					mcast_querier 1 \
+					mcast_stats_enabled 1
+	bridge vlan global set vid 1 dev br0 \
+					mcast_snooping 1 \
+					mcast_querier 1 \
+					mcast_query_interval 100 \
+					mcast_startup_query_count 0
+	[[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0	# 0 = disabled
+	[[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4	# 4 = blocking
+	sleep 5
+	local tx_s=$(ip -j -p stats show dev $swp1 \
+			group xstats_slave subgroup bridge suite mcast \
+			| jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+	[[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3	# 3 = forwarding
+	[[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3	# 3 = forwarding
+	sleep 5
+	local tx_e=$(ip -j -p stats show dev $swp1 \
+			group xstats_slave subgroup bridge suite mcast \
+			| jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+	RET=0
+	local tx=$((tx_e - tx_s))	# queries sent after the state went back to forwarding
+	((tx > 0))
+	check_err $? "No IGMP queries after STP state becomes forwarding"
+	log_test "per vlan snooping with $msg stp state change"
+
+	# restore settings
+	bridge vlan global set vid 1 dev br0 \
+					mcast_querier 0 \
+					mcast_query_interval 12500 \
+					mcast_startup_query_count 2
+	ip link set br0 up type bridge vlan_filtering 0 \
+					mcast_vlan_snooping 0 \
+					mcast_stats_enabled 0
+}
+
+v2per_vlan_snooping_port_stp_test()
+{
+	v2per_vlan_snooping_stp_test 1
+}
+
+v2per_vlan_snooping_vlan_stp_test()
+{
+	v2per_vlan_snooping_stp_test 0
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d9d587454d20..e86d77946585 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -28,6 +28,7 @@ ALL_TESTS="
 	cfg_test
 	fwd_test
 	ctrl_test
+	disable_test
 "
 
 NUM_NETIFS=4
@@ -64,7 +65,10 @@ h2_destroy()
 
 switch_create()
 {
-	ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+	local vlan_filtering=$1; shift
+
+	ip link add name br0 type bridge \
+		vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \
 		mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2
 	bridge vlan add vid 10 dev br0 self
 	bridge vlan add vid 20 dev br0 self
@@ -118,7 +122,7 @@ setup_prepare()
 
 	h1_create
 	h2_create
-	switch_create
+	switch_create 1
 }
 
 cleanup()
@@ -149,7 +153,7 @@ cfg_test_host_common()
 	check_err $? "Failed to add $name host entry"
 
 	bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
-	check_fail $? "Managed to replace $name host entry"
+	check_err $? "Failed to replace $name host entry"
 
 	bridge mdb del dev br0 port br0 grp $grp $state vid 10
 	bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
@@ -1357,6 +1361,98 @@ ctrl_test()
 	ctrl_mldv2_is_in_test
 }
 
+check_group()
+{
+	local group=$1; shift
+	local vid=$1; shift
+	local should_fail=$1; shift
+	local when=$1; shift
+	local -a vidkws
+
+	if ((vid)); then
+		vidkws=(vid "$vid")
+	fi
+
+	bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null |
+		grep -q "port $swp1"
+	check_err_fail "$should_fail" $? "$group seen $when snooping disable:"
+}
+
+__disable_test()
+{
+	local vid=$1; shift
+	local what=$1; shift
+	local -a vidkws
+
+	if ((vid)); then
+		vidkws=(vid "$vid")
+	fi
+
+	RET=0
+
+	bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \
+		"${vidkws[@]}" filter_mode include source_list 2001:db8:1::1
+	bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \
+		"${vidkws[@]}" filter_mode exclude
+
+	bridge mdb add dev br0 port "$swp1" grp ff0e::3 \
+		"${vidkws[@]}" filter_mode include source_list 2001:db8:1::2
+	bridge mdb add dev br0 port "$swp1" grp ff0e::4 \
+		"${vidkws[@]}" filter_mode exclude
+
+	bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \
+		"${vidkws[@]}" filter_mode include source_list 192.0.2.1
+	bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \
+		"${vidkws[@]}" filter_mode exclude
+
+	bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \
+		"${vidkws[@]}" filter_mode include source_list 192.0.2.2
+	bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \
+		"${vidkws[@]}" filter_mode exclude
+
+	check_group ff0e::1 "$vid" 0 "before"
+	check_group ff0e::2 "$vid" 0 "before"
+	check_group ff0e::3 "$vid" 0 "before"
+	check_group ff0e::4 "$vid" 0 "before"
+
+	check_group 239.1.1.1 "$vid" 0 "before"
+	check_group 239.1.1.2 "$vid" 0 "before"
+	check_group 239.1.1.3 "$vid" 0 "before"
+	check_group 239.1.1.4 "$vid" 0 "before"
+
+	ip link set dev br0 type bridge mcast_snooping 0
+
+	check_group ff0e::1 "$vid" 0 "after"
+	check_group ff0e::2 "$vid" 0 "after"
+	check_group ff0e::3 "$vid" 1 "after"
+	check_group ff0e::4 "$vid" 1 "after"
+
+	check_group 239.1.1.1 "$vid" 0 "after"
+	check_group 239.1.1.2 "$vid" 0 "after"
+	check_group 239.1.1.3 "$vid" 1 "after"
+	check_group 239.1.1.4 "$vid" 1 "after"
+
+	log_test "$what: Flush after disable"
+
+	ip link set dev br0 type bridge mcast_snooping 1
+	sleep 10
+}
+
+disable_test()
+{
+	__disable_test 10 802.1q
+
+	switch_destroy
+	switch_create 0
+	setup_wait
+
+	__disable_test 0 802.1d
+
+	switch_destroy
+	switch_create 1
+	setup_wait
+}
+
 if ! bridge mdb help 2>&1 | grep -q "flush"; then
 	echo "SKIP: iproute2 too old, missing bridge mdb flush support"
 	exit $ksft_skip
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
index 3da9d93ab36f..625162fd7e8b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -28,6 +28,7 @@ ALL_TESTS="
 	test_8021d
 	test_8021q
 	test_8021qvs
+	test_mdb_count_warning
 "
 
 NUM_NETIFS=4
@@ -83,8 +84,6 @@ switch_create_8021q()
 {
 	local br_flags=$1; shift
 
-	log_info "802.1q $br_flags${br_flags:+ }tests"
-
 	ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
 		mcast_snooping 1 $br_flags \
 		mcast_igmp_version 3 mcast_mld_version 2
@@ -106,6 +105,7 @@ switch_create_8021q()
 
 switch_create_8021qvs()
 {
+	log_info "802.1q mcast_vlan_snooping 1 tests"
 	switch_create_8021q "mcast_vlan_snooping 1"
 	bridge vlan global set dev br0 vid 10 mcast_igmp_version 3
 	bridge vlan global set dev br0 vid 10 mcast_mld_version 2
@@ -1272,6 +1272,76 @@ test_8021qvs_toggle_vlan_snooping()
 	test_toggle_vlan_snooping_permanent
 }
 
+mdb_count_check_warn()
+{
+	local msg=$1; shift
+
+	dmesg | grep -q "WARNING:.*br_multicast_port_ngroups_dec.*"
+	check_fail $? "$msg"
+}
+
+test_mdb_count_mcast_vlan_snooping_flush()
+{
+	RET=0
+
+	# check if we already have a warning
+	mdb_count_check_warn "Check MDB entries count warning before test"
+
+	bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10
+	ip link set dev br0 down
+	ip link set dev br0 type bridge mcast_vlan_snooping 1
+	bridge mdb flush dev br0
+
+	mdb_count_check_warn "Check MDB entries count warning after test"
+
+	ip link set dev br0 type bridge mcast_vlan_snooping 0
+	ip link set dev br0 up
+
+	log_test "MDB count warning: mcast_vlan_snooping and MDB flush"
+}
+
+test_mdb_count_mcast_snooping_flush()
+{
+	RET=0
+
+	# check if we already have a warning
+	mdb_count_check_warn "Check MDB entries count warning before test"
+
+	bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10
+	ip link set dev br0 type bridge mcast_snooping 0
+	ip link set dev br0 type bridge mcast_vlan_snooping 1
+	bridge mdb flush dev br0
+
+	mdb_count_check_warn "Check MDB entries count warning after test"
+
+	ip link set dev br0 type bridge mcast_vlan_snooping 0
+	ip link set dev br0 type bridge mcast_snooping 1
+
+	log_test "MDB count warning: mcast_snooping and MDB flush"
+}
+
+test_mdb_count_vlan_state_flush()
+{
+	RET=0
+
+	# check if we already have a warning
+	mdb_count_check_warn "Check MDB entries count warning before test"
+
+	bridge mdb add dev br0 port "$swp1" grp 239.0.0.1 permanent vid 10
+	ip link set dev br0 down
+	bridge vlan set vid 10 dev "$swp1" state blocking
+	ip link set dev br0 type bridge mcast_vlan_snooping 1
+	ip link set dev br0 up
+	bridge mdb flush dev br0
+
+	mdb_count_check_warn "Check MDB entries count warning after test"
+
+	bridge vlan set vid 10 dev "$swp1" state forwarding
+	ip link set dev br0 type bridge mcast_vlan_snooping 0
+
+	log_test "MDB count warning: disabled vlan state and MDB flush"
+}
+
 # test groups
 
 test_8021d()
@@ -1297,6 +1367,7 @@ test_8021q()
 {
 	# Tests for vlan_filtering 1 mcast_vlan_snooping 0.
 
+	log_info "802.1q tests"
 	switch_create_8021q
 	setup_wait
 
@@ -1334,6 +1405,21 @@ test_8021qvs()
 	switch_destroy
 }
 
+test_mdb_count_warning()
+{
+	# Tests for mdb_n_entries warning
+
+	log_info "MDB count warning tests"
+	switch_create_8021q
+	setup_wait
+
+	test_mdb_count_mcast_vlan_snooping_flush
+	test_mdb_count_mcast_snooping_flush
+	test_mdb_count_vlan_state_flush
+
+	switch_destroy
+}
+
 if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
 	echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
 	exit $ksft_skip
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index f84ab2e65754..4cacef5a813a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,10 +1,23 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
-	   mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
-	   mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
-	   mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+ALL_TESTS="
+	mldv2include_test
+	mldv2inc_allow_test
+	mldv2inc_is_include_test
+	mldv2inc_is_exclude_test
+	mldv2inc_to_exclude_test
+	mldv2exc_allow_test
+	mldv2exc_is_include_test
+	mldv2exc_is_exclude_test
+	mldv2exc_to_exclude_test
+	mldv2inc_block_test
+	mldv2exc_block_test
+	mldv2exc_timeout_test
+	mldv2star_ex_auto_add_test
+	mldv2per_vlan_snooping_port_stp_test
+	mldv2per_vlan_snooping_vlan_stp_test
+"
 NUM_NETIFS=4
 CHECK_TC="yes"
 TEST_GROUP="ff02::cc"
@@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test()
 	mldv2cleanup $swp2
 }
 
+mldv2per_vlan_snooping_stp_test()
+{
+	local is_port=$1	# 1: toggle the port STP state; otherwise the per-VLAN state
+
+	local msg="port"
+	[[ $is_port -ne 1 ]] && msg="vlan"
+
+	ip link set br0 up type bridge vlan_filtering 1 \
+					mcast_mld_version 2 \
+					mcast_snooping 1 \
+					mcast_vlan_snooping 1 \
+					mcast_querier 1 \
+					mcast_stats_enabled 1
+	bridge vlan global set vid 1 dev br0 \
+					mcast_mld_version 2 \
+					mcast_snooping 1 \
+					mcast_querier 1 \
+					mcast_query_interval 100 \
+					mcast_startup_query_count 0
+
+	[[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0	# 0: disabled, per bridge(8)
+	[[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4	# 4: blocking, per bridge(8)
+	sleep 5	# wait before taking the baseline MLDv2 query TX count
+	local tx_s=$(ip -j -p stats show dev $swp1 \
+			group xstats_slave subgroup bridge suite mcast \
+			| jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+	[[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3	# 3: forwarding
+	[[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3	# 3: forwarding
+	sleep 5	# wait before sampling the TX count again
+	local tx_e=$(ip -j -p stats show dev $swp1 \
+			group xstats_slave subgroup bridge suite mcast \
+			| jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+
+	RET=0
+	local tx=$(expr $tx_e - $tx_s)	# queries sent since the state became forwarding
+	test $tx -gt 0
+	check_err $? "No MLD queries after STP state becomes forwarding"
+	log_test "per vlan snooping with $msg stp state change"
+
+	# restore settings
+	bridge vlan global set vid 1 dev br0 \
+					mcast_querier 0 \
+					mcast_query_interval 12500 \
+					mcast_startup_query_count 2 \
+					mcast_mld_version 1
+	ip link set br0 up type bridge vlan_filtering 0 \
+					mcast_vlan_snooping 0 \
+					mcast_stats_enabled 0
+}
+
+mldv2per_vlan_snooping_port_stp_test()
+{
+	mldv2per_vlan_snooping_stp_test 1
+}
+
+mldv2per_vlan_snooping_vlan_stp_test()
+{
+	mldv2per_vlan_snooping_stp_test 0
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 90f8a244ea90..e59fba366a0a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged"
 NUM_NETIFS=4
 CHECK_TC="yes"
 source lib.sh
@@ -194,6 +194,100 @@ other_tpid()
 	tc qdisc del dev $h2 clsact
 }
 
+8021p_do()
+{
+	local should_fail=$1; shift	# 0: the frame must reach $h2, 1: it must be dropped
+	local mac=de:ad:be:ef:13:37
+
+	tc filter add dev $h2 ingress protocol all pref 1 handle 101 \
+		flower dst_mac $mac action drop
+
+	$MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"	# 0x8100 tag with VID 0
+	sleep 1
+
+	tc -j -s filter show dev $h2 ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	check_err_fail $should_fail $? "802.1p-tagged reception"
+
+	tc filter del dev $h2 ingress pref 1
+}
+
+8021p()
+{
+	RET=0
+
+	tc qdisc add dev $h2 clsact
+	ip link set $h2 promisc on
+
+	# Test that with the default_pvid, 1, packets tagged with VID 0 are
+	# accepted.
+	8021p_do 0
+
+	# Test that packets tagged with VID 0 are still accepted after changing
+	# the default_pvid.
+	ip link set br0 type bridge vlan_default_pvid 10
+	8021p_do 0
+
+	log_test "Reception of 802.1p-tagged traffic"
+
+	ip link set $h2 promisc off
+	tc qdisc del dev $h2 clsact
+}
+
+send_untagged_and_8021p()
+{
+	ping_do $h1 192.0.2.2
+	check_fail $?
+
+	8021p_do 1
+}
+
+drop_untagged()
+{
+	RET=0
+
+	tc qdisc add dev $h2 clsact
+	ip link set $h2 promisc on
+
+	# Test that with no PVID, untagged and 802.1p-tagged traffic is
+	# dropped.
+	ip link set br0 type bridge vlan_default_pvid 1
+
+	# First we reconfigure the default_pvid, 1, as a non-PVID VLAN.
+	bridge vlan add dev $swp1 vid 1 untagged
+	send_untagged_and_8021p
+	bridge vlan add dev $swp1 vid 1 pvid untagged
+
+	# Next we try to delete VID 1 altogether
+	bridge vlan del dev $swp1 vid 1
+	send_untagged_and_8021p
+	bridge vlan add dev $swp1 vid 1 pvid untagged
+
+	# Set up the bridge without a default_pvid, then check that the 8021q
+	# module, when the bridge port goes down and then up again, does not
+	# accidentally re-enable untagged packet reception.
+	ip link set br0 type bridge vlan_default_pvid 0
+	ip link set $swp1 down
+	ip link set $swp1 up
+	setup_wait
+	send_untagged_and_8021p
+
+	# Remove swp1 as a bridge port and let it rejoin the bridge while it
+	# has no default_pvid.
+	ip link set $swp1 nomaster
+	ip link set $swp1 master br0
+	send_untagged_and_8021p
+
+	# Restore settings
+	ip link set br0 type bridge vlan_default_pvid 1
+
+	log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID"
+
+	ip link set $h2 promisc off
+	tc qdisc del dev $h2 clsact
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
index 72dfbeaf56b9..e8031f68200a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
@@ -414,6 +414,7 @@ vlmc_querier_intvl_test()
 	bridge vlan add vid 10 dev br1 self pvid untagged
 	ip link set dev $h1 master br1
 	ip link set dev br1 up
+	setup_wait_dev $h1 0
 	bridge vlan add vid 10 dev $h1 master
 	bridge vlan global set vid 10 dev br1 mcast_snooping 1 mcast_querier 1
 	sleep 2
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 8d7a1a004b7c..75a6c3d3c1da 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -1,23 +1,23 @@
+CONFIG_BPF_SYSCALL=y
 CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_IGMP_SNOOPING=y
 CONFIG_BRIDGE_VLAN_FILTERING=y
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_NET_VRF=m
-CONFIG_BPF_SYSCALL=y
 CONFIG_CGROUP_BPF=y
 CONFIG_DUMMY=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
 CONFIG_IPV6=y
 CONFIG_IPV6_GRE=m
 CONFIG_IPV6_MROUTE=y
 CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_IPV6_PIMSM_V2=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
 CONFIG_MACVLAN=m
+CONFIG_NAMESPACES=y
 CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_GACT=m
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_NET_ACT_MPLS=m
 CONFIG_NET_ACT_PEDIT=m
@@ -26,29 +26,31 @@ CONFIG_NET_ACT_SAMPLE=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_CLS_BASIC=m
 CONFIG_NET_CLS_FLOWER=m
 CONFIG_NET_CLS_MATCHALL=m
-CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_U32=m
 CONFIG_NET_EMATCH=y
 CONFIG_NET_EMATCH_META=m
+CONFIG_NETFILTER=y
 CONFIG_NET_IPGRE=m
 CONFIG_NET_IPGRE_DEMUX=m
 CONFIG_NET_IPIP=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
 CONFIG_NET_SCH_ETS=m
 CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_ACT_GACT=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_RED=m
 CONFIG_NET_SCH_TBF=m
 CONFIG_NET_TC_SKB_EXT=y
 CONFIG_NET_TEAM=y
 CONFIG_NET_TEAM_MODE_LOADBALANCE=y
-CONFIG_NETFILTER=y
+CONFIG_NET_VRF=m
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_FLOW_TABLE=m
 CONFIG_NF_TABLES=m
 CONFIG_VETH=m
-CONFIG_NAMESPACES=y
-CONFIG_NET_NS=y
+CONFIG_VLAN_8021Q=m
 CONFIG_VXLAN=m
 CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
index 7d531f7091e6..5dbfab0e23e3 100755
--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -226,7 +226,7 @@ send_flowlabel()
 	# Generate 16384 echo requests, each with a random flow label.
 	ip vrf exec v$h1 sh -c \
 		"for _ in {1..16384}; do \
-			$PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+			$PING6 -F 0 -c 1 -q 2001:db8:4::2 >/dev/null 2>&1; \
 		done"
 }
 
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
index dda11a4a9450..b4f17a5bbc61 100755
--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -321,7 +321,7 @@ send_flowlabel()
 	# Generate 16384 echo requests, each with a random flow label.
 	ip vrf exec v$h1 sh -c \
 		"for _ in {1..16384}; do \
-			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+			$PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \
 		done"
 }
 
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
index 57531c1d884d..ce4ae74843d9 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -65,7 +65,7 @@ source lib.sh
 
 h1_create()
 {
-	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	simple_if_init $h1 192.0.2.1/28
 	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
 }
 
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
index 7d5b2b9cc133..c667b81da37f 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
@@ -80,7 +80,7 @@ h1_destroy()
 {
 	ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
 	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
-	simple_if_fini $h1 192.0.2.1/28
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
 }
 
 sw1_create()
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
index 370f9925302d..d04bad58a96a 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -80,7 +80,7 @@ h1_destroy()
 {
 	ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
 	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
-	simple_if_fini $h1 192.0.2.1/28
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
 }
 
 sw1_create()
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
index 49fa94b53a1c..25036e38043c 100755
--- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -95,7 +95,7 @@ ipv6_in_too_big_err()
 
 	# Send too big packets
 	ip vrf exec $vrf_name \
-		$PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+		$PING6 -s 1300 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null
 
 	local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
 	test "$((t1 - t0))" -ne 0
@@ -131,7 +131,7 @@ ipv6_in_addr_err()
 	# Disable forwarding temporary while sending the packet
 	sysctl -qw net.ipv6.conf.all.forwarding=0
 	ip vrf exec $vrf_name \
-		$PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+		$PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null
 	sysctl -qw net.ipv6.conf.all.forwarding=1
 
 	local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
@@ -150,7 +150,7 @@ ipv6_in_discard()
 	# Add a policy to discard
 	ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block
 	ip vrf exec $vrf_name \
-		$PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+		$PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null
 	ip xfrm policy del dst 2001:1:2::2/128 dir fwd
 
 	local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
index e28b4a079e52..b24acfa52a3a 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -323,7 +323,7 @@ send_flowlabel()
 	# Generate 16384 echo requests, each with a random flow label.
 	ip vrf exec v$h1 sh -c \
 		"for _ in {1..16384}; do \
-			$PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+			$PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \
 		done"
 }
 
diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh
index 01e62c4ac94d..b255646b737a 100644
--- a/tools/testing/selftests/net/forwarding/ipip_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh
@@ -144,7 +144,7 @@
 
 h1_create()
 {
-	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	simple_if_init $h1 192.0.2.1/28
 	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
 }
 
diff --git a/tools/testing/selftests/net/forwarding/ipmr.c b/tools/testing/selftests/net/forwarding/ipmr.c
new file mode 100644
index 000000000000..df870aad9ead
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ipmr.c
@@ -0,0 +1,455 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2026 Google LLC */
+
+#include <linux/if.h>
+#include <linux/mroute.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/socket.h>
+#include <sched.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "kselftest_harness.h"
+
+FIXTURE(ipmr)
+{
+	int netlink_sk;
+	int raw_sk;
+	int veth_ifindex;
+};
+
+FIXTURE_VARIANT(ipmr)
+{
+	int family;
+	int protocol;
+	int level;
+	int opts[MRT_MAX - MRT_BASE + 1];
+};
+
+FIXTURE_VARIANT_ADD(ipmr, ipv4)
+{
+	.family = AF_INET,
+	.protocol = IPPROTO_IGMP,
+	.level = IPPROTO_IP,
+	.opts = {
+		MRT_INIT,
+		MRT_DONE,
+		MRT_ADD_VIF,
+		MRT_DEL_VIF,
+		MRT_ADD_MFC,
+		MRT_DEL_MFC,
+		MRT_VERSION,
+		MRT_ASSERT,
+		MRT_PIM,
+		MRT_TABLE,
+		MRT_ADD_MFC_PROXY,
+		MRT_DEL_MFC_PROXY,
+		MRT_FLUSH,
+	},
+};
+
+struct mfc_attr {
+	int table;
+	__u32 origin;
+	__u32 group;
+	int ifindex;
+	bool proxy;
+};
+
+static struct rtattr *nl_add_rtattr(struct nlmsghdr *nlmsg, struct rtattr *rta,
+				    int type, const void *data, int len)
+{
+	int unused = 0;	/* RTA_NEXT() needs an lvalue for the remaining length */
+
+	rta->rta_type = type;
+	rta->rta_len = RTA_LENGTH(len);
+	if (data && len > 0)	/* flag-like attrs pass NULL/0; memcpy(NULL) is UB */
+		memcpy(RTA_DATA(rta), data, len);
+	nlmsg->nlmsg_len += NLMSG_ALIGN(rta->rta_len);
+
+	return RTA_NEXT(rta, unused);	/* where the next attribute goes */
+}
+
+static int nl_sendmsg_mfc(struct __test_metadata *_metadata, FIXTURE_DATA(ipmr) *self,
+			  __u16 nlmsg_type, struct mfc_attr *mfc_attr)
+{
+	struct {
+		struct nlmsghdr nlmsg;
+		struct rtmsg rtm;
+		char buf[4096];
+	} req = {
+		.nlmsg = {
+			.nlmsg_len = NLMSG_LENGTH(sizeof(req.rtm)),
+			/* ipmr does not care about NLM_F_CREATE and NLM_F_EXCL ... */
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+			.nlmsg_type = nlmsg_type,
+		},
+		.rtm = {
+			/* hard requirements in rtm_to_ipmr_mfcc() */
+			.rtm_family = RTNL_FAMILY_IPMR,
+			.rtm_dst_len = 32,
+			.rtm_type = RTN_MULTICAST,
+			.rtm_scope = RT_SCOPE_UNIVERSE,
+			.rtm_protocol = RTPROT_MROUTED,
+		},
+	};
+	struct nlmsghdr *nlmsg = &req.nlmsg;
+	struct nlmsgerr *errmsg;
+	struct rtattr *rta;
+	int err;
+
+	rta = (struct rtattr *)&req.buf;
+	rta = nl_add_rtattr(nlmsg, rta, RTA_TABLE, &mfc_attr->table, sizeof(mfc_attr->table));
+	rta = nl_add_rtattr(nlmsg, rta, RTA_SRC, &mfc_attr->origin, sizeof(mfc_attr->origin));
+	rta = nl_add_rtattr(nlmsg, rta, RTA_DST, &mfc_attr->group, sizeof(mfc_attr->group));
+	if (mfc_attr->ifindex)
+		rta = nl_add_rtattr(nlmsg, rta, RTA_IIF, &mfc_attr->ifindex, sizeof(mfc_attr->ifindex));
+	if (mfc_attr->proxy)
+		rta = nl_add_rtattr(nlmsg, rta, RTA_PREFSRC, NULL, 0);
+
+	err = send(self->netlink_sk, &req, req.nlmsg.nlmsg_len, 0);
+	ASSERT_EQ(err, req.nlmsg.nlmsg_len);
+
+	memset(&req, 0, sizeof(req));
+
+	err = recv(self->netlink_sk, &req, sizeof(req), 0);
+	ASSERT_TRUE(NLMSG_OK(nlmsg, err));
+	ASSERT_EQ(NLMSG_ERROR, nlmsg->nlmsg_type);
+
+	errmsg = (struct nlmsgerr *)NLMSG_DATA(nlmsg);
+	return errmsg->error;
+}
+
+FIXTURE_SETUP(ipmr)
+{
+	struct ifreq ifr = {
+		.ifr_name = "veth0",
+	};
+	int err;
+
+	err = unshare(CLONE_NEWNET);
+	ASSERT_EQ(0, err);
+
+	self->netlink_sk = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	ASSERT_LE(0, self->netlink_sk);
+
+	self->raw_sk = socket(variant->family, SOCK_RAW, variant->protocol);
+	ASSERT_LT(0, self->raw_sk);
+
+	err = system("ip link add veth0 type veth peer veth1");
+	ASSERT_EQ(0, err);
+
+	err = ioctl(self->raw_sk, SIOCGIFINDEX, &ifr);
+	ASSERT_EQ(0, err);
+
+	self->veth_ifindex = ifr.ifr_ifindex;
+}
+
+FIXTURE_TEARDOWN(ipmr)
+{
+	close(self->raw_sk);
+	close(self->netlink_sk);
+}
+
+TEST_F(ipmr, mrt_init)
+{
+	int err, val = 0;  /* any value is ok, but size must be int for MRT_INIT. */
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_INIT - MRT_BASE],
+			 &val,  sizeof(val));
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DONE - MRT_BASE],
+			 &val,  sizeof(val));
+	ASSERT_EQ(0, err);
+}
+
+TEST_F(ipmr, mrt_add_vif_register)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_REGISTER,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_vif | grep -q pimreg");
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+}
+
+TEST_F(ipmr, mrt_del_vif_unreg)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_vif | grep -q veth0");
+	ASSERT_EQ(0, err);
+
+	/* VIF is removed along with its device. */
+	err = system("ip link del veth0");
+	ASSERT_EQ(0, err);
+
+	/* VIF 0 (vifc_vifi) lost its device with veth0, so deleting it again must fail. */
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_DEL_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(-1, err);
+	ASSERT_EQ(EADDRNOTAVAIL, errno);
+}
+
+TEST_F(ipmr, mrt_del_vif_netns_dismantle)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	/* Let cleanup_net() remove veth0 and VIF. */
+}
+
+TEST_F(ipmr, mrt_add_mfc)
+{
+	struct mfcctl mfc = {};
+	int err;
+
+	/* MRT_ADD_MFC / MRT_ADD_MFC_PROXY does not need vif to exist (unlike netlink). */
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_MFC - MRT_BASE],
+			 &mfc,  sizeof(mfc));
+	ASSERT_EQ(0, err);
+
+	/* The all-zero mfcctl shows up as (0.0.0.0 -> 0.0.0.0); it must be removable too. */
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk, variant->level,
+			 variant->opts[MRT_DEL_MFC - MRT_BASE], &mfc, sizeof(mfc));
+	ASSERT_EQ(0, err);
+}
+
+TEST_F(ipmr, mrt_add_mfc_proxy)
+{
+	struct mfcctl mfc = {};	/* all-zero: (0.0.0.0 -> 0.0.0.0), parent 0 */
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_MFC_PROXY - MRT_BASE],
+			 &mfc,  sizeof(mfc));
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk, variant->level,
+			 variant->opts[MRT_DEL_MFC_PROXY - MRT_BASE], &mfc, sizeof(mfc));
+	ASSERT_EQ(0, err);	/* removing the proxy entry must succeed as well */
+}
+
+TEST_F(ipmr, mrt_add_mfc_netlink)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = false,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+}
+
+TEST_F(ipmr, mrt_add_mfc_netlink_proxy)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = true,
+	};
+	int err;
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+}
+
+TEST_F(ipmr, mrt_add_mfc_netlink_no_vif)
+{
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.proxy = false,
+	};
+	int err;
+
+	/* Case 1: ifindex 0 makes nl_sendmsg_mfc() omit RTA_IIF; netlink rejects that. */
+	mfc_attr.ifindex = 0;
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(-ENFILE, err);
+
+	/* Case 2: RTA_IIF names veth0, but no VIF was added for it; still rejected. */
+	mfc_attr.ifindex = self->veth_ifindex;
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(-ENFILE, err);
+}
+
+TEST_F(ipmr, mrt_del_mfc_netlink_netns_dismantle)
+{
+	struct vifctl vifs[2] = {
+		{
+			.vifc_vifi = 0,
+			.vifc_flags = VIFF_USE_IFINDEX,
+			.vifc_lcl_ifindex = self->veth_ifindex,
+		},
+		{
+			.vifc_vifi = 1,
+			.vifc_flags = VIFF_REGISTER,
+		}
+	};
+	struct mfc_attr mfc_attr = {
+		.table = RT_TABLE_DEFAULT,
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = false,
+	};
+	int i, err;
+
+	for (i = 0; i < 2; i++) {
+		/* Create 2 VIFs just to avoid -ENFILE later. */
+		err = setsockopt(self->raw_sk,
+				 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+				 &vifs[i],  sizeof(vifs[i]));
+		ASSERT_EQ(0, err);
+	}
+
+	/* Create a MFC for mrt->vif_table[0]. */
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	/* Remove mrt->vif_table[0]. */
+	err = system("ip link del veth0");
+	ASSERT_EQ(0, err);
+
+	/* MFC entry is NOT removed even if the tied VIF is removed... */
+	err = system("cat /proc/net/ip_mr_cache | grep -q '00000000 00000000' ");
+	ASSERT_EQ(0, err);
+
+	/* ... and netlink is not capable of removing such an entry
+	 * because netlink always requires a valid RTA_IIF ... :/
+	 */
+	err = nl_sendmsg_mfc(_metadata, self, RTM_DELROUTE, &mfc_attr);
+	ASSERT_EQ(-ENODEV, err);
+
+	/* It can be removed by setsockopt(), but let cleanup_net() remove this time. */
+}
+
+TEST_F(ipmr, mrt_table_flush)
+{
+	struct vifctl vif = {
+		.vifc_vifi = 0,
+		.vifc_flags = VIFF_USE_IFINDEX,
+		.vifc_lcl_ifindex = self->veth_ifindex,
+	};
+	struct mfc_attr mfc_attr = {
+		.origin = 0,
+		.group = 0,
+		.ifindex = self->veth_ifindex,
+		.proxy = false,
+	};
+	int table_id = 92;
+	int err, flags;
+
+	/* Set a random table id rather than RT_TABLE_DEFAULT.
+	 * Note that /proc/net/ip_mr_{vif,cache} only supports RT_TABLE_DEFAULT.
+	 */
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_TABLE - MRT_BASE],
+			 &table_id,  sizeof(table_id));
+	ASSERT_EQ(0, err);
+
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_ADD_VIF - MRT_BASE],
+			 &vif,  sizeof(vif));
+	ASSERT_EQ(0, err);
+
+	mfc_attr.table = table_id;
+	err = nl_sendmsg_mfc(_metadata, self, RTM_NEWROUTE, &mfc_attr);
+	ASSERT_EQ(0, err);
+
+	/* Flush mrt->vif_table[] and all caches. */
+	flags = MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
+		MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC;
+	err = setsockopt(self->raw_sk,
+			 variant->level, variant->opts[MRT_FLUSH - MRT_BASE],
+			 &flags,  sizeof(flags));
+	ASSERT_EQ(0, err);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 8de80acf249e..ac8358bcb22c 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
+#shellcheck disable=SC2034 # SC doesn't see our uses of global variables
 
 ##############################################################################
 # Topology description. p1 looped back to p2, p3 to p4 and so on.
@@ -37,6 +38,7 @@ declare -A NETIFS=(
 : "${TEAMD:=teamd}"
 : "${MCD:=smcrouted}"
 : "${MC_CLI:=smcroutectl}"
+: "${MCD_TABLE_NAME:=selftests}"
 
 # Constants for netdevice bring-up:
 # Default time in seconds to wait for an interface to come up before giving up
@@ -141,6 +143,20 @@ check_tc_version()
 	fi
 }
 
+check_tc_erspan_support()
+{
+	local dev=$1; shift
+
+	tc filter add dev $dev ingress pref 1 handle 1 flower \
+		erspan_opts 1:0:0:0 &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing erspan support"
+		return $ksft_skip
+	fi
+	tc filter del dev $dev ingress pref 1 handle 1 flower \
+		erspan_opts 1:0:0:0 &> /dev/null
+}
+
 # Old versions of tc don't understand "mpls_uc"
 check_tc_mpls_support()
 {
@@ -291,16 +307,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then
 	check_tc_version
 fi
 
-require_command()
-{
-	local cmd=$1; shift
-
-	if [[ ! -x "$(command -v "$cmd")" ]]; then
-		echo "SKIP: $cmd not installed"
-		exit $ksft_skip
-	fi
-}
-
 # IPv6 support was added in v3.0
 check_mtools_version()
 {
@@ -335,17 +341,145 @@ fi
 ##############################################################################
 # Command line options handling
 
-count=0
+check_env() {
+	if [[ ! (( -n "$LOCAL_V4" && -n "$REMOTE_V4") ||
+		 ( -n "$LOCAL_V6" && -n "$REMOTE_V6" )) ]]; then
+		echo "SKIP: Invalid environment, missing or inconsistent LOCAL_V4/REMOTE_V4/LOCAL_V6/REMOTE_V6"
+		echo "Please see tools/testing/selftests/drivers/net/README.rst"
+		exit "$ksft_skip"
+	fi
+
+	if [[ -z "$REMOTE_TYPE" ]]; then
+		echo "SKIP: Invalid environment, missing REMOTE_TYPE"
+		exit "$ksft_skip"
+	fi
+
+	if [[ -z "$REMOTE_ARGS" ]]; then
+		echo "SKIP: Invalid environment, missing REMOTE_ARGS"
+		exit "$ksft_skip"
+	fi
+}
+
+__run_on()
+{
+	local target=$1; shift	# "<type>:<args>", e.g. "netns:foo" or "local:"
+	local type args
+
+	IFS=':' read -r type args <<< "$target"	# args receives everything after the first ':'
+
+	case "$type" in
+	netns)
+		# Execute command in network namespace
+		# args contains the namespace name
+		ip netns exec "$args" "$@"
+		;;
+	ssh)
+		# Execute command via SSH; args contains user@host
+		ssh -n "$args" "$@"
+		;;
+	local|*)
+		# Execute command locally. This is also the fallback
+		# case for when the interface's target is not found in
+		# the TARGETS array.
+		"$@"
+		;;
+	esac
+}
+
+run_on()
+{
+	local iface=$1; shift
+	local target="local:"
+
+	if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then
+		target="${TARGETS[$iface]}"
+	fi
+
+	__run_on "$target" "$@"
+}
+
+get_ifname_by_ip()
+{
+	local target=$1; shift
+	local ip_addr=$1; shift
+
+	__run_on "$target" ip -j addr show to "$ip_addr" | jq -r '.[].ifname'
+}
+
+# Whether the test is conforming to the requirements and usage described in
+# drivers/net/README.rst.
+: "${DRIVER_TEST_CONFORMANT:=no}"
+
+declare -A TARGETS
+
+# Based on DRIVER_TEST_CONFORMANT, decide if to source drivers/net/net.config
+# or not. In the "yes" case, the test expects to pass the arguments through the
+# variables specified in drivers/net/README.rst file. If not, fallback on
+# parsing the script arguments for interface names.
+if [ "${DRIVER_TEST_CONFORMANT}" = "yes" ]; then
+	if [[ -f $net_forwarding_dir/../../drivers/net/net.config ]]; then
+		source "$net_forwarding_dir/../../drivers/net/net.config"
+	fi
+
+	if (( NUM_NETIFS > 2)); then
+		echo "SKIP: DRIVER_TEST_CONFORMANT=yes and NUM_NETIFS is bigger than 2"
+		exit "$ksft_skip"
+	fi
+
+	check_env
+
+	# Populate the NETIFS and TARGETS arrays automatically based on the
+	# environment variables. The TARGETS array is indexed by the network
+	# interface name keeping track of the target on which the interface
+	# resides. Values will be strings of the following format -
+	# <type>:<args>.
+	#
+	# TARGETS[eth0]="local:" - meaning that the eth0 interface is
+	# accessible locally
+	# TARGETS[eth1]="netns:foo" - eth1 is in the foo netns
+	# TARGETS[eth2]="ssh:root@10.0.0.2" - eth2 is accessible through
+	# running the 'ssh root@10.0.0.2' command.
 
-while [[ $# -gt 0 ]]; do
-	if [[ "$count" -eq "0" ]]; then
+	unset NETIFS
+	declare -A NETIFS
+
+	NETIFS[p1]="$NETIF"
+	TARGETS[$NETIF]="local:"
+
+	# Locate the remote interface; use IPv4 only if REMOTE_V4 is non-empty
+	remote_target="$REMOTE_TYPE:$REMOTE_ARGS"
+	if [[ -n "$REMOTE_V4" ]]; then	# -v would also accept a set-but-empty value
+		remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V4")
+	else
+		remote_netif=$(get_ifname_by_ip "$remote_target" "$REMOTE_V6")
+	fi
+	if [[ -z "$remote_netif" ]]; then
+		echo "SKIP: cannot find remote interface"
+		exit "$ksft_skip"
+	fi
+
+	if [[ "$NETIF" == "$remote_netif" ]]; then
+		echo "SKIP: local and remote interfaces cannot have the same name"
+		exit "$ksft_skip"
+	fi
+
+	NETIFS[p2]="$remote_netif"
+	TARGETS[$remote_netif]="$REMOTE_TYPE:$REMOTE_ARGS"
+else
+	count=0
+	# Prime NETIFS from the command line, but retain if none given.
+	if [[ $# -gt 0 ]]; then
 		unset NETIFS
 		declare -A NETIFS
+
+		while [[ $# -gt 0 ]]; do
+			count=$((count + 1))
+			NETIFS[p$count]="$1"
+			TARGETS[$1]="local:"
+			shift
+		done
 	fi
-	count=$((count + 1))
-	NETIFS[p$count]="$1"
-	shift
-done
+fi
 
 ##############################################################################
 # Network interfaces configuration
@@ -413,10 +547,11 @@ mac_addr_prepare()
 		dev=${NETIFS[p$i]}
 		new_addr=$(printf "00:01:02:03:04:%02x" $i)
 
-		MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
+		MAC_ADDR_ORIG["$dev"]=$(run_on "$dev" \
+			ip -j link show dev "$dev" | jq -e '.[].address')
 		# Strip quotes
 		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
-		ip link set dev $dev address $new_addr
+		run_on "$dev" ip link set dev "$dev" address $new_addr
 	done
 }
 
@@ -426,7 +561,8 @@ mac_addr_restore()
 
 	for ((i = 1; i <= NUM_NETIFS; ++i)); do
 		dev=${NETIFS[p$i]}
-		ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
+		run_on "$dev" \
+			ip link set dev "$dev" address ${MAC_ADDR_ORIG["$dev"]}
 	done
 }
 
@@ -439,7 +575,9 @@ if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
 fi
 
 for ((i = 1; i <= NUM_NETIFS; ++i)); do
-	ip link show dev ${NETIFS[p$i]} &> /dev/null
+	int="${NETIFS[p$i]}"
+
+	run_on "$int" ip link show dev "$int" &> /dev/null
 	if [[ $? -ne 0 ]]; then
 		echo "SKIP: could not find all required interfaces"
 		exit $ksft_skip
@@ -522,7 +660,7 @@ setup_wait_dev_with_timeout()
 	local i
 
 	for ((i = 1; i <= $max_iterations; ++i)); do
-		ip link show dev $dev up \
+		run_on "$dev" ip link show dev "$dev" up \
 			| grep 'state UP' &> /dev/null
 		if [[ $? -ne 0 ]]; then
 			sleep 1
@@ -535,9 +673,9 @@ setup_wait_dev_with_timeout()
 	return 1
 }
 
-setup_wait()
+setup_wait_n()
 {
-	local num_netifs=${1:-$NUM_NETIFS}
+	local num_netifs=$1; shift
 	local i
 
 	for ((i = 1; i <= num_netifs; ++i)); do
@@ -548,6 +686,11 @@ setup_wait()
 	sleep $WAIT_TIME
 }
 
+setup_wait()
+{
+	setup_wait_n "$NUM_NETIFS"
+}
+
 wait_for_dev()
 {
         local dev=$1; shift
@@ -561,30 +704,6 @@ wait_for_dev()
         fi
 }
 
-cmd_jq()
-{
-	local cmd=$1
-	local jq_exp=$2
-	local jq_opts=$3
-	local ret
-	local output
-
-	output="$($cmd)"
-	# it the command fails, return error right away
-	ret=$?
-	if [[ $ret -ne 0 ]]; then
-		return $ret
-	fi
-	output=$(echo $output | jq -r $jq_opts "$jq_exp")
-	ret=$?
-	if [[ $ret -ne 0 ]]; then
-		return $ret
-	fi
-	echo $output
-	# return success only in case of non-empty output
-	[ ! -z "$output" ]
-}
-
 pre_cleanup()
 {
 	if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
@@ -613,6 +732,12 @@ vrf_cleanup()
 	ip -4 rule del pref 32765
 }
 
+adf_vrf_prepare()
+{
+	vrf_prepare
+	defer vrf_cleanup
+}
+
 __last_tb_id=0
 declare -A __TB_IDS
 
@@ -725,6 +850,12 @@ simple_if_fini()
 	vrf_destroy $vrf_name
 }
 
+adf_simple_if_init()
+{
+	simple_if_init "$@"
+	defer simple_if_fini "$@"
+}
+
 tunnel_create()
 {
 	local name=$1; shift
@@ -833,8 +964,15 @@ ethtool_std_stats_get()
 	local name=$1; shift
 	local src=$1; shift
 
-	ethtool --json -S $dev --groups $grp -- --src $src | \
-		jq '.[]."'"$grp"'"."'$name'"'
+	if [[ "$grp" == "pause" ]]; then
+		run_on "$dev" ethtool -I --json -a "$dev" --src "$src" | \
+			jq --arg name "$name" '.[].statistics[$name]'
+		return
+	fi
+
+	run_on "$dev" \
+		ethtool --json -S "$dev" --groups "$grp" -- --src "$src" | \
+		jq --arg grp "$grp" --arg name "$name" '.[][$grp][$name]'
 }
 
 qdisc_stats_get()
@@ -1025,6 +1163,12 @@ forwarding_restore()
 	sysctl_restore net.ipv4.conf.all.forwarding
 }
 
+adf_forwarding_enable()
+{
+	forwarding_enable
+	defer forwarding_restore
+}
+
 declare -A MTU_ORIG
 mtu_set()
 {
@@ -1281,8 +1425,8 @@ ping_do()
 
 	vrf_name=$(master_name_get $if_name)
 	ip vrf exec $vrf_name \
-		$PING $args $dip -c $PING_COUNT -i 0.1 \
-		-w $PING_TIMEOUT &> /dev/null
+		$PING $args -c $PING_COUNT -i 0.1 \
+		-w $PING_TIMEOUT $dip &> /dev/null
 }
 
 ping_test()
@@ -1312,8 +1456,8 @@ ping6_do()
 
 	vrf_name=$(master_name_get $if_name)
 	ip vrf exec $vrf_name \
-		$PING6 $args $dip -c $PING_COUNT -i 0.1 \
-		-w $PING_TIMEOUT &> /dev/null
+		$PING6 $args -c $PING_COUNT -i 0.1 \
+		-w $PING_TIMEOUT $dip &> /dev/null
 }
 
 ping6_test()
@@ -1606,12 +1750,17 @@ tcpdump_start()
 	sleep 1
 }
 
-tcpdump_stop()
+tcpdump_stop_nosleep()
 {
 	local if_name=$1
 	local pid=${cappid[$if_name]}
 
 	$ns_cmd kill "$pid" && wait "$pid"
+}
+
+tcpdump_stop()
+{
+	tcpdump_stop_nosleep "$1"
 	sleep 1
 }
 
@@ -1626,7 +1775,7 @@ tcpdump_show()
 {
 	local if_name=$1
 
-	tcpdump -e -n -r ${capfile[$if_name]} 2>&1
+	tcpdump -e -nn -r ${capfile[$if_name]} 2>&1
 }
 
 # return 0 if the packet wasn't seen on host2_if or 1 if it was
@@ -1767,6 +1916,51 @@ mc_send()
 		msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
 }
 
+adf_mcd_start()
+{
+	local ifs=("$@")
+
+	local table_name="$MCD_TABLE_NAME"
+	local smcroutedir
+	local pid
+	local if
+	local i
+
+	check_command "$MCD" || return 1
+	check_command "$MC_CLI" || return 1
+
+	smcroutedir=$(mktemp -d)
+	defer rm -rf "$smcroutedir"
+
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
+		echo "phyint ${NETIFS[p$i]} enable" >> \
+			"$smcroutedir/$table_name.conf"
+	done
+
+	for if in "${ifs[@]}"; do
+		if ! ip_link_has_flag "$if" MULTICAST; then
+			ip link set dev "$if" multicast on
+			defer ip link set dev "$if" multicast off
+		fi
+
+		echo "phyint $if enable" >> \
+			"$smcroutedir/$table_name.conf"
+	done
+
+	"$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \
+		-P "$smcroutedir/$table_name.pid"
+	busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid"
+	pid=$(cat "$smcroutedir/$table_name.pid")
+	defer kill_process "$pid"
+}
+
+mc_cli()
+{
+	# Call $MC_CLI with -I $MCD_TABLE_NAME ahead of the caller's arguments
+	local ident="$MCD_TABLE_NAME"
+	"$MC_CLI" -I "$ident" "$@"
+}
+
 start_ip_monitor()
 {
 	local mtype=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
index ff2accccaf4d..b4eda6c6199e 100755
--- a/tools/testing/selftests/net/forwarding/lib_sh_test.sh
+++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
@@ -30,6 +30,11 @@ tfail()
 	do_test "tfail" false
 }
 
+tfail2()
+{
+	do_test "tfail2" false
+}
+
 txfail()
 {
 	FAIL_TO_XFAIL=yes do_test "txfail" false
@@ -132,6 +137,8 @@ test_ret()
 	ret_subtest $ksft_fail "tfail" txfail tfail
 
 	ret_subtest $ksft_xfail "txfail" txfail txfail
+
+	ret_subtest $ksft_fail "tfail2" tfail2 tfail
 }
 
 exit_status_tests_run()
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index ecd34f364125..15b1a1255a41 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -57,21 +57,21 @@ PTP_1588_L2_PDELAY_REQ=" \
 PTP_1588_IPV4_SYNC=" \
 01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \
 00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \
-01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \
+01 81 01 3f 01 3f 00 34 9f 41 00 02 00 2c 00 00 \
 02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
 63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \
 00 00 00 00 00 00"
 PTP_1588_IPV4_FOLLOW_UP="
 01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \
 00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \
-01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \
+01 81 01 40 01 40 00 34 eb 8a 08 02 00 2c 00 00 \
 00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
 63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \
 c6 0f 1d 9a 61 87"
 PTP_1588_IPV4_PDELAY_REQ=" \
 01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \
 00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \
-00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \
+00 6b 01 3f 01 3f 00 3e 9a b9 02 02 00 36 00 00 \
 00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
 63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \
 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00"
@@ -79,7 +79,7 @@ PTP_1588_IPV6_SYNC=" \
 33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \
 7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \
 00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \
-00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \
+00 00 00 00 01 81 01 3f 01 3f 00 36 14 76 00 02 \
 00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \
 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \
 00 00 00 00 00 00 00 00 00 00 00 00"
@@ -87,7 +87,7 @@ PTP_1588_IPV6_FOLLOW_UP=" \
 33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \
 00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \
 00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \
-00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \
+00 00 00 00 01 81 01 40 01 40 00 36 f0 47 08 02 \
 00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \
 00 00 66 83 c6 2a 32 09 bd 74 00 00"
@@ -95,11 +95,20 @@ PTP_1588_IPV6_PDELAY_REQ=" \
 33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \
 5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \
 63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \
-00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \
+00 00 00 00 00 6b 01 3f 01 3f 00 40 89 1f 02 02 \
 00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
 00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \
 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
 00 00 00 00 00 00"
+LINK_LOCAL_STP_BPDU=" \
+01:80:c2:00:00:00 00:00:de:ad:be:ef 00 26 42 42 03 \
+00 00 00 00 00 80 00 aa bb cc dd ee ff 00 00 00 00 \
+80 00 aa bb cc dd ee ff 80 01 00 00 14 00 02 00 \
+0f 00"
+LINK_LOCAL_LLDP=" \
+01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:cc 02 07 04 \
+00 11 22 33 44 55 04 05 05 65 74 68 30 06 02 00 \
+78 00 00"
 
 # Disable promisc to ensure we don't receive unknown MAC DA packets
 export TCPDUMP_EXTRA_FLAGS="-pl"
@@ -176,6 +185,8 @@ run_test()
 	local rcv_dmac=$(mac_get $rcv_if_name)
 	local should_receive
 
+	setup_wait
+
 	tcpdump_start $rcv_if_name
 
 	mc_route_prepare $send_if_name
@@ -211,7 +222,15 @@ run_test()
 	mc_route_destroy $rcv_if_name
 	mc_route_destroy $send_if_name
 
+	ip maddress add 01:80:c2:00:00:00 dev $rcv_if_name
+	send_raw $send_if_name "$LINK_LOCAL_STP_BPDU"
+	ip maddress del 01:80:c2:00:00:00 dev $rcv_if_name
+
 	if [ $skip_ptp = false ]; then
+		ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name
+		send_raw $send_if_name "$LINK_LOCAL_LLDP"
+		ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name
+
 		ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name
 		send_raw $send_if_name "$PTP_1588_L2_SYNC"
 		send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP"
@@ -302,41 +321,49 @@ run_test()
 		"$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \
 		true "$test_name"
 
+	check_rcv $rcv_if_name "Link-local STP BPDU" \
+		"> 01:80:c2:00:00:00" \
+		true "$test_name"
+
 	if [ $skip_ptp = false ]; then
+		check_rcv $rcv_if_name "Link-local LLDP" \
+			"> 01:80:c2:00:00:0e" \
+			true "$test_name"
+
 		check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \
-			"ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \
+			"ethertype PTP (0x88f7).* PTPv2.* msg type *: sync msg" \
 			true "$test_name"
 
 		check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \
-			"ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \
+			"ethertype PTP (0x88f7).* PTPv2.* msg type *: follow up msg" \
 			true "$test_name"
 
 		check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \
-			"ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \
+			"ethertype PTP (0x88f7).* PTPv2.* msg type *: peer delay req msg" \
 			true "$test_name"
 
 		check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \
-			"ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \
+			"ethertype IPv4 (0x0800).* PTPv2.* msg type *: sync msg" \
 			true "$test_name"
 
 		check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \
-			"ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \
+			"ethertype IPv4 (0x0800).* PTPv2.* msg type *: follow up msg" \
 			true "$test_name"
 
 		check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \
-			"ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \
+			"ethertype IPv4 (0x0800).* PTPv2.* msg type *: peer delay req msg" \
 			true "$test_name"
 
 		check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \
-			"ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \
+			"ethertype IPv6 (0x86dd).* PTPv2.* msg type *: sync msg" \
 			true "$test_name"
 
 		check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \
-			"ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \
+			"ethertype IPv6 (0x86dd).* PTPv2.* msg type *: follow up msg" \
 			true "$test_name"
 
 		check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \
-			"ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \
+			"ethertype IPv6 (0x86dd).* PTPv2.* msg type *: peer delay req msg" \
 			true "$test_name"
 	fi
 
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index a20d22d1df36..8d4ae6c952a1 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -238,7 +238,7 @@ test_lag_slave()
 	ip neigh flush dev br1
 	setup_wait_dev $up_dev
 	setup_wait_dev $host_dev
-	$ARPING -I br1 192.0.2.130 -qfc 1
+	$ARPING -I br1 -qfc 1 192.0.2.130
 	sleep 2
 	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10"
 
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
index 1b902cc579f6..a21c771908b3 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -196,7 +196,7 @@ test_span_gre_forbidden_egress()
 
 	bridge vlan add dev $swp3 vid 555
 	# Re-prime FDB
-	$ARPING -I br1.555 192.0.2.130 -fqc 1
+	$ARPING -I br1.555 -fqc 1 192.0.2.130
 	sleep 1
 	quick_test_span_gre_dir $tundev
 
@@ -290,7 +290,7 @@ test_span_gre_fdb_roaming()
 
 	bridge fdb del dev $swp2 $h3mac vlan 555 master 2>/dev/null
 	# Re-prime FDB
-	$ARPING -I br1.555 192.0.2.130 -fqc 1
+	$ARPING -I br1.555 -fqc 1 192.0.2.130
 	sleep 1
 	quick_test_span_gre_dir $tundev
 
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index af008fbf2725..eb2d8034de9c 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -98,12 +98,20 @@ setup_prepare()
 	h1_create
 	h2_create
 	switch_create
+
+	if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
+		sysctl_set net.bridge.bridge-nf-call-iptables 0
+	fi
 }
 
 cleanup()
 {
 	pre_cleanup
 
+	if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
+		sysctl_restore net.bridge.bridge-nf-call-iptables
+	fi
+
 	switch_destroy
 	h2_destroy
 	h1_destroy
diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh
index d14efb2d23b2..9235674627ab 100755
--- a/tools/testing/selftests/net/forwarding/pedit_ip.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh
@@ -91,12 +91,20 @@ setup_prepare()
 	h1_create
 	h2_create
 	switch_create
+
+	if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
+		sysctl_set net.bridge.bridge-nf-call-iptables 0
+	fi
 }
 
 cleanup()
 {
 	pre_cleanup
 
+	if [ -f /proc/sys/net/bridge/bridge-nf-call-iptables ]; then
+		sysctl_restore net.bridge.bridge-nf-call-iptables
+	fi
+
 	switch_destroy
 	h2_destroy
 	h1_destroy
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index b98ea9449b8b..dfb6646cb97b 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -18,6 +18,8 @@
 # | 2001:db8:1::1/64                             2001:db8:2::1/64   |
 # |                                                                 |
 # +-----------------------------------------------------------------+
+#
+#shellcheck disable=SC2034 # SC doesn't see our uses of global variables
 
 ALL_TESTS="
 	ping_ipv4
@@ -27,6 +29,7 @@ ALL_TESTS="
 	ipv4_sip_equal_dip
 	ipv6_sip_equal_dip
 	ipv4_dip_link_local
+	ipv4_sip_link_local
 "
 
 NUM_NETIFS=4
@@ -330,6 +333,32 @@ ipv4_dip_link_local()
 	tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
 }
 
+ipv4_sip_link_local()
+{
+	local sip=169.254.1.1
+
+	RET=0
+
+	# Disable rpfilter to prevent packets from being dropped because of it.
+	sysctl_set net.ipv4.conf.all.rp_filter 0
+	sysctl_set net.ipv4.conf."$rp1".rp_filter 0
+
+	tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \
+		flower src_ip "$sip" action pass
+
+	$MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \
+		-A "$sip" -B 198.51.100.2 -q
+
+	tc_check_packets "dev $rp2 egress" 101 5
+	check_err $? "Packets were dropped"
+
+	log_test "IPv4 source IP is link-local"
+
+	tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower
+	sysctl_restore net.ipv4.conf."$rp1".rp_filter
+	sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 5a58b1ec8aef..83e52abdbc2e 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -33,10 +33,6 @@ NUM_NETIFS=6
 source lib.sh
 source tc_common.sh
 
-require_command $MCD
-require_command $MC_CLI
-table_name=selftests
-
 h1_create()
 {
 	simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64
@@ -149,25 +145,6 @@ router_destroy()
 	ip link set dev $rp1 down
 }
 
-start_mcd()
-{
-	SMCROUTEDIR="$(mktemp -d)"
-
-	for ((i = 1; i <= $NUM_NETIFS; ++i)); do
-		echo "phyint ${NETIFS[p$i]} enable" >> \
-			$SMCROUTEDIR/$table_name.conf
-	done
-
-	$MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
-		-P $SMCROUTEDIR/$table_name.pid
-}
-
-kill_mcd()
-{
-	pkill $MCD
-	rm -rf $SMCROUTEDIR
-}
-
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -179,7 +156,7 @@ setup_prepare()
 	rp3=${NETIFS[p5]}
 	h3=${NETIFS[p6]}
 
-	start_mcd
+	adf_mcd_start || exit "$EXIT_STATUS"
 
 	vrf_prepare
 
@@ -206,7 +183,7 @@ cleanup()
 
 	vrf_cleanup
 
-	kill_mcd
+	defer_scopes_cleanup
 }
 
 create_mcast_sg()
@@ -214,9 +191,9 @@ create_mcast_sg()
 	local if_name=$1; shift
 	local s_addr=$1; shift
 	local mcast=$1; shift
-	local dest_ifs=${@}
+	local dest_ifs=("${@}")
 
-	$MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+	mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
 }
 
 delete_mcast_sg()
@@ -224,9 +201,9 @@ delete_mcast_sg()
 	local if_name=$1; shift
 	local s_addr=$1; shift
 	local mcast=$1; shift
-	local dest_ifs=${@}
+	local dest_ifs=("${@}")
 
-        $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+        mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
 }
 
 mcast_v4()
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
index 1f6f53e284b5..6269d5e23487 100755
--- a/tools/testing/selftests/net/forwarding/sch_ets.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -11,6 +11,7 @@ ALL_TESTS="
 	ets_test_strict
 	ets_test_mixed
 	ets_test_dwrr
+	ets_test_plug
 	classifier_mode
 	ets_test_strict
 	ets_test_mixed
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
index 8f9922c695b0..0453210271dc 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
@@ -165,8 +165,7 @@ h1_create()
 {
 	local i;
 
-	simple_if_init $h1
-	defer simple_if_fini $h1
+	adf_simple_if_init $h1
 
 	mtu_set $h1 9900
 	defer mtu_restore $h1
@@ -182,8 +181,7 @@ h2_create()
 {
 	local i
 
-	simple_if_init $h2
-	defer simple_if_fini $h2
+	adf_simple_if_init $h2
 
 	mtu_set $h2 9900
 	defer mtu_restore $h2
@@ -251,8 +249,7 @@ setup_prepare()
 	put=$swp2
 	hut=$h2
 
-	vrf_prepare
-	defer vrf_cleanup
+	adf_vrf_prepare
 
 	h1_create
 	h2_create
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index 08240d3e3c87..79d837a2868a 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -224,3 +224,11 @@ ets_test_dwrr()
 	ets_set_dwrr_two_bands
 	xfail_on_slow ets_dwrr_test_01
 }
+
+ets_test_plug()
+{
+	ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514"
+	tc qdisc add dev $put handle 20: parent 10:4 plug
+	start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1"
+	ets_qdisc_setup $put 2
+}
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
index af166662b78a..f2a3d9254642 100755
--- a/tools/testing/selftests/net/forwarding/sch_red.sh
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -52,8 +52,7 @@ PKTSZ=1400
 
 h1_create()
 {
-	simple_if_init $h1 192.0.2.1/28
-	defer simple_if_fini $h1 192.0.2.1/28
+	adf_simple_if_init $h1 192.0.2.1/28
 
 	mtu_set $h1 10000
 	defer mtu_restore $h1
@@ -65,8 +64,7 @@ h1_create()
 
 h2_create()
 {
-	simple_if_init $h2 192.0.2.2/28
-	defer simple_if_fini $h2 192.0.2.2/28
+	adf_simple_if_init $h2 192.0.2.2/28
 
 	mtu_set $h2 10000
 	defer mtu_restore $h2
@@ -74,8 +72,7 @@ h2_create()
 
 h3_create()
 {
-	simple_if_init $h3 192.0.2.3/28
-	defer simple_if_fini $h3 192.0.2.3/28
+	adf_simple_if_init $h3 192.0.2.3/28
 
 	mtu_set $h3 10000
 	defer mtu_restore $h3
@@ -125,8 +122,7 @@ setup_prepare()
 
 	h3_mac=$(mac_get $h3)
 
-	vrf_prepare
-	defer vrf_cleanup
+	adf_vrf_prepare
 
 	h1_create
 	h2_create
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
index ec309a5086bc..070c17faa9e4 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
@@ -59,8 +59,7 @@ host_create()
 	local dev=$1; shift
 	local host=$1; shift
 
-	simple_if_init $dev
-	defer simple_if_fini $dev
+	adf_simple_if_init $dev
 
 	mtu_set $dev 10000
 	defer mtu_restore $dev
@@ -149,8 +148,7 @@ setup_prepare()
 
 	h2_mac=$(mac_get $h2)
 
-	vrf_prepare
-	defer vrf_cleanup
+	adf_vrf_prepare
 
 	h1_create
 	h2_create
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index ea89e558672d..86edbc7e2489 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -223,7 +223,7 @@ mirred_egress_to_ingress_tcp_test()
 		ip_proto icmp \
 			action drop
 
-	ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
+	ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 > $mirred_e2i_tf2 &
 	local rpid=$!
 	ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
 	wait -n $rpid
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index b1daad19b01e..b58909a93112 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
 	match_ip_tos_test match_indev_test match_ip_ttl_test
 	match_mpls_label_test \
 	match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
-	match_mpls_lse_test"
+	match_mpls_lse_test match_erspan_opts_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -676,6 +676,56 @@ match_mpls_lse_test()
 	log_test "mpls lse match ($tcflags)"
 }
 
+match_erspan_opts_test()
+{
+	RET=0
+
+	check_tc_erspan_support $h2 || return 0
+
+	# h1 erspan setup: one tunnel per ERSPAN version, both enslaved to v$h1
+	tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \
+		tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II
+	tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \
+		tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \
+		# ERSPAN Type III
+	ip link set dev erspan1 master v$h1
+	ip link set dev erspan2 master v$h1
+	# h2 erspan setup: the flower filters below are attached to ep-ex ingress
+	ip link add ep-ex type erspan ttl 64 external # To collect tunnel info
+	ip link set ep-ex up
+	ip link set dev ep-ex master v$h2
+	tc qdisc add dev ep-ex clsact
+
+	# ERSPAN Type II [decap direction]
+	tc filter add dev ep-ex ingress protocol ip  handle 101 flower \
+		$tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+		enc_key_id 1001 erspan_opts 1:6789:0:0 \
+		action drop
+	# ERSPAN Type III [decap direction]
+	tc filter add dev ep-ex ingress protocol ip  handle 102 flower \
+		$tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+		enc_key_id 1002 erspan_opts 2:0:1:63 action drop
+
+	ep1mac=$(mac_get erspan1)
+	$MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+	tc_check_packets "dev ep-ex ingress" 101 1
+	check_err $? "ERSPAN Type II"
+
+	ep2mac=$(mac_get erspan2)
+	$MZ erspan2 -c 1 -p 64 -a $ep2mac -b $h2mac -t ip -q
+	tc_check_packets "dev ep-ex ingress" 102 1
+	check_err $? "ERSPAN Type III"
+
+	# h2 erspan cleanup
+	tc qdisc del dev ep-ex clsact
+	tunnel_destroy ep-ex
+	# h1 erspan cleanup
+	tunnel_destroy erspan2 # ERSPAN Type III
+	tunnel_destroy erspan1 # ERSPAN Type II
+
+	log_test "erspan_opts match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh
new file mode 100755
index 000000000000..8992aeabfe0b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS=" \
+	test_clock_jump_backward \
+	test_taprio_after_ptp \
+	test_max_sdu \
+	test_clock_jump_backward_forward \
+"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+source tsn_lib.sh
+
+require_command python3
+
+# The test assumes the usual topology from the README, where h1 is connected to
+# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge.
+# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2.
+# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to
+# swp1 (and both to CLOCK_REALTIME).
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Tunables
+NUM_PKTS=100
+STREAM_VID=10
+STREAM_PRIO_1=6
+STREAM_PRIO_2=5
+STREAM_PRIO_3=4
+# PTP uses TC 0
+ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2))
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((CYCLE_TIME_NS / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((CYCLE_TIME_NS / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((GATE_DURATION_NS / 2))
+
+path_delay=
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+	local h2_mac_addr=$(mac_get $h2)
+
+	ip link set $swp1 up
+	ip link set $swp2 up
+
+	ip link add br0 type bridge vlan_filtering 1
+	ip link set $swp1 master br0
+	ip link set $swp2 master br0
+	ip link set br0 up
+
+	bridge vlan add dev $swp2 vid $STREAM_VID
+	bridge vlan add dev $swp1 vid $STREAM_VID
+	bridge fdb add dev $swp2 \
+		$h2_mac_addr vlan $STREAM_VID static master
+}
+
+switch_destroy()
+{
+	ip link del br0
+}
+
+ptp_setup()
+{
+	# Set up swp1 as a master PHC for h1, synchronized to the local
+	# CLOCK_REALTIME.
+	phc2sys_start $UDS_ADDRESS_SWP1
+	ptp4l_start $h1 true $UDS_ADDRESS_H1
+	ptp4l_start $swp1 false $UDS_ADDRESS_SWP1
+}
+
+ptp_cleanup()
+{
+	ptp4l_stop $swp1
+	ptp4l_stop $h1
+	phc2sys_stop
+}
+
+txtime_setup()
+{
+	local if_name=$1
+
+	tc qdisc add dev $if_name clsact
+	# Classify PTP on TC 7 and isochron on TC 6
+	tc filter add dev $if_name egress protocol 0x88f7 \
+		flower action skbedit priority 7
+	tc filter add dev $if_name egress protocol 802.1Q \
+		flower vlan_ethtype 0xdead action skbedit priority 6
+	tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \
+		queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+		map 0 1 2 3 4 5 6 7 \
+		hw 1
+	# Set up TC 4, 5, 6 for SO_TXTIME. tc-mqprio queues count from 1.
+	tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \
+		clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+	tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \
+		clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+	tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \
+		clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+}
+
+txtime_cleanup()
+{
+	local if_name=$1
+
+	tc qdisc del dev $if_name clsact
+	tc qdisc del dev $if_name root
+}
+
+taprio_replace()
+{
+	local if_name="$1"; shift
+	local extra_args="$1"; shift
+
+	# STREAM_PRIO_1 always has an open gate.
+	# STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time)
+	# STREAM_PRIO_3 always has a closed gate.
+	tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \
+		queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+		map 0 1 2 3 4 5 6 7 \
+		sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \
+		sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \
+		base-time 0 flags 0x2 $extra_args
+	taprio_wait_for_admin $if_name
+}
+
+taprio_cleanup()
+{
+	local if_name=$1
+
+	tc qdisc del dev $if_name root
+}
+
+probe_path_delay()
+{
+	local isochron_dat="$(mktemp)"
+	local received
+
+	log_info "Probing path delay"
+
+	isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \
+		"$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \
+		"$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat"
+
+	received=$(isochron_report_num_received "$isochron_dat")
+	if [ "$received" != "$NUM_PKTS" ]; then
+		echo "Cannot establish basic data path between $h1 and $h2"
+		rm -f "$isochron_dat" 2> /dev/null
+		exit $ksft_fail
+	fi
+
+	printf "pdelay = {}\n" > isochron_data.py
+	isochron report --input-file "$isochron_dat" \
+		--printf-format "pdelay[%u] = %d - %d\n" \
+		--printf-args "qRT" \
+		>> isochron_data.py
+	cat <<-'EOF' > isochron_postprocess.py
+	#!/usr/bin/env python3
+
+	from isochron_data import pdelay
+	import numpy as np
+
+	w = np.array(list(pdelay.values()))
+	print("{}".format(np.max(w)))
+	EOF
+	path_delay=$(python3 ./isochron_postprocess.py)
+
+	# Scratch files are done with; remove them ahead of the early exit below
+	rm -f ./isochron_data.py ./isochron_postprocess.py "$isochron_dat" \
+		2> /dev/null
+	log_info "Path delay from $h1 to $h2 estimated at $path_delay ns"
+
+	if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then
+		echo "Path delay larger than gate duration, aborting"
+		exit $ksft_fail
+	fi
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+
+	txtime_setup $h1
+
+	# Temporarily set up PTP just to probe the end-to-end path delay.
+	ptp_setup
+	probe_path_delay
+	ptp_cleanup
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	isochron_recv_stop
+	txtime_cleanup $h1
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+run_test()
+{
+	local base_time=$1; shift
+	local stream_prio=$1; shift
+	local expected_delay=$1; shift
+	local should_fail=$1; shift
+	local test_name=$1; shift
+	local isochron_dat="$(mktemp)"
+	local received
+	local median_delay
+
+	RET=0
+
+	# Pass SHIFT_TIME_NS (a quarter of the cycle time) as the shift time.
+	# NOTE(review): this comment used to say the shift time equals the
+	# cycle time, cancelling the default advance time so that packets do
+	# not prematurely enter through the open gate in __test_out_of_band()
+	# while the gate stays open long enough for __test_in_band() - confirm.
+	isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \
+		"$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \
+		"$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat"
+
+	received=$(isochron_report_num_received "$isochron_dat")
+	[ "$received" = "$NUM_PKTS" ]
+	check_err_fail $should_fail $? "Reception of $NUM_PKTS packets"
+
+	if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then
+		printf "pdelay = {}\n" > isochron_data.py
+		isochron report --input-file "$isochron_dat" \
+			--printf-format "pdelay[%u] = %d - %d\n" \
+			--printf-args "qRT" \
+			>> isochron_data.py
+		cat <<-'EOF' > isochron_postprocess.py
+		#!/usr/bin/env python3
+
+		from isochron_data import pdelay
+		import numpy as np
+
+		w = np.array(list(pdelay.values()))
+		print("{}".format(int(np.median(w))))
+		EOF
+		median_delay=$(python3 ./isochron_postprocess.py)
+
+		# If the condition below is true, packets were delayed by a closed gate
+		[ "$median_delay" -gt $((path_delay + expected_delay)) ]
+		check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay"
+
+		# If the condition below is true, packets were sent expecting them to
+		# hit a closed gate in the switch, but were not delayed
+		[ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ]
+		check_fail $? "Median delay $median_delay is less than expected delay $expected_delay"
+	fi
+
+	log_test "$test_name"
+
+	rm -f ./isochron_data.py 2> /dev/null
+	rm -f ./isochron_postprocess.py 2> /dev/null
+	rm -f "$isochron_dat" 2> /dev/null
+}
+
+__test_always_open()
+{
+	run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open"
+}
+
+__test_always_closed()
+{
+	run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed"
+}
+
+__test_in_band()
+{
+	# Send packets in-band with the OPEN gate entry
+	run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate"
+}
+
+__test_out_of_band()
+{
+	# Send packets in-band with the CLOSE gate entry
+	run_test 0.005000000 $STREAM_PRIO_2 \
+		$((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \
+		"Out of band with gate"
+}
+
+run_subtests()
+{
+	__test_always_open
+	__test_always_closed
+	__test_in_band
+	__test_out_of_band
+}
+
+test_taprio_after_ptp()
+{
+	log_info "Setting up taprio after PTP"
+	ptp_setup
+	taprio_replace $swp2
+	run_subtests
+	taprio_cleanup $swp2
+	ptp_cleanup
+}
+
+__test_under_max_sdu()
+{
+	# Limit max-sdu for STREAM_PRIO_1
+	taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0"
+	run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU"
+}
+
+__test_over_max_sdu()
+{
+	# Limit max-sdu for STREAM_PRIO_1
+	taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0"
+	run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU"
+}
+
+test_max_sdu()
+{
+	ptp_setup
+	__test_under_max_sdu
+	__test_over_max_sdu
+	taprio_cleanup $swp2
+	ptp_cleanup
+}
+
+# Perform a clock jump in the past without synchronization running, so that the
+# time base remains where it was set by phc_ctl.
+test_clock_jump_backward()
+{
+	# This is a more complex schedule specifically crafted in a way that
+	# has been problematic on NXP LS1028A. Not much to test with it other
+	# than the fact that it passes traffic.
+	tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \
+		queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \
+		base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \
+		sched-entry S 20 300000 sched-entry S 48 200000 \
+		sched-entry S 20 300000 sched-entry S 83 200000 \
+		sched-entry S 40 300000 sched-entry S 00 200000 flags 2
+
+	log_info "Forcing a backward clock jump"
+	phc_ctl $swp1 set 0
+
+	ping_test $h1 192.0.2.2
+	taprio_cleanup $swp2
+}
+
+# Test that taprio tolerates clock jumps.
+# Since ptp4l and phc2sys are running, it is expected for the time to
+# eventually recover (through yet another clock jump). Isochron waits
+# until that is the case.
+test_clock_jump_backward_forward()
+{
+	log_info "Forcing a backward and a forward clock jump"
+	taprio_replace $swp2
+	phc_ctl $swp1 set 0
+	ptp_setup
+	ping_test $h1 192.0.2.2
+	run_subtests
+	ptp_cleanup
+	taprio_cleanup $swp2
+}
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_test_skip "Could not test offloaded functionality"
+	exit $EXIT_STATUS
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index b91bcd8008a9..08c044ff6689 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright 2021-2022 NXP
 
+tc_testing_scripts_dir=$(dirname "$0")/../../tc-testing/scripts
+
 REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
 REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
 
@@ -18,6 +20,7 @@ fi
 if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
 	require_command phc2sys
 	require_command ptp4l
+	require_command phc_ctl
 fi
 
 phc2sys_start()
@@ -182,6 +185,7 @@ isochron_do()
 	local base_time=$1; shift
 	local cycle_time=$1; shift
 	local shift_time=$1; shift
+	local window_size=$1; shift
 	local num_pkts=$1; shift
 	local vid=$1; shift
 	local priority=$1; shift
@@ -212,6 +216,10 @@ isochron_do()
 		extra_args="${extra_args} --shift-time=${shift_time}"
 	fi
 
+	if ! [ -z "${window_size}" ]; then
+		extra_args="${extra_args} --window-size=${window_size}"
+	fi
+
 	if [ "${use_l2}" = "true" ]; then
 		extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
 		receiver_extra_args="--l2 --etype=0xdead"
@@ -247,3 +255,21 @@ isochron_do()
 
 	cpufreq_restore ${ISOCHRON_CPU}
 }
+
+isochron_report_num_received()
+{
+	local isochron_dat=$1; shift
+
+	# Count all received packets by looking at the non-zero RX timestamps
+	isochron report \
+		--input-file "${isochron_dat}" \
+		--printf-format "%u\n" --printf-args "R" | \
+		grep -w -v '0' | wc -l
+}
+
+taprio_wait_for_admin()
+{
+	local if_name="$1"; shift
+
+	"$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name"
+}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index 3f9d50f1ef9e..457f41d5e584 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -428,6 +428,14 @@ __test_flood()
 test_flood()
 {
 	__test_flood de:ad:be:ef:13:37 192.0.2.100 "flood"
+
+	# Add an entry with arbitrary destination IP. Verify that packets are
+	# not duplicated (this can happen if hardware floods the packets, and
+	# then traps them due to misconfiguration, so software data path repeats
+	# flooding and resends packets).
+	bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
+	__test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry"
+	bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
 }
 
 vxlan_fdb_add_del()
@@ -559,6 +567,21 @@ vxlan_encapped_ping_do()
 	local inner_tos=$1; shift
 	local outer_tos=$1; shift
 
+	local ipv4hdr=$(:
+		    )"45:"$(                      : IP version + IHL
+		    )"$inner_tos:"$(              : IP TOS
+		    )"00:54:"$(                   : IP total length
+		    )"99:83:"$(                   : IP identification
+		    )"40:00:"$(                   : IP flags + frag off
+		    )"40:"$(                      : IP TTL
+		    )"01:"$(                      : IP proto
+		    )"CHECKSUM:"$(                : IP header csum
+		    )"c0:00:02:03:"$(             : IP saddr: 192.0.2.3
+		    )"c0:00:02:01"$(              : IP daddr: 192.0.2.1
+		)
+	local checksum=$(payload_template_calc_checksum "$ipv4hdr")
+	ipv4hdr=$(payload_template_expand_checksum "$ipv4hdr" $checksum)
+
 	$MZ $dev -c $count -d 100msec -q \
 		-b $next_hop_mac -B $dest_ip \
 		-t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(:
@@ -569,16 +592,7 @@ vxlan_encapped_ping_do()
 		    )"$dest_mac:"$(               : ETH daddr
 		    )"$(mac_get w2):"$(           : ETH saddr
 		    )"08:00:"$(                   : ETH type
-		    )"45:"$(                      : IP version + IHL
-		    )"$inner_tos:"$(              : IP TOS
-		    )"00:54:"$(                   : IP total length
-		    )"99:83:"$(                   : IP identification
-		    )"40:00:"$(                   : IP flags + frag off
-		    )"40:"$(                      : IP TTL
-		    )"01:"$(                      : IP proto
-		    )"00:00:"$(                   : IP header csum
-		    )"c0:00:02:03:"$(             : IP saddr: 192.0.2.3
-		    )"c0:00:02:01:"$(             : IP daddr: 192.0.2.1
+		    )"$ipv4hdr:"$(                : IPv4 header
 		    )"08:"$(                      : ICMP type
 		    )"00:"$(                      : ICMP code
 		    )"8b:f2:"$(                   : ICMP csum
@@ -740,6 +754,8 @@ test_learning()
 
 	vxlan_flood_test $mac $dst 0 10 0
 
+	# The entry should age out when it only forwards traffic
+	$MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q &
 	sleep 60
 
 	bridge fdb show brport vx1 | grep $mac | grep -q self
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
index a603f7b0a08f..e642feeada0e 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
@@ -695,7 +695,7 @@ vxlan_encapped_ping_do()
 		    )"6"$(			  : IP version
 		    )"$inner_tos"$(               : Traffic class
 		    )"0:00:00:"$(                 : Flow label
-		    )"00:08:"$(                   : Payload length
+		    )"00:03:"$(                   : Payload length
 		    )"3a:"$(                      : Next header
 		    )"04:"$(                      : Hop limit
 		    )"$saddr:"$(		  : IP saddr
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index fb9a34cb50c6..afc65647f673 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -539,6 +539,21 @@ test_flood()
 		10 10 0 10 0
 	__test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \
 		10 0 10 0 10
+
+	# Add entries with arbitrary destination IP. Verify that packets are
+	# not duplicated (this can happen if hardware floods the packets, and
+	# then traps them due to misconfiguration, so software data path repeats
+	# flooding and resends packets).
+	bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
+	bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+
+	__test_flood de:ad:be:ef:13:37 192.0.2.100 10 \
+		"flood vlan 10, unresolved FDB entry" 10 10 0 10 0
+	__test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \
+		"flood vlan 20, unresolved FDB entry" 10 0 10 0 10
+
+	bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+	bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
 }
 
 vxlan_fdb_add_del()
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
new file mode 100755
index 000000000000..2cf4c6d9245b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -0,0 +1,752 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------------------------+
+# | + $h1.10             + $h1.20           |
+# | | 192.0.2.1/28       | 2001:db8:1::1/64 |
+# | \________   ________/                   |
+# |          \ /                            |
+# |           + $h1                H1 (vrf) |
+# +-----------|-----------------------------+
+#             |
+# +-----------|----------------------------------------------------------------+
+# | +---------|--------------------------------------+       SWITCH (main vrf) |
+# | |         + $swp1                   BR1 (802.1q) |                         |
+# | |            vid 10 20                           |                         |
+# | |                                                |                         |
+# | |  + vx10 (vxlan)         + vx20 (vxlan)         |      + lo10 (dummy)     |
+# | |    local 192.0.2.100      local 2001:db8:4::1  |        192.0.2.100/28   |
+# | |    group 233.252.0.1      group ff0e::1:2:3    |        2001:db8:4::1/64 |
+# | |    id 1000                id 2000              |                         |
+# | |    vid 10 pvid untagged   vid 20 pvid untagged |                         |
+# | +------------------------------------------------+                         |
+# |                                                                            |
+# |   + $swp2                                                        $swp3 +   |
+# |   | 192.0.2.33/28                                        192.0.2.65/28 |   |
+# |   | 2001:db8:2::1/64                                  2001:db8:3::1/64 |   |
+# |   |                                                                    |   |
+# +---|--------------------------------------------------------------------|---+
+#     |                                                                    |
+# +---|--------------------------------+  +--------------------------------|---+
+# |   |                      H2 (vrf)  |  | H3 (vrf)                       |   |
+# | +-|----------------------------+   |  |  +-----------------------------|-+ |
+# | | + $h2           BR2 (802.1d) |   |  |  | BR3 (802.1d)            $h3 + | |
+# | |                              |   |  |  |                               | |
+# | | + v1$h2 (veth)               |   |  |  |                v1$h3 (veth) + | |
+# | +-|----------------------------+   |  |  +-----------------------------|-+ |
+# |   |                                |  |                                |   |
+# +---|--------------------------------+  +--------------------------------|---+
+#     |                                                                    |
+# +---|--------------------------------+  +--------------------------------|---+
+# |   + v2$h2 (veth)       NS2 (netns) |  | NS3 (netns)       v2$h3 (veth) +   |
+# |     192.0.2.34/28                  |  |                  192.0.2.66/28     |
+# |     2001:db8:2::2/64               |  |               2001:db8:3::2/64     |
+# |                                    |  |                                    |
+# | +--------------------------------+ |  | +--------------------------------+ |
+# | |                  BR1 (802.1q)  | |  | |                   BR1 (802.1q) | |
+# | |  + vx10 (vxlan)                | |  | |  + vx10 (vxlan)                | |
+# | |    local 192.0.2.34            | |  | |    local 192.0.2.66            | |
+# | |    group 233.252.0.1 dev v2$h2 | |  | |    group 233.252.0.1 dev v2$h3 | |
+# | |    id 1000 dstport $VXPORT     | |  | |    id 1000 dstport $VXPORT     | |
+# | |    vid 10 pvid untagged        | |  | |    vid 10 pvid untagged        | |
+# | |                                | |  | |                                | |
+# | |  + vx20 (vxlan)                | |  | |  + vx20 (vxlan)                | |
+# | |    local 2001:db8:2::2         | |  | |    local 2001:db8:3::2         | |
+# | |    group ff0e::1:2:3 dev v2$h2 | |  | |    group ff0e::1:2:3 dev v2$h3 | |
+# | |    id 2000 dstport $VXPORT     | |  | |    id 2000 dstport $VXPORT     | |
+# | |    vid 20 pvid untagged        | |  | |    vid 20 pvid untagged        | |
+# | |                                | |  | |                                | |
+# | |  + w1 (veth)                   | |  | |  + w1 (veth)                   | |
+# | |  | vid 10 20                   | |  | |  | vid 10 20                   | |
+# | +--|-----------------------------+ |  | +--|-----------------------------+ |
+# |    |                               |  |    |                               |
+# | +--|-----------------------------+ |  | +--|-----------------------------+ |
+# | |  + w2 (veth)        VW2 (vrf)  | |  | |  + w2 (veth)        VW2 (vrf)  | |
+# | |  |\                            | |  | |  |\                            | |
+# | |  | + w2.10                     | |  | |  | + w2.10                     | |
+# | |  |   192.0.2.3/28              | |  | |  |   192.0.2.4/28              | |
+# | |  |                             | |  | |  |                             | |
+# | |  + w2.20                       | |  | |  + w2.20                       | |
+# | |    2001:db8:1::3/64            | |  | |    2001:db8:1::4/64            | |
+# | +--------------------------------+ |  | +--------------------------------+ |
+# +------------------------------------+  +------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+
+: "${VXPORT:=4789}"
+export VXPORT
+
+: "${GROUP4:=233.252.0.1}"
+export GROUP4
+
+: "${GROUP6:=ff0e::1:2:3}"
+export GROUP6
+
+: "${IPMR:=lo10}"
+
+ALL_TESTS="
+	ipv4_nomcroute
+	ipv4_mcroute
+	ipv4_mcroute_changelink
+	ipv4_mcroute_starg
+	ipv4_mcroute_noroute
+	ipv4_mcroute_fdb
+	ipv4_mcroute_fdb_oif0
+	ipv4_mcroute_fdb_oif0_sep
+
+	ipv6_nomcroute
+	ipv6_mcroute
+	ipv6_mcroute_changelink
+	ipv6_mcroute_starg
+	ipv6_mcroute_noroute
+	ipv6_mcroute_fdb
+	ipv6_mcroute_fdb_oif0
+
+	ipv4_nomcroute_rx
+	ipv4_mcroute_rx
+	ipv4_mcroute_starg_rx
+	ipv4_mcroute_fdb_oif0_sep_rx
+	ipv4_mcroute_fdb_sep_rx
+
+	ipv6_nomcroute_rx
+	ipv6_mcroute_rx
+	ipv6_mcroute_starg_rx
+	ipv6_mcroute_fdb_sep_rx
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	adf_simple_if_init "$h1"
+
+	adf_ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10
+	adf_ip_link_set_up "$h1.10"
+	adf_ip_addr_add "$h1.10" 192.0.2.1/28
+
+	adf_ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20
+	adf_ip_link_set_up "$h1.20"
+	adf_ip_addr_add "$h1.20" 2001:db8:1::1/64
+}
+
+install_capture()
+{
+	local dev=$1; shift
+
+	tc qdisc add dev "$dev" clsact
+	defer tc qdisc del dev "$dev" clsact
+
+	tc filter add dev "$dev" ingress proto ip pref 104 \
+	   u32 match ip protocol 0x11 0xff \
+	       match u16 "$VXPORT" 0xffff at 0x16 \
+	       match u16 0x0800 0xffff at 0x30 \
+	       action pass
+	defer tc filter del dev "$dev" ingress proto ip pref 104
+
+	tc filter add dev "$dev" ingress proto ipv6 pref 106 \
+	   u32 match ip6 protocol 0x11 0xff \
+	       match u16 "$VXPORT" 0xffff at 0x2a \
+	       match u16 0x86dd 0xffff at 0x44 \
+	       match u8 0x11 0xff at 0x4c \
+	       action pass
+	defer tc filter del dev "$dev" ingress proto ipv6 pref 106
+}
+
+h2_create()
+{
+	# $h2
+	adf_ip_link_set_up "$h2"
+
+	# H2
+	vrf_create "v$h2"
+	defer vrf_destroy "v$h2"
+
+	adf_ip_link_set_up "v$h2"
+
+	# br2
+	adf_ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0
+	adf_ip_link_set_master br2 "v$h2"
+	adf_ip_link_set_up br2
+
+	# $h2
+	adf_ip_link_set_master "$h2" br2
+	install_capture "$h2"
+
+	# v1$h2
+	adf_ip_link_set_up "v1$h2"
+	adf_ip_link_set_master "v1$h2" br2
+}
+
+h3_create()
+{
+	# $h3
+	adf_ip_link_set_up "$h3"
+
+	# H3
+	vrf_create "v$h3"
+	defer vrf_destroy "v$h3"
+
+	adf_ip_link_set_up "v$h3"
+
+	# br3
+	adf_ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0
+	adf_ip_link_set_master br3 "v$h3"
+	adf_ip_link_set_up br3
+
+	# $h3
+	adf_ip_link_set_master "$h3" br3
+	install_capture "$h3"
+
+	# v1$h3
+	adf_ip_link_set_up "v1$h3"
+	adf_ip_link_set_master "v1$h3" br3
+}
+
+switch_create()
+{
+	local swp1_mac
+
+	# br1
+	swp1_mac=$(mac_get "$swp1")
+	adf_ip_link_add br1 type bridge vlan_filtering 1 \
+			    vlan_default_pvid 0 mcast_snooping 0
+	adf_ip_link_set_addr br1 "$swp1_mac"
+	adf_ip_link_set_up br1
+
+	# A dummy to force the IPv6 OIF=0 test to install a suitable MC route on
+	# $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test.
+	adf_ip_link_add "X$IPMR" up type dummy
+
+	# IPMR
+	adf_ip_link_add "$IPMR" up type dummy
+	adf_ip_addr_add "$IPMR" 192.0.2.100/28
+	adf_ip_addr_add "$IPMR" 2001:db8:4::1/64
+
+	# $swp1
+	adf_ip_link_set_up "$swp1"
+	adf_ip_link_set_master "$swp1" br1
+	adf_bridge_vlan_add vid 10 dev "$swp1"
+	adf_bridge_vlan_add vid 20 dev "$swp1"
+
+	# $swp2
+	adf_ip_link_set_up "$swp2"
+	adf_ip_addr_add "$swp2" 192.0.2.33/28
+	adf_ip_addr_add "$swp2" 2001:db8:2::1/64
+
+	# $swp3
+	adf_ip_link_set_up "$swp3"
+	adf_ip_addr_add "$swp3" 192.0.2.65/28
+	adf_ip_addr_add "$swp3" 2001:db8:3::1/64
+}
+
+vx_create()
+{
+	local name=$1; shift
+	local vid=$1; shift
+
+	adf_ip_link_add "$name" up type vxlan dstport "$VXPORT" \
+		nolearning noudpcsum tos inherit ttl 16 \
+		"$@"
+	adf_ip_link_set_master "$name" br1
+	adf_bridge_vlan_add vid "$vid" dev "$name" pvid untagged
+}
+export -f vx_create
+
+vx10_create()
+{
+	vx_create vx10 10 id 1000 "$@"
+}
+export -f vx10_create
+
+vx20_create()
+{
+	vx_create vx20 20 id 2000 "$@"
+}
+export -f vx20_create
+
+ns_init_common()
+{
+	local ns=$1; shift
+	local if_in=$1; shift
+	local ipv4_in=$1; shift
+	local ipv6_in=$1; shift
+	local ipv4_host=$1; shift
+	local ipv6_host=$1; shift
+
+	# v2$h2 / v2$h3
+	adf_ip_link_set_up "$if_in"
+	adf_ip_addr_add "$if_in" "$ipv4_in"
+	adf_ip_addr_add "$if_in" "$ipv6_in"
+
+	# br1
+	adf_ip_link_add br1 type bridge vlan_filtering 1 \
+		    vlan_default_pvid 0 mcast_snooping 0
+	adf_ip_link_set_up br1
+
+	# vx10, vx20
+	vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in"
+	vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in"
+
+	# w1
+	adf_ip_link_add w1 type veth peer name w2
+	adf_ip_link_set_master w1 br1
+	adf_ip_link_set_up w1
+	adf_bridge_vlan_add vid 10 dev w1
+	adf_bridge_vlan_add vid 20 dev w1
+
+	# w2
+	adf_simple_if_init w2
+
+	# w2.10
+	adf_ip_link_add w2.10 master vw2 link w2 type vlan id 10
+	adf_ip_link_set_up w2.10
+	adf_ip_addr_add w2.10 "$ipv4_host"
+
+	# w2.20
+	adf_ip_link_add w2.20 master vw2 link w2 type vlan id 20
+	adf_ip_link_set_up w2.20
+	adf_ip_addr_add w2.20 "$ipv6_host"
+}
+export -f ns_init_common
+
+ns2_create()
+{
+	# NS2
+	ip netns add ns2
+	defer ip netns del ns2
+
+	# v2$h2
+	ip link set dev "v2$h2" netns ns2
+	defer ip -n ns2 link set dev "v2$h2" netns 1
+
+	in_ns ns2 \
+	      ns_init_common ns2 "v2$h2" \
+			     192.0.2.34/28 2001:db8:2::2/64 \
+			     192.0.2.3/28  2001:db8:1::3/64
+}
+
+ns3_create()
+{
+	# NS3
+	ip netns add ns3
+	defer ip netns del ns3
+
+	# v2$h3
+	ip link set dev "v2$h3" netns ns3
+	defer ip -n ns3 link set dev "v2$h3" netns 1
+
+	ip -n ns3 link set dev "v2$h3" up
+
+	in_ns ns3 \
+	      ns_init_common ns3 "v2$h3" \
+			     192.0.2.66/28 2001:db8:3::2/64 \
+			     192.0.2.4/28  2001:db8:1::4/64
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	adf_vrf_prepare
+	adf_forwarding_enable
+
+	adf_ip_link_add "v1$h2" type veth peer name "v2$h2"
+	adf_ip_link_add "v1$h3" type veth peer name "v2$h3"
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+	ns2_create
+	ns3_create
+}
+
+adf_install_broken_sg()
+{
+	adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+	mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+	defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+
+	mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+	defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+}
+
+adf_install_rx()
+{
+	# Any-source entries for both groups on $swp2 / $swp3 towards $IPMR.
+	# Removals name "$IPMR" like the adds so an overridden IPMR is undone.
+	mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
+	defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
+	mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
+	defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
+
+	mc_cli add "$swp2" :: "$GROUP6" "$IPMR"
+	defer mc_cli remove "$swp2" :: "$GROUP6" "$IPMR"
+	mc_cli add "$swp3" :: "$GROUP6" "$IPMR"
+	defer mc_cli remove "$swp3" :: "$GROUP6" "$IPMR"
+}
+
+adf_install_sg()
+{
+	adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+	# Each deferred remove must name the same source address as its add.
+	mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
+	defer mc_cli remove "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
+	mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+	defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+
+	adf_install_rx
+}
+
+adf_install_sg_sep()
+{
+	adf_mcd_start lo || exit "$EXIT_STATUS"
+
+	mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+	defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+	mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+	defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+}
+
+adf_install_sg_sep_rx()
+{
+	local lo=$1; shift
+
+	adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS"
+
+	mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+	defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+	mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+	defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+
+	adf_install_rx
+}
+
+adf_install_starg()
+{
+	adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+	mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+	defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+
+	mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+	defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+
+	adf_install_rx
+}
+
+do_packets_v4()
+{
+	local mac
+
+	mac=$(mac_get "$h2")
+	"$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+	    -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q
+}
+
+do_packets_v6()
+{
+	local mac
+
+	mac=$(mac_get "$h2")
+	"$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+	    -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q
+}
+
+do_test()
+{
+	local ipv=$1; shift
+	local expect_h2=$1; shift
+	local expect_h3=$1; shift
+	local what=$1; shift
+
+	local pref=$((100 + ipv))
+	local t0_h2
+	local t0_h3
+	local t1_h2
+	local t1_h3
+	local d_h2
+	local d_h3
+
+	RET=0
+
+	t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+	t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+	"do_packets_v$ipv"
+	sleep 1
+
+	t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+	t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+	d_h2=$((t1_h2 - t0_h2))
+	d_h3=$((t1_h3 - t0_h3))
+
+	((d_h2 == expect_h2))
+	check_err $? "Expected $expect_h2 packets on H2, got $d_h2"
+
+	((d_h3 == expect_h3))
+	check_err $? "Expected $expect_h3 packets on H3, got $d_h3"
+
+	log_test "VXLAN MC flood $what"
+}
+
+ipv4_do_test_rx()
+{
+	local h3_should_fail=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ping_do "$h1.10" 192.0.2.3
+	check_err $? "H2 should respond"
+
+	ping_do "$h1.10" 192.0.2.4
+	check_err_fail "$h3_should_fail" $? "H3 responds"
+
+	log_test "VXLAN MC flood $what"
+}
+
+ipv6_do_test_rx()
+{
+	local h3_should_fail=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ping6_do "$h1.20" 2001:db8:1::3
+	check_err $? "H2 should respond"
+
+	ping6_do "$h1.20" 2001:db8:1::4
+	check_err_fail "$h3_should_fail" $? "H3 responds"
+
+	log_test "VXLAN MC flood $what"
+}
+
+ipv4_nomcroute()
+{
+	# Install a misleading (S,G) rule to attempt to trick the system into
+	# pushing the packets elsewhere.
+	adf_install_broken_sg
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
+	do_test 4 10 0 "IPv4 nomcroute"
+}
+
+ipv6_nomcroute()
+{
+	# Like for IPv4, install a misleading (S,G).
+	adf_install_broken_sg
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+	do_test 6 10 0 "IPv6 nomcroute"
+}
+
+ipv4_nomcroute_rx()
+{
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
+	ipv4_do_test_rx 1 "IPv4 nomcroute ping"
+}
+
+ipv6_nomcroute_rx()
+{
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+	ipv6_do_test_rx 1 "IPv6 nomcroute ping"
+}
+
+ipv4_mcroute()
+{
+	adf_install_sg
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	do_test 4 10 10 "IPv4 mcroute"
+}
+
+ipv6_mcroute()
+{
+	adf_install_sg
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	do_test 6 10 10 "IPv6 mcroute"
+}
+
+ipv4_mcroute_rx()
+{
+	adf_install_sg
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	ipv4_do_test_rx 0 "IPv4 mcroute ping"
+}
+
+ipv6_mcroute_rx()
+{
+	adf_install_sg
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	ipv6_do_test_rx 0 "IPv6 mcroute ping"
+}
+
+ipv4_mcroute_changelink()
+{
+	adf_install_sg
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+	ip link set dev vx10 type vxlan mcroute
+	sleep 1
+	do_test 4 10 10 "IPv4 mcroute changelink"
+}
+
+ipv6_mcroute_changelink()
+{
+	adf_install_sg
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR"
+	ip link set dev vx20 type vxlan mcroute	# enable via changelink
+	sleep 1
+	do_test 6 10 10 "IPv6 mcroute changelink"
+}
+
+ipv4_mcroute_starg()
+{
+	adf_install_starg
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	do_test 4 10 10 "IPv4 mcroute (*,G)"
+}
+
+ipv6_mcroute_starg()
+{
+	adf_install_starg
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	do_test 6 10 10 "IPv6 mcroute (*,G)"
+}
+
+ipv4_mcroute_starg_rx()
+{
+	adf_install_starg
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
+}
+
+ipv6_mcroute_starg_rx()
+{
+	adf_install_starg
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
+}
+
+ipv4_mcroute_noroute()
+{
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	do_test 4 0 0 "IPv4 mcroute, no route"
+}
+
+ipv6_mcroute_noroute()
+{
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	do_test 6 0 0 "IPv6 mcroute, no route"
+}
+
+ipv4_mcroute_fdb()
+{
+	adf_install_sg
+	vx10_create local 192.0.2.100 dev "$IPMR" mcroute
+	bridge fdb add dev vx10 \
+		00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
+	do_test 4 10 10 "IPv4 mcroute FDB"
+}
+
+ipv6_mcroute_fdb()
+{
+	adf_install_sg
+	vx20_create local 2001:db8:4::1 dev "$IPMR" mcroute
+	bridge -6 fdb add dev vx20 \
+		00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
+	do_test 6 10 10 "IPv6 mcroute FDB"
+}
+
+# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup.
+ipv4_mcroute_fdb_oif0()
+{
+	adf_install_sg
+	vx10_create local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+	bridge fdb del dev vx10 00:00:00:00:00:00
+	bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+	do_test 4 10 10 "IPv4 mcroute oif=0"
+}
+
+ipv6_mcroute_fdb_oif0()
+{
+	# The IPv6 tunnel lookup does not fall back to selection by source
+	# address. Instead it just does a FIB match, and that would find one of
+	# the several ff00::/8 multicast routes -- each device has one. In order
+	# to reliably force the $IPMR device, add a /128 route for the
+	# destination group address.
+	ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR"
+	defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
+
+	adf_install_sg
+	vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+	bridge -6 fdb del dev vx20 00:00:00:00:00:00
+	bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
+	do_test 6 10 10 "IPv6 mcroute oif=0"
+}
+
+# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR.
+# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT.
+ipv4_mcroute_fdb_oif0_sep()
+{
+	adf_install_sg_sep
+
+	adf_ip_addr_add lo 192.0.2.120/28
+	vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+	bridge fdb del dev vx10 00:00:00:00:00:00
+	bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+	do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
+}
+
+ipv4_mcroute_fdb_oif0_sep_rx()
+{
+	adf_install_sg_sep_rx lo
+
+	adf_ip_addr_add lo 192.0.2.120/28
+	vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+	bridge fdb del dev vx10 00:00:00:00:00:00
+	bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+	ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
+}
+
+ipv4_mcroute_fdb_sep_rx()
+{
+	adf_install_sg_sep_rx lo
+
+	adf_ip_addr_add lo 192.0.2.120/28
+	vx10_create local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+	bridge fdb del dev vx10 00:00:00:00:00:00
+	bridge fdb add \
+	       dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
+	ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping"
+}
+
+ipv6_mcroute_fdb_sep_rx()
+{
+	adf_install_sg_sep_rx "X$IPMR"
+
+	adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64
+	vx20_create local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+	bridge -6 fdb del dev vx20 00:00:00:00:00:00
+	bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
+			  self static dst "$GROUP6" via "X$IPMR"
+	ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
index 46c31794b91b..709845123727 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
@@ -47,8 +47,7 @@ source lib.sh
 
 h1_create()
 {
-	simple_if_init $h1 192.0.2.1/28
-	defer simple_if_fini $h1 192.0.2.1/28
+	adf_simple_if_init $h1 192.0.2.1/28
 
 	tc qdisc add dev $h1 clsact
 	defer tc qdisc del dev $h1 clsact
@@ -60,24 +59,23 @@ h1_create()
 
 switch_create()
 {
-	ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0
+	adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0
 	# Make sure the bridge uses the MAC address of the local port and not
 	# that of the VxLAN's device.
-	ip_link_set_addr br1 $(mac_get $swp1)
-	ip_link_set_up br1
+	adf_ip_link_set_addr br1 $(mac_get $swp1)
+	adf_ip_link_set_up br1
 
-	ip_link_set_up $rp1
-	ip_addr_add $rp1 192.0.2.17/28
-	ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18
+	adf_ip_link_set_up $rp1
+	adf_ip_addr_add $rp1 192.0.2.17/28
+	adf_ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18
 
-	ip_link_set_master $swp1 br1
-	ip_link_set_up $swp1
+	adf_ip_link_set_master $swp1 br1
+	adf_ip_link_set_up $swp1
 }
 
 vrp2_create()
 {
-	simple_if_init $rp2 192.0.2.18/28
-	defer simple_if_fini $rp2 192.0.2.18/28
+	adf_simple_if_init $rp2 192.0.2.18/28
 }
 
 setup_prepare()
@@ -88,11 +86,8 @@ setup_prepare()
 	rp1=${NETIFS[p3]}
 	rp2=${NETIFS[p4]}
 
-	vrf_prepare
-	defer vrf_cleanup
-
-	forwarding_enable
-	defer forwarding_restore
+	adf_vrf_prepare
+	adf_forwarding_enable
 
 	h1_create
 	switch_create
@@ -200,10 +195,10 @@ vxlan_ping_do()
 
 vxlan_device_add()
 {
-	ip_link_add vx1 up type vxlan id 1000		\
+	adf_ip_link_add vx1 up type vxlan id 1000		\
 		local 192.0.2.17 dstport "$VXPORT"	\
 		nolearning noudpcsum tos inherit ttl 100 "$@"
-	ip_link_set_master vx1 br1
+	adf_ip_link_set_master vx1 br1
 }
 
 vxlan_all_reserved_bits()
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
index 977070ed42b3..223f9efe4090 100755
--- a/tools/testing/selftests/net/fq_band_pktlimit.sh
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -32,19 +32,19 @@ tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514
 DELAY=400000
 
 ./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
-OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+OUT1="$(tc -s qdisc show dev dummy0 | grep '^ Sent')"
 
 ./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
-OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+OUT2="$(tc -s qdisc show dev dummy0 | grep '^ Sent')"
 
 ./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000
-OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+OUT3="$(tc -s qdisc show dev dummy0 | grep '^ Sent')"
 
 # Initial stats will report zero sent, as all packets are still
 # queued in FQ. Sleep for at least the delay period and see that
 # twenty are now sent.
 sleep 0.6
-OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+OUT4="$(tc -s qdisc show dev dummy0 | grep '^ Sent')"
 
 # Log the output after the test
 echo "${OUT1}"
@@ -53,7 +53,7 @@ echo "${OUT3}"
 echo "${OUT4}"
 
 # Test the output for expected values
-echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10'  || die "unexpected drop count at 1"
-echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30'  || die "unexpected drop count at 2"
-echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40'  || die "unexpected drop count at 3"
-echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4"
+echo "${OUT1}" | grep -q '0 pkt (dropped 10'  || die "unexpected drop count at 1"
+echo "${OUT2}" | grep -q '0 pkt (dropped 30'  || die "unexpected drop count at 2"
+echo "${OUT3}" | grep -q '0 pkt (dropped 40'  || die "unexpected drop count at 3"
+echo "${OUT4}" | grep -q '20 pkt (dropped 40' || die "unexpected accept count at 4"
diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
new file mode 100755
index 000000000000..48eb999a3120
--- /dev/null
+++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./lib.sh
+
+PAUSE_ON_FAIL="no"
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+	cleanup_all_ns
+
+	exit "${EXIT_STATUS}"
+}
+
+# Add fake IPv4 and IPv6 networks on the loopback device, to be used as
+# underlay by future GRE devices.
+#
+setup_basenet()
+{
+	ip -netns "${NS0}" link set dev lo up
+	ip -netns "${NS0}" address add dev lo 192.0.2.10/24
+	ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad
+}
+
+# Check the IPv6 configuration of a network device.
+#
+# We currently check the generation of the link-local IPv6 address and the
+# creation of the ff00::/8 multicast route.
+#
+# Parameters:
+#
+#   * $1: The network device to test
+#   * $2: An extra regular expression that should be matched (to verify the
+#         presence of extra attributes)
+#   * $3: The expected return code from grep (to allow checking the absence of
+#         a link-local address)
+#   * $4: The user visible name for the scenario being tested
+#
+check_ipv6_device_config()
+{
+	local ifname="$1" extra_re="$2" want_rc="$3" title="$4"
+
+	RET=0
+	# The greps below may legitimately fail; lift "set -e" while checking.
+	set +e
+	ip -netns "${NS0}" -6 address show dev "${ifname}" scope link |
+		grep "fe80::" | grep -q "${extra_re}"
+	check_err_fail "${want_rc}" $? "IPv6 link-local address generation"
+
+	ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel |
+		grep -q "${ifname}"
+	check_err_fail 0 $? "IPv6 multicast route creation"
+
+	log_test "${title}"
+	set -e
+}
+
+# Create a GRE device and verify that it gets an IPv6 link-local address as
+# expected.
+#
+# Parameters:
+#
+#   * $1: The device type (gre, ip6gre, gretap or ip6gretap)
+#   * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any")
+#   * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any")
+#   * $4: The IPv6 interface identifier generation mode to use for the GRE
+#         device (eui64, none, stable-privacy or random).
+#
+test_gre_device()
+{
+	local GRE_TYPE="$1"
+	local LOCAL_IP="$2"
+	local REMOTE_IP="$3"
+	local MODE="$4"
+	local ADDR_GEN_MODE MATCH_REGEXP MSG
+	# XRET is per-call state too; keep it out of the global scope
+	local XRET
+
+	ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}"
+
+	case "${MODE}" in
+	    "eui64")
+		ADDR_GEN_MODE=0
+		MATCH_REGEXP=""
+		MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}"
+		XRET=0
+		;;
+	    "none")
+		ADDR_GEN_MODE=1
+		MATCH_REGEXP=""
+		MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}"
+		XRET=1 # No link-local address should be generated
+		;;
+	    "stable-privacy")
+		ADDR_GEN_MODE=2
+		MATCH_REGEXP="stable-privacy"
+		MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}"
+		XRET=0
+		# Initialise stable_secret (required for stable-privacy mode)
+		ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd"
+		;;
+	    "random")
+		ADDR_GEN_MODE=3
+		MATCH_REGEXP="stable-privacy"
+		MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}"
+		XRET=0
+		;;
+	esac
+
+	# Check the IPv6 device configuration when it goes up
+	ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+	ip -netns "${NS0}" link set dev gretest up
+	check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
+
+	# Now disable link-local address generation
+	ip -netns "${NS0}" link set dev gretest down
+	ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1
+	ip -netns "${NS0}" link set dev gretest up
+
+	# Check the IPv6 device configuration when link-local address
+	# generation is re-enabled while the device is already up
+	ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+	check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
+
+	ip -netns "${NS0}" link del dev gretest
+}
+
+# Run test_gre_device for every address generation mode on gre and gretap.
+test_gre4()
+{
+	local GRE_TYPE MODE
+
+	for GRE_TYPE in "gre" "gretap"; do
+		printf "\n####\nTesting IPv6 configuration of %s devices\n####\n\n" "${GRE_TYPE}"
+
+		for MODE in "eui64" "none" "stable-privacy" "random"; do
+			test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}"
+			test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}"
+			test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}"
+		done
+	done
+}
+
+# Run test_gre_device for every address generation mode on ip6gre and ip6gretap.
+test_gre6()
+{
+	local GRE_TYPE MODE
+
+	for GRE_TYPE in "ip6gre" "ip6gretap"; do
+		printf "\n####\nTesting IPv6 configuration of %s devices\n####\n\n" "${GRE_TYPE}"
+
+		for MODE in "eui64" "none" "stable-privacy" "random"; do
+			test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}"
+			test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}"
+			test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}"
+		done
+	done
+}
+
+usage()
+{
+	echo "Usage: $0 [-p]"
+	exit 1
+}
+
+while getopts :p o
+do
+	case $o in
+		p) PAUSE_ON_FAIL="yes";;
+		*) usage;;
+	esac
+done
+
+setup_ns NS0
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_basenet
+
+test_gre4
+test_gre6
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
deleted file mode 100755
index 02c21ff4ca81..000000000000
--- a/tools/testing/selftests/net/gro.sh
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly SERVER_MAC="aa:00:00:00:00:02"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
-readonly PROTOS=("ipv4" "ipv6")
-dev=""
-test="all"
-proto="ipv4"
-
-run_test() {
-  local server_pid=0
-  local exit_code=0
-  local protocol=$1
-  local test=$2
-  local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
-  "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
-
-  setup_ns
-  # Each test is run 3 times to deflake, because given the receive timing,
-  # not all packets that should coalesce will be considered in the same flow
-  # on every try.
-  for tries in {1..3}; do
-    # Actual test starts here
-    ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
-      1>>log.txt &
-    server_pid=$!
-    sleep 0.5  # to allow for socket init
-    ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \
-      1>>log.txt
-    wait "${server_pid}"
-    exit_code=$?
-    if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
-          ${exit_code} -ne 0 ]]; then
-        echo "Ignoring errors due to slow environment" 1>&2
-        exit_code=0
-    fi
-    if [[ "${exit_code}" -eq 0 ]]; then
-        break;
-    fi
-  done
-  cleanup_ns
-  echo ${exit_code}
-}
-
-run_all_tests() {
-  local failed_tests=()
-  for proto in "${PROTOS[@]}"; do
-    for test in "${TESTS[@]}"; do
-      echo "running test ${proto} ${test}" >&2
-      exit_code=$(run_test $proto $test)
-      if [[ "${exit_code}" -ne 0 ]]; then
-        failed_tests+=("${proto}_${test}")
-      fi;
-    done;
-  done
-  if [[ ${#failed_tests[@]} -ne 0 ]]; then
-    echo "failed tests: ${failed_tests[*]}. \
-    Please see log.txt for more logs"
-    exit 1
-  else
-    echo "All Tests Succeeded!"
-  fi;
-}
-
-usage() {
-  echo "Usage: $0 \
-  [-i <DEV>] \
-  [-t data|ack|flags|tcp|ip|large] \
-  [-p <ipv4|ipv6>]" 1>&2;
-  exit 1;
-}
-
-while getopts "i:t:p:" opt; do
-  case "${opt}" in
-    i)
-      dev="${OPTARG}"
-      ;;
-    t)
-      test="${OPTARG}"
-      ;;
-    p)
-      proto="${OPTARG}"
-      ;;
-    *)
-      usage
-      ;;
-  esac
-done
-
-if [ -n "$dev" ]; then
-	source setup_loopback.sh
-else
-	source setup_veth.sh
-fi
-
-setup
-trap cleanup EXIT
-if [[ "${test}" == "all" ]]; then
-  run_all_tests
-else
-  run_test "${proto}" "${test}"
-fi;
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 884cd2cc0681..31fb9326cf53 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -2,7 +2,13 @@
 
 top_srcdir = ../../../../..
 
-TEST_PROGS := hsr_ping.sh hsr_redbox.sh
+TEST_PROGS := \
+	hsr_ping.sh \
+	hsr_redbox.sh \
+	link_faults.sh \
+	prp_ping.sh \
+# end of TEST_PROGS
+
 TEST_FILES += hsr_common.sh
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
index 555a868743f0..205cc4d3d64b 100644
--- a/tools/testing/selftests/net/hsr/config
+++ b/tools/testing/selftests/net/hsr/config
@@ -1,6 +1,6 @@
+CONFIG_BRIDGE=y
+CONFIG_HSR=y
 CONFIG_IPV6=y
 CONFIG_NET_SCH_NETEM=m
-CONFIG_HSR=y
 CONFIG_VETH=y
-CONFIG_BRIDGE=y
 CONFIG_VLAN_8021Q=m
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 5a65f4f836be..f4d685df4345 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -27,31 +27,34 @@ while getopts "$optstring" option;do
 esac
 done
 
-do_complete_ping_test()
+do_ping_tests()
 {
-	echo "INFO: Initial validation ping."
-	# Each node has to be able each one.
-	do_ping "$ns1" 100.64.0.2
-	do_ping "$ns2" 100.64.0.1
-	do_ping "$ns3" 100.64.0.1
-	stop_if_error "Initial validation failed."
-
-	do_ping "$ns1" 100.64.0.3
-	do_ping "$ns2" 100.64.0.3
-	do_ping "$ns3" 100.64.0.2
+	local netid="$1"
 
-	do_ping "$ns1" dead:beef:1::2
-	do_ping "$ns1" dead:beef:1::3
-	do_ping "$ns2" dead:beef:1::1
-	do_ping "$ns2" dead:beef:1::2
-	do_ping "$ns3" dead:beef:1::1
-	do_ping "$ns3" dead:beef:1::2
+	echo "INFO: Running ping tests."
 
-	stop_if_error "Initial validation failed."
+	echo "INFO: Initial validation ping."
+	# Each node has to be able to reach each one.
+	do_ping "$ns1" "100.64.$netid.2"
+	do_ping "$ns1" "100.64.$netid.3"
+	do_ping "$ns2" "100.64.$netid.1"
+	do_ping "$ns2" "100.64.$netid.3"
+	do_ping "$ns3" "100.64.$netid.1"
+	do_ping "$ns3" "100.64.$netid.2"
+	stop_if_error "Initial validation failed on IPv4."
+
+	do_ping "$ns1" "dead:beef:$netid::2"
+	do_ping "$ns1" "dead:beef:$netid::3"
+	do_ping "$ns2" "dead:beef:$netid::1"
+	do_ping "$ns2" "dead:beef:$netid::3"
+	do_ping "$ns3" "dead:beef:$netid::1"
+	do_ping "$ns3" "dead:beef:$netid::2"
+	stop_if_error "Initial validation failed on IPv6."
 
 # Wait until supervisor all supervision frames have been processed and the node
 # entries have been merged. Otherwise duplicate frames will be observed which is
 # valid at this stage.
+	echo "INFO: Wait for node table entries to be merged."
 	WAIT=5
 	while [ ${WAIT} -gt 0 ]
 	do
@@ -68,62 +71,30 @@ do_complete_ping_test()
 	sleep 1
 
 	echo "INFO: Longer ping test."
-	do_ping_long "$ns1" 100.64.0.2
-	do_ping_long "$ns1" dead:beef:1::2
-	do_ping_long "$ns1" 100.64.0.3
-	do_ping_long "$ns1" dead:beef:1::3
-
-	stop_if_error "Longer ping test failed."
-
-	do_ping_long "$ns2" 100.64.0.1
-	do_ping_long "$ns2" dead:beef:1::1
-	do_ping_long "$ns2" 100.64.0.3
-	do_ping_long "$ns2" dead:beef:1::2
-	stop_if_error "Longer ping test failed."
-
-	do_ping_long "$ns3" 100.64.0.1
-	do_ping_long "$ns3" dead:beef:1::1
-	do_ping_long "$ns3" 100.64.0.2
-	do_ping_long "$ns3" dead:beef:1::2
-	stop_if_error "Longer ping test failed."
-
-	echo "INFO: Cutting one link."
-	do_ping_long "$ns1" 100.64.0.3 &
-
-	sleep 3
-	ip -net "$ns3" link set ns3eth1 down
-	wait
-
-	ip -net "$ns3" link set ns3eth1 up
-
-	stop_if_error "Failed with one link down."
-
-	echo "INFO: Delay the link and drop a few packages."
-	tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms
-	tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25%
-
-	do_ping_long "$ns1" 100.64.0.2
-	do_ping_long "$ns1" 100.64.0.3
-
-	stop_if_error "Failed with delay and packetloss."
-
-	do_ping_long "$ns2" 100.64.0.1
-	do_ping_long "$ns2" 100.64.0.3
-
-	stop_if_error "Failed with delay and packetloss."
-
-	do_ping_long "$ns3" 100.64.0.1
-	do_ping_long "$ns3" 100.64.0.2
-	stop_if_error "Failed with delay and packetloss."
-
-	echo "INFO: All good."
+	do_ping_long "$ns1" "100.64.$netid.2"
+	do_ping_long "$ns1" "dead:beef:$netid::2"
+	do_ping_long "$ns1" "100.64.$netid.3"
+	do_ping_long "$ns1" "dead:beef:$netid::3"
+	stop_if_error "Longer ping test failed (ns1)."
+
+	do_ping_long "$ns2" "100.64.$netid.1"
+	do_ping_long "$ns2" "dead:beef:$netid::1"
+	do_ping_long "$ns2" "100.64.$netid.3"
+	do_ping_long "$ns2" "dead:beef:$netid::3"
+	stop_if_error "Longer ping test failed (ns2)."
+
+	do_ping_long "$ns3" "100.64.$netid.1"
+	do_ping_long "$ns3" "dead:beef:$netid::1"
+	do_ping_long "$ns3" "100.64.$netid.2"
+	do_ping_long "$ns3" "dead:beef:$netid::2"
+	stop_if_error "Longer ping test failed (ns3)."
 }
 
 setup_hsr_interfaces()
 {
 	local HSRv="$1"
 
-	echo "INFO: preparing interfaces for HSRv${HSRv}."
+	echo "INFO: Preparing interfaces for HSRv${HSRv}."
 # Three HSR nodes. Each node has one link to each of its neighbour, two links in total.
 #
 #    ns1eth1 ----- ns2eth1
@@ -140,17 +111,20 @@ setup_hsr_interfaces()
 	ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2"
 
 	# HSRv0/1
-	ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0
-	ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0
-	ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0
+	ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 \
+		slave2 ns1eth2 supervision 45 version "$HSRv" proto 0
+	ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 \
+		slave2 ns2eth2 supervision 45 version "$HSRv" proto 0
+	ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 \
+		slave2 ns3eth2 supervision 45 version "$HSRv" proto 0
 
 	# IP for HSR
 	ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1
-	ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad
+	ip -net "$ns1" addr add dead:beef:0::1/64 dev hsr1 nodad
 	ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2
-	ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad
+	ip -net "$ns2" addr add dead:beef:0::2/64 dev hsr2 nodad
 	ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3
-	ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad
+	ip -net "$ns3" addr add dead:beef:0::3/64 dev hsr3 nodad
 
 	ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1
 	ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2
@@ -177,113 +151,56 @@ setup_hsr_interfaces()
 
 setup_vlan_interfaces() {
 	ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2
-	ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3
-	ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4
-	ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5
-
 	ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2
-	ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3
-	ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4
-	ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5
-
 	ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2
-	ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3
-	ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4
-	ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5
 
 	ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2
-	ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3
-	ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4
-	ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5
+	ip -net "$ns1" addr add dead:beef:2::1/64 dev hsr1.2 nodad
 
 	ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2
-	ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3
-	ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4
-	ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5
+	ip -net "$ns2" addr add dead:beef:2::2/64 dev hsr2.2 nodad
 
 	ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2
-	ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3
-	ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4
-	ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5
+	ip -net "$ns3" addr add dead:beef:2::3/64 dev hsr3.2 nodad
 
 	ip -net "$ns1" link set dev hsr1.2 up
-	ip -net "$ns1" link set dev hsr1.3 up
-	ip -net "$ns1" link set dev hsr1.4 up
-	ip -net "$ns1" link set dev hsr1.5 up
-
 	ip -net "$ns2" link set dev hsr2.2 up
-	ip -net "$ns2" link set dev hsr2.3 up
-	ip -net "$ns2" link set dev hsr2.4 up
-	ip -net "$ns2" link set dev hsr2.5 up
-
 	ip -net "$ns3" link set dev hsr3.2 up
-	ip -net "$ns3" link set dev hsr3.3 up
-	ip -net "$ns3" link set dev hsr3.4 up
-	ip -net "$ns3" link set dev hsr3.5 up
 
 }
 
-hsr_vlan_ping() {
-	do_ping "$ns1" 100.64.2.2
-	do_ping "$ns1" 100.64.3.2
-	do_ping "$ns1" 100.64.4.2
-	do_ping "$ns1" 100.64.5.2
-
-	do_ping "$ns1" 100.64.2.3
-	do_ping "$ns1" 100.64.3.3
-	do_ping "$ns1" 100.64.4.3
-	do_ping "$ns1" 100.64.5.3
-
-	do_ping "$ns2" 100.64.2.1
-	do_ping "$ns2" 100.64.3.1
-	do_ping "$ns2" 100.64.4.1
-	do_ping "$ns2" 100.64.5.1
-
-	do_ping "$ns2" 100.64.2.3
-	do_ping "$ns2" 100.64.3.3
-	do_ping "$ns2" 100.64.4.3
-	do_ping "$ns2" 100.64.5.3
-
-	do_ping "$ns3" 100.64.2.1
-	do_ping "$ns3" 100.64.3.1
-	do_ping "$ns3" 100.64.4.1
-	do_ping "$ns3" 100.64.5.1
-
-	do_ping "$ns3" 100.64.2.2
-	do_ping "$ns3" 100.64.3.2
-	do_ping "$ns3" 100.64.4.2
-	do_ping "$ns3" 100.64.5.2
+run_ping_tests()
+{
+	# do_ping_tests prints the "Running ping tests." banner itself.
+	do_ping_tests 0
+}
 
-run_vlan_tests() {
+run_vlan_tests()
+{
 	vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}')
 	vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}')
 	vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}')
 
 	if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then
-		echo "INFO: Running VLAN tests"
+		echo "INFO: Running VLAN ping tests"
 		setup_vlan_interfaces
-		hsr_vlan_ping
+		do_ping_tests 2
 	else
 		echo "INFO: Not Running VLAN tests as the device does not support VLAN"
 	fi
 }
 
 check_prerequisites
-setup_ns ns1 ns2 ns3
-
 trap cleanup_all_ns EXIT
 
+setup_ns ns1 ns2 ns3
 setup_hsr_interfaces 0
-do_complete_ping_test
-
+run_ping_tests
 run_vlan_tests
 
 setup_ns ns1 ns2 ns3
-
 setup_hsr_interfaces 1
-do_complete_ping_test
-
+run_ping_tests
 run_vlan_tests
 
 exit $ret
diff --git a/tools/testing/selftests/net/hsr/link_faults.sh b/tools/testing/selftests/net/hsr/link_faults.sh
new file mode 100755
index 000000000000..be526281571c
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/link_faults.sh
@@ -0,0 +1,378 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2329
+
+source ../lib.sh
+
+ALL_TESTS="
+	test_clean_hsrv0
+	test_cut_link_hsrv0
+	test_packet_loss_hsrv0
+	test_high_packet_loss_hsrv0
+	test_reordering_hsrv0
+
+	test_clean_hsrv1
+	test_cut_link_hsrv1
+	test_packet_loss_hsrv1
+	test_high_packet_loss_hsrv1
+	test_reordering_hsrv1
+
+	test_clean_prp
+	test_cut_link_prp
+	test_packet_loss_prp
+	test_high_packet_loss_prp
+	test_reordering_prp
+"
+
+# The tests run ping for about 4 seconds (400 packets, 10ms apart) in
+# different scenarios with faulty links (cut links, packet loss, delay,
+# reordering) that should be recoverable by HSR/PRP. The ping interval (10ms)
+# is short enough that the base delay (50ms) leads to a queue in the netem
+# qdiscs, which is needed for reordering.
+
+setup_hsr_topo()
+{
+	# Three HSR nodes in a ring, every node has a LAN A interface connected
+	# to the LAN B interface of the next node.
+	#
+	#    node1            node2
+	#
+	#     vethA -------- vethB
+	#   hsr1                 hsr2
+	#     vethB          vethA
+	#         \          /
+	#         vethA  vethB
+	#             hsr3
+	#
+	#            node3
+
+	local ver="$1"
+
+	setup_ns node1 node2 node3
+
+	# veth links
+	# shellcheck disable=SC2154 # variables assigned by setup_ns
+	ip link add vethA netns "$node1" type veth peer name vethB netns "$node2"
+	# shellcheck disable=SC2154 # variables assigned by setup_ns
+	ip link add vethA netns "$node2" type veth peer name vethB netns "$node3"
+	ip link add vethA netns "$node3" type veth peer name vethB netns "$node1"
+
+	# MAC addresses (not needed for HSR operation, but helps with debugging)
+	ip -net "$node1" link set address 00:11:22:00:01:01 dev vethA
+	ip -net "$node1" link set address 00:11:22:00:01:02 dev vethB
+
+	ip -net "$node2" link set address 00:11:22:00:02:01 dev vethA
+	ip -net "$node2" link set address 00:11:22:00:02:02 dev vethB
+
+	ip -net "$node3" link set address 00:11:22:00:03:01 dev vethA
+	ip -net "$node3" link set address 00:11:22:00:03:02 dev vethB
+
+	# HSR interfaces
+	ip -net "$node1" link add name hsr1 type hsr proto 0 version "$ver" \
+		slave1 vethA slave2 vethB supervision 45
+	ip -net "$node2" link add name hsr2 type hsr proto 0 version "$ver" \
+		slave1 vethA slave2 vethB supervision 45
+	ip -net "$node3" link add name hsr3 type hsr proto 0 version "$ver" \
+		slave1 vethA slave2 vethB supervision 45
+
+	# IP addresses
+	ip -net "$node1" addr add 100.64.0.1/24 dev hsr1
+	ip -net "$node2" addr add 100.64.0.2/24 dev hsr2
+	ip -net "$node3" addr add 100.64.0.3/24 dev hsr3
+
+	# Set all links up
+	ip -net "$node1" link set vethA up
+	ip -net "$node1" link set vethB up
+	ip -net "$node1" link set hsr1 up
+
+	ip -net "$node2" link set vethA up
+	ip -net "$node2" link set vethB up
+	ip -net "$node2" link set hsr2 up
+
+	ip -net "$node3" link set vethA up
+	ip -net "$node3" link set vethB up
+	ip -net "$node3" link set hsr3 up
+}
+
+setup_prp_topo()
+{
+	# Two PRP nodes, connected by two links (treated as LAN A and LAN B).
+	#
+	#       vethA ----- vethA
+	#     prp1             prp2
+	#       vethB ----- vethB
+	#
+	#     node1           node2
+
+	setup_ns node1 node2
+
+	# veth links
+	ip link add vethA netns "$node1" type veth peer name vethA netns "$node2"
+	ip link add vethB netns "$node1" type veth peer name vethB netns "$node2"
+
+	# MAC addresses will be copied from LAN A interface
+	ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA
+	ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA
+
+	# PRP interfaces
+	ip -net "$node1" link add name prp1 type hsr \
+		slave1 vethA slave2 vethB supervision 45 proto 1
+	ip -net "$node2" link add name prp2 type hsr \
+		slave1 vethA slave2 vethB supervision 45 proto 1
+
+	# IP addresses
+	ip -net "$node1" addr add 100.64.0.1/24 dev prp1
+	ip -net "$node2" addr add 100.64.0.2/24 dev prp2
+
+	# All links up
+	ip -net "$node1" link set vethA up
+	ip -net "$node1" link set vethB up
+	ip -net "$node1" link set prp1 up
+
+	ip -net "$node2" link set vethA up
+	ip -net "$node2" link set vethB up
+	ip -net "$node2" link set prp2 up
+}
+
+wait_for_hsr_node_table()
+{
+	log_info "Wait for node table entries to be merged."
+	WAIT=5
+	while [ "${WAIT}" -gt 0 ]; do
+		nts=$(cat /sys/kernel/debug/hsr/hsr*/node_table)
+
+		# We need entries in the node tables, and they need to be merged
+		if (echo "$nts" | grep -qE "^([0-9a-f]{2}:){5}") && \
+		    ! (echo "$nts" | grep -q "00:00:00:00:00:00"); then
+			return
+		fi
+
+		sleep 1
+		((WAIT--))
+	done
+	check_err 1 "Failed to wait for merged node table entries"
+}
+
+setup_topo()
+{
+	local proto="$1"
+
+	# HSRv0 and HSRv1 share one topology; the trailing digit is the version.
+	case "$proto" in
+	HSRv0 | HSRv1)
+		setup_hsr_topo "${proto#HSRv}"
+		wait_for_hsr_node_table
+		;;
+	PRP) setup_prp_topo ;;
+	*)
+		check_err 1 "Unknown protocol (${proto})"
+		;;
+	esac
+}
+
+check_ping()
+{
+	local node="$1"
+	local dst="$2"
+	local accepted_dups="$3"
+	local ping_args="-q -i 0.01 -c 400"
+
+	log_info "Running ping $node -> $dst"
+	# shellcheck disable=SC2086
+	output=$(ip netns exec "$node" ping $ping_args "$dst" | \
+		grep "packets transmitted")
+	log_info "$output"
+
+	dups=0
+	loss=0
+
+	if [[ "$output" =~ \+([0-9]+)" duplicates" ]]; then
+		dups="${BASH_REMATCH[1]}"
+	fi
+	if [[ "$output" =~ ([0-9\.]+\%)" packet loss" ]]; then
+		loss="${BASH_REMATCH[1]}"
+	fi
+
+	if [ "$dups" -gt "$accepted_dups" ]; then
+		check_err 1 "Unexpected duplicate packets (${dups})"
+	fi
+	if [ "$loss" != "0%" ]; then
+		check_err 1 "Unexpected packet loss (${loss})"
+	fi
+}
+
+test_clean()
+{
+	local proto="$1"
+
+	RET=0
+	tname="${FUNCNAME[0]} - ${proto}"
+
+	setup_topo "$proto"
+	if ((RET != ksft_pass)); then
+		log_test "${tname} setup"
+		return
+	fi
+
+	check_ping "$node1" "100.64.0.2" 0
+
+	log_test "${tname}"
+}
+
+test_clean_hsrv0()
+{
+	test_clean "HSRv0"
+}
+
+test_clean_hsrv1()
+{
+	test_clean "HSRv1"
+}
+
+test_clean_prp()
+{
+	test_clean "PRP"
+}
+
+test_cut_link()
+{
+	local proto="$1"
+
+	RET=0
+	tname="${FUNCNAME[0]} - ${proto}"
+
+	setup_topo "$proto"
+	if ((RET != ksft_pass)); then
+		log_test "${tname} setup"
+		return
+	fi
+
+	# Cutting link from subshell, so check_ping can run in the normal shell
+	# with access to global variables from the test harness.
+	(
+		sleep 2
+		log_info "Cutting link"
+		ip -net "$node1" link set vethB down
+	) &
+	check_ping "$node1" "100.64.0.2" 0
+
+	wait
+	log_test "${tname}"
+}
+
+
+test_cut_link_hsrv0()
+{
+	test_cut_link "HSRv0"
+}
+
+test_cut_link_hsrv1()
+{
+	test_cut_link "HSRv1"
+}
+
+test_cut_link_prp()
+{
+	test_cut_link "PRP"
+}
+
+test_packet_loss()
+{
+	local proto="$1"
+	local loss="$2"
+
+	RET=0
+	tname="${FUNCNAME[0]} - ${proto}, ${loss}"
+
+	setup_topo "$proto"
+	if ((RET != ksft_pass)); then
+		log_test "${tname} setup"
+		return
+	fi
+
+	# Packet loss with lower delay makes sure the packets on the lossy link
+	# arrive first.
+	tc -net "$node1" qdisc add dev vethA root netem delay 50ms
+	tc -net "$node1" qdisc add dev vethB root netem delay 20ms loss "$loss"
+
+	check_ping "$node1" "100.64.0.2" 40
+
+	log_test "${tname}"
+}
+
+test_packet_loss_hsrv0()
+{
+	test_packet_loss "HSRv0" "20%"
+}
+
+test_packet_loss_hsrv1()
+{
+	test_packet_loss "HSRv1" "20%"
+}
+
+test_packet_loss_prp()
+{
+	test_packet_loss "PRP" "20%"
+}
+
+test_high_packet_loss_hsrv0()
+{
+	test_packet_loss "HSRv0" "80%"
+}
+
+test_high_packet_loss_hsrv1()
+{
+	test_packet_loss "HSRv1" "80%"
+}
+
+test_high_packet_loss_prp()
+{
+	test_packet_loss "PRP" "80%"
+}
+
+test_reordering()
+{
+	local proto="$1"
+
+	RET=0
+	tname="${FUNCNAME[0]} - ${proto}"
+
+	setup_topo "$proto"
+	if ((RET != ksft_pass)); then
+		log_test "${tname} setup"
+		return
+	fi
+
+	tc -net "$node1" qdisc add dev vethA root netem delay 50ms
+	tc -net "$node1" qdisc add dev vethB root netem delay 50ms reorder 20%
+
+	check_ping "$node1" "100.64.0.2" 40
+
+	log_test "${tname}"
+}
+
+test_reordering_hsrv0()
+{
+	test_reordering "HSRv0"
+}
+
+test_reordering_hsrv1()
+{
+	test_reordering "HSRv1"
+}
+
+test_reordering_prp()
+{
+	test_reordering "PRP"
+}
+
+cleanup()
+{
+	cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/hsr/prp_ping.sh b/tools/testing/selftests/net/hsr/prp_ping.sh
new file mode 100755
index 000000000000..fd2ba9f05d4c
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/prp_ping.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ipv6=true
+
+source ./hsr_common.sh
+
+optstring="h4"
+usage() {
+	echo "Usage: $0 [OPTION]"
+	echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
+}
+
+while getopts "$optstring" option;do
+	case "$option" in
+	"h")
+		usage "$0"
+		exit 0
+		;;
+	"4")
+		ipv6=false
+		;;
+	"?")
+		usage "$0"
+		exit 1
+		;;
+esac
+done
+
+setup_prp_interfaces()
+{
+	echo "INFO: Preparing interfaces for PRP"
+# Two PRP nodes, connected by two links (treated as LAN A and LAN B).
+#
+#       vethA ----- vethA
+#     prp1             prp2
+#       vethB ----- vethB
+#
+#     node1           node2
+
+	# Interfaces
+	# shellcheck disable=SC2154 # variables assigned by setup_ns
+	ip link add vethA netns "$node1" type veth peer name vethA netns "$node2"
+	ip link add vethB netns "$node1" type veth peer name vethB netns "$node2"
+
+	# MAC addresses will be copied from LAN A interface
+	ip -net "$node1" link set address 00:11:22:00:00:01 dev vethA
+	ip -net "$node2" link set address 00:11:22:00:00:02 dev vethA
+
+	# PRP
+	ip -net "$node1" link add name prp1 type hsr \
+		slave1 vethA slave2 vethB supervision 45 proto 1
+	ip -net "$node2" link add name prp2 type hsr \
+		slave1 vethA slave2 vethB supervision 45 proto 1
+
+	# IP addresses
+	ip -net "$node1" addr add 100.64.0.1/24 dev prp1
+	ip -net "$node1" addr add dead:beef:0::1/64 dev prp1 nodad
+	ip -net "$node2" addr add 100.64.0.2/24 dev prp2
+	ip -net "$node2" addr add dead:beef:0::2/64 dev prp2 nodad
+
+	# All links up
+	ip -net "$node1" link set vethA up
+	ip -net "$node1" link set vethB up
+	ip -net "$node1" link set prp1 up
+
+	ip -net "$node2" link set vethA up
+	ip -net "$node2" link set vethB up
+	ip -net "$node2" link set prp2 up
+}
+
+setup_vlan_interfaces()
+{
+	# Interfaces
+	ip -net "$node1" link add link prp1 name prp1.2 type vlan id 2
+	ip -net "$node2" link add link prp2 name prp2.2 type vlan id 2
+
+	# IP addresses
+	ip -net "$node1" addr add 100.64.2.1/24 dev prp1.2
+	ip -net "$node1" addr add dead:beef:2::1/64 dev prp1.2 nodad
+
+	ip -net "$node2" addr add 100.64.2.2/24 dev prp2.2
+	ip -net "$node2" addr add dead:beef:2::2/64 dev prp2.2 nodad
+
+	# All links up
+	ip -net "$node1" link set prp1.2 up
+	ip -net "$node2" link set prp2.2 up
+}
+
+do_ping_tests()
+{
+	local netid="$1"
+
+	echo "INFO: Initial validation ping"
+
+	do_ping "$node1" "100.64.$netid.2"
+	do_ping "$node2" "100.64.$netid.1"
+	stop_if_error "Initial validation failed on IPv4"
+
+	do_ping "$node1" "dead:beef:$netid::2"
+	do_ping "$node2" "dead:beef:$netid::1"
+	stop_if_error "Initial validation failed on IPv6"
+
+	echo "INFO: Longer ping test."
+
+	do_ping_long "$node1" "100.64.$netid.2"
+	do_ping_long "$node2" "100.64.$netid.1"
+	stop_if_error "Longer ping test failed on IPv4."
+
+	do_ping_long "$node1" "dead:beef:$netid::2"
+	do_ping_long "$node2" "dead:beef:$netid::1"
+	stop_if_error "Longer ping test failed on IPv6."
+}
+
+run_ping_tests()
+{
+	echo "INFO: Running ping tests"
+	do_ping_tests 0
+}
+
+# Run the VLAN ping tests if ethtool shows vlan-challenged "off" on prp1 or prp2.
+run_vlan_ping_tests()
+{
+	vlan_challenged_prp1=$(ip net exec "$node1" ethtool -k prp1 | \
+		awk '/vlan-challenged/ {print $2}')
+	vlan_challenged_prp2=$(ip net exec "$node2" ethtool -k prp2 | \
+		awk '/vlan-challenged/ {print $2}')
+	if [[ "$vlan_challenged_prp1" != "off" && \
+	      "$vlan_challenged_prp2" != "off" ]]; then
+		echo "INFO: Not Running VLAN tests as the device does not support VLAN"
+		return
+	fi
+	echo "INFO: Running VLAN ping tests"
+	setup_vlan_interfaces
+	do_ping_tests 2
+}
+
+check_prerequisites
+trap cleanup_all_ns EXIT
+
+setup_ns node1 node2
+setup_prp_interfaces
+
+run_ping_tests
+run_vlan_ping_tests
+
+exit $ret
diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings
index 0fbc037f2aa8..a953c96aa16e 100644
--- a/tools/testing/selftests/net/hsr/settings
+++ b/tools/testing/selftests/net/hsr/settings
@@ -1 +1 @@
-timeout=50
+timeout=180
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index d6f0e449c029..b13c89a99ecb 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -178,8 +178,6 @@ setup()
 		else
 			ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
 			ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
-			ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
-			ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
 
 			ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
 			ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
diff --git a/tools/testing/selftests/net/icmp_rfc4884.c b/tools/testing/selftests/net/icmp_rfc4884.c
new file mode 100644
index 000000000000..cd826b913557
--- /dev/null
+++ b/tools/testing/selftests/net/icmp_rfc4884.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <linux/errqueue.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <netinet/in.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+static const unsigned short src_port = 44444;
+static const unsigned short dst_port = 55555;
+static const int min_orig_dgram_len = 128;
+static const int min_payload_len_v4 =
+	min_orig_dgram_len - sizeof(struct iphdr) - sizeof(struct udphdr);
+static const int min_payload_len_v6 =
+	min_orig_dgram_len - sizeof(struct ipv6hdr) - sizeof(struct udphdr);
+static const uint8_t orig_payload_byte =  0xAA;
+
+struct sockaddr_inet {
+	union {	/* one storage area, viewed as IPv6, IPv4 or generic sockaddr */
+		struct sockaddr_in6 v6;
+		struct sockaddr_in v4;
+		struct sockaddr sa;
+	};
+	socklen_t len;	/* size of the member in use, as filled in by set_addr() */
+};
+
+struct ip_case_info {
+	int	domain;		/* socket domain: AF_INET or AF_INET6 */
+	int	level;		/* setsockopt()/cmsg level: SOL_IP or SOL_IPV6 */
+	int	opt1;		/* RECVERR option; also the expected cmsg type */
+	int	opt2;		/* RECVERR_RFC4884 option */
+	int	proto;		/* protocol of the raw socket sending the error */
+	int	(*build_func)(uint8_t *buf, ssize_t buflen, bool with_ext,
+			      int payload_len, bool bad_csum, bool bad_len,
+			      bool smaller_len);
+	int	min_payload;	/* UDP payload giving a 128 byte datagram */
+};
+
+/* Set IFF_UP on the "lo" interface. Returns 0 on success, -1 on failure. */
+static int bringup_loopback(void)
+{
+	struct ifreq req = {
+		.ifr_name = "lo"
+	};
+	int ret = -1;
+	int sk;
+
+	sk = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sk < 0)
+		return ret;
+
+	/* Read the current flags first so that only IFF_UP is added. */
+	if (ioctl(sk, SIOCGIFFLAGS, &req) < 0)
+		goto out;
+
+	req.ifr_flags |= IFF_UP;
+
+	if (ioctl(sk, SIOCSIFFLAGS, &req) >= 0)
+		ret = 0;
+
+out:
+	close(sk);
+	return ret;
+}
+
+/* 16-bit ones' complement checksum of len bytes, returned in host order. */
+static uint16_t csum(const void *buf, size_t len)
+{
+	const uint8_t *p = buf;
+	uint32_t acc = 0;
+	size_t i;
+
+	for (i = 0; i + 1 < len; i += 2)
+		acc += (p[i] << 8) | p[i + 1];
+
+	/* An odd trailing byte is padded with a zero low byte. */
+	if (len & 1)
+		acc += p[len - 1] << 8;
+
+	/* Fold the carries back into the low 16 bits. */
+	while (acc >> 16)
+		acc = (acc >> 16) + (acc & 0xFFFF);
+	return ~acc & 0xFFFF;
+}
+
+/* Wait up to 5 seconds for fd to report POLLERR and nothing else.
+ * Returns 0 when it does, -1 on timeout, failure or any other event.
+ */
+static int poll_err(int fd)
+{
+	struct pollfd pfd = { .fd = fd };
+
+	if (poll(&pfd, 1, 5000) != 1)
+		return -1;
+
+	return pfd.revents == POLLERR ? 0 : -1;
+}
+
+/* Zero addr, then fill it with the loopback address of the given domain and
+ * port (host byte order). Other domains leave addr zeroed.
+ */
+static void set_addr(struct sockaddr_inet *addr, int domain,
+		     unsigned short port)
+{
+	memset(addr, 0, sizeof(*addr));
+
+	if (domain == AF_INET) {
+		addr->len = sizeof(addr->v4);
+		addr->v4.sin_family = AF_INET;
+		addr->v4.sin_port = htons(port);
+		addr->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	} else if (domain == AF_INET6) {
+		addr->len = sizeof(addr->v6);
+		addr->v6.sin6_family = AF_INET6;
+		addr->v6.sin6_port = htons(port);
+		addr->v6.sin6_addr = in6addr_loopback;
+	}
+}
+
+/* Enable both socket options from info on fd, then bind fd to the loopback
+ * address on src_port. Returns 0 on success, -1 on failure.
+ */
+static int bind_and_setsockopt(int fd, const struct ip_case_info *info)
+{
+	const int opts[] = { info->opt1, info->opt2 };
+	struct sockaddr_inet addr;
+	int i, on = 1;
+
+	for (i = 0; i < 2; i++)
+		if (setsockopt(fd, info->level, opts[i], &on, sizeof(on)) < 0)
+			return -1;
+	set_addr(&addr, info->domain, src_port);
+	return bind(fd, &addr.sa, addr.len);
+}
+
+/* Write an ICMP extension header and one object with a 4 byte payload to buf
+ * (assumed zeroed by the caller). Returns bytes written or -EINVAL.
+ */
+static int build_rfc4884_ext(uint8_t *buf, size_t buflen, bool bad_csum,
+			     bool bad_len, bool smaller_len)
+{
+	struct icmp_ext_hdr *exthdr = (struct icmp_ext_hdr *)buf;
+	struct icmp_extobj_hdr *objh;
+	size_t total, encoded_len;
+	uint16_t sum;
+
+	encoded_len = sizeof(*objh) + sizeof(uint32_t);
+	total = sizeof(*exthdr) + encoded_len;
+	if (total > buflen)
+		return -EINVAL;
+
+	/* A bad length is either one byte short of the object header, or
+	 * twice the real object size so that it overruns the packet.
+	 */
+	if (bad_len && smaller_len)
+		encoded_len = sizeof(*objh) - 1;
+	else if (bad_len)
+		encoded_len *= 2;
+
+	objh = (struct icmp_extobj_hdr *)(exthdr + 1);
+	objh->length = htons(encoded_len);
+	exthdr->version = 2;
+	sum = csum(buf, total);
+	exthdr->checksum = htons(bad_csum ? sum - 1 : sum);
+	return total;
+}
+
+/* Build the original datagram quoted by the ICMP error: an IPv4 header, a UDP
+ * header and payload_len bytes of orig_payload_byte, loopback to loopback.
+ * Returns the datagram length or -EINVAL if it does not fit in buflen.
+ */
+static int build_orig_dgram_v4(uint8_t *buf, ssize_t buflen, int payload_len)
+{
+	struct iphdr *iph = (struct iphdr *)buf;
+	struct udphdr *udph = (struct udphdr *)(iph + 1);
+	const size_t udp_len = sizeof(*udph) + payload_len;
+	const size_t len = sizeof(*iph) + udp_len;
+
+	if (len > buflen)
+		return -EINVAL;
+
+	iph->version = 4;
+	iph->ihl = 5;
+	iph->tot_len = htons(len);
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = htonl(INADDR_LOOPBACK);
+	iph->daddr = htonl(INADDR_LOOPBACK);
+	/* Must come last: covers every header field set above. */
+	iph->check = htons(csum(iph, sizeof(*iph)));
+
+	udph->source = htons(src_port);
+	udph->dest = htons(dst_port);
+	udph->len = htons(udp_len);
+
+	memset(udph + 1, orig_payload_byte, payload_len);
+	return len;
+}
+
+/* Build the original datagram quoted by the ICMPv6 error: an IPv6 header, a
+ * UDP header and payload_len bytes of orig_payload_byte, loopback to loopback.
+ * Returns the datagram length or -EINVAL if it does not fit in buflen.
+ */
+static int build_orig_dgram_v6(uint8_t *buf, ssize_t buflen, int payload_len)
+{
+	struct ipv6hdr *iph = (struct ipv6hdr *)buf;
+	struct udphdr *udph = (struct udphdr *)(iph + 1);
+	const size_t udp_len = sizeof(*udph) + payload_len;
+	const size_t len = sizeof(*iph) + udp_len;
+
+	if (len > buflen)
+		return -EINVAL;
+
+	iph->version = 6;
+	iph->nexthdr = IPPROTO_UDP;
+	iph->payload_len = htons(udp_len);
+	iph->saddr = in6addr_loopback;
+	iph->daddr = in6addr_loopback;
+
+	udph->source = htons(src_port);
+	udph->dest = htons(dst_port);
+	udph->len = htons(udp_len);
+
+	memset(udph + 1, orig_payload_byte, payload_len);
+
+	return len;
+}
+
+/* Build an ICMPv4 port-unreachable error in buf: ICMP header, the quoted
+ * original datagram and, if with_ext, an RFC 4884 extension structure.
+ * Returns the packet length or a negative errno if buf is too small.
+ */
+static int build_icmpv4_pkt(uint8_t *buf, ssize_t buflen, bool with_ext,
+			    int payload_len, bool bad_csum, bool bad_len,
+			    bool smaller_len)
+{
+	struct icmphdr *icmph = (struct icmphdr *)buf;
+	int dgram_len, ext_len = 0, len;
+	int hdr_len = sizeof(*icmph);
+
+	memset(buf, 0, buflen);
+	icmph->type = ICMP_DEST_UNREACH;
+	icmph->code = ICMP_PORT_UNREACH;
+
+	dgram_len = build_orig_dgram_v4(buf + hdr_len, buflen - hdr_len,
+					payload_len);
+	if (dgram_len < 0)
+		return dgram_len;
+
+	/* RFC 4884 length field: quoted datagram size in 32-bit words. */
+	icmph->un.reserved[1] = dgram_len / sizeof(uint32_t);
+	len = hdr_len + dgram_len;
+
+	if (with_ext)
+		ext_len = build_rfc4884_ext(buf + len, buflen - len, bad_csum,
+					    bad_len, smaller_len);
+	if (ext_len < 0)
+		return ext_len;
+	len += ext_len;
+
+	icmph->checksum = htons(csum(icmph, len));
+	return len;
+}
+
+/* Build an ICMPv6 port-unreachable error in buf: ICMPv6 header, the quoted
+ * original datagram and, if with_ext, an RFC 4884 extension structure.
+ * Returns the packet length or a negative errno if buf is too small.
+ */
+static int build_icmpv6_pkt(uint8_t *buf, ssize_t buflen, bool with_ext,
+			    int payload_len, bool bad_csum, bool bad_len,
+			    bool smaller_len)
+{
+	struct icmp6hdr *icmph = (struct icmp6hdr *)buf;
+	int dgram_len, ext_len = 0, len;
+	int hdr_len = sizeof(*icmph);
+
+	memset(buf, 0, buflen);
+	icmph->icmp6_type = ICMPV6_DEST_UNREACH;
+	icmph->icmp6_code = ICMPV6_PORT_UNREACH;
+
+	dgram_len = build_orig_dgram_v6(buf + hdr_len, buflen - hdr_len,
+					payload_len);
+	if (dgram_len < 0)
+		return dgram_len;
+
+	/* RFC 4884 length field: quoted datagram size in 64-bit words. */
+	icmph->icmp6_datagram_len = dgram_len / sizeof(uint64_t);
+	len = hdr_len + dgram_len;
+
+	if (with_ext)
+		ext_len = build_rfc4884_ext(buf + len, buflen - len, bad_csum,
+					    bad_len, smaller_len);
+	if (ext_len < 0)
+		return ext_len;
+	len += ext_len;
+
+	icmph->icmp6_cksum = htons(csum(icmph, len));
+	return len;
+}
+
+FIXTURE(rfc4884) {};	/* no per-test state */
+
+FIXTURE_SETUP(rfc4884)
+{
+	int ret;
+
+	ret = unshare(CLONE_NEWNET);	/* move the test into a new network namespace */
+	ASSERT_EQ(ret, 0) {
+		TH_LOG("unshare(CLONE_NEWNET) failed: %s", strerror(errno));
+	}
+
+	ret = bringup_loopback();	/* set IFF_UP on "lo" in that namespace */
+	ASSERT_EQ(ret, 0) TH_LOG("Failed to bring up loopback interface");
+}
+
+FIXTURE_TEARDOWN(rfc4884)
+{
+}
+
+const struct ip_case_info ipv4_info = {	/* IPv4 settings copied into the ipv4_* variants */
+	.domain		= AF_INET,
+	.level		= SOL_IP,
+	.opt1		= IP_RECVERR,
+	.opt2		= IP_RECVERR_RFC4884,
+	.proto		= IPPROTO_ICMP,
+	.build_func	= build_icmpv4_pkt,
+	.min_payload	= min_payload_len_v4,
+};
+
+const struct ip_case_info ipv6_info = {	/* IPv6 settings copied into the ipv6_* variants */
+	.domain		= AF_INET6,
+	.level		= SOL_IPV6,
+	.opt1		= IPV6_RECVERR,
+	.opt2		= IPV6_RECVERR_RFC4884,
+	.proto		= IPPROTO_ICMPV6,
+	.build_func	= build_icmpv6_pkt,
+	.min_payload	= min_payload_len_v6,
+};
+
+FIXTURE_VARIANT(rfc4884) {
+	/* IPv4/v6 related information */
+	struct ip_case_info	info;
+	/* Whether to append an ICMP extension or not */
+	bool			with_ext;
+	/* UDP payload length */
+	int			payload_len;
+	/* Whether to generate a bad checksum in the ICMP extension structure */
+	bool			bad_csum;
+	/* Whether to generate a bad length in the ICMP object header */
+	bool			bad_len;
+	/* Whether it is too small to fit the object header or too big to fit
+	 * in the packet
+	 */
+	bool			smaller_len;
+};
+
+/* Tests that a valid ICMPv4 error message with an extension, whose original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_small_payload) {
+	.info		= ipv4_info,
+	.with_ext	= true,
+	.payload_len	= 64,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that a valid ICMPv4 error message with extension and 128 bytes original
+ * datagram, generates an error with the expected offset, and does not raise the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext) {
+	.info		= ipv4_info,
+	.with_ext	= true,
+	.payload_len	= min_payload_len_v4,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that a valid ICMPv4 error message with an extension, whose original
+ * datagram is larger than 128 bytes, generates an error with the expected
+ * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_ext_large_payload) {
+	.info		= ipv4_info,
+	.with_ext	= true,
+	.payload_len	= 256,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension, whose original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_small_payload) {
+	.info		= ipv4_info,
+	.with_ext	= false,
+	.payload_len	= 64,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension and 128 bytes
+ * original datagram, generates an error with zero offset, and does not raise
+ * the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_min_payload) {
+	.info		= ipv4_info,
+	.with_ext	= false,
+	.payload_len	= min_payload_len_v4,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that a valid ICMPv4 error message without extension, whose original
+ * datagram is larger than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_no_ext_large_payload) {
+	.info		= ipv4_info,
+	.with_ext	= false,
+	.payload_len	= 256,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that an ICMPv4 error message with extension and an invalid checksum,
+ * generates an error with the expected offset, and raises the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_checksum) {
+	.info		= ipv4_info,
+	.with_ext	= true,
+	.payload_len	= min_payload_len_v4,
+	.bad_csum	= true,
+	.bad_len	= false,
+};
+
+/* Tests that an ICMPv4 error message with extension and an object length
+ * smaller than the object header, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_small) {
+	.info		= ipv4_info,
+	.with_ext	= true,
+	.payload_len	= min_payload_len_v4,
+	.bad_csum	= false,
+	.bad_len	= true,
+	.smaller_len	= true,
+};
+
+/* Tests that an ICMPv4 error message with extension and an object length that
+ * is too big to fit in the packet, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv4_invalid_ext_length_large) {
+	.info		= ipv4_info,
+	.with_ext	= true,
+	.payload_len	= min_payload_len_v4,
+	.bad_csum	= false,
+	.bad_len	= true,
+	.smaller_len	= false,
+};
+
+/* Tests that a valid ICMPv6 error message with an extension, whose original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_small_payload) {
+	.info		= ipv6_info,
+	.with_ext	= true,
+	.payload_len	= 64,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that a valid ICMPv6 error message with extension and 128 bytes original
+ * datagram, generates an error with the expected offset, and does not raise the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext) {
+	.info		= ipv6_info,
+	.with_ext	= true,
+	.payload_len	= min_payload_len_v6,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that a valid ICMPv6 error message with an extension, whose original
+ * datagram is larger than 128 bytes, generates an error with the expected
+ * offset, and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_ext_large_payload) {
+	.info		= ipv6_info,
+	.with_ext	= true,
+	.payload_len	= 256,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+/* Tests that a valid ICMPv6 error message without extension, whose original
+ * datagram is smaller than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_small_payload) {
+	.info		= ipv6_info,
+	.with_ext	= false,
+	.payload_len	= 64,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that a valid ICMPv6 error message without extension and 128 bytes
+ * original datagram, generates an error with zero offset, and does not
+ * raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_min_payload) {
+	.info		= ipv6_info,
+	.with_ext	= false,
+	.payload_len	= min_payload_len_v6,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that a valid ICMPv6 error message without extension, whose original
+ * datagram is larger than 128 bytes, generates an error with zero offset,
+ * and does not raise the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_no_ext_large_payload) {
+	.info		= ipv6_info,
+	.with_ext	= false,
+	.payload_len	= 256,
+	.bad_csum	= false,
+	.bad_len	= false,
+};
+
+/* Tests that an ICMPv6 error message with extension and an invalid checksum,
+ * generates an error with the expected offset, and raises the
+ * SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_checksum) {
+	.info		= ipv6_info,
+	.with_ext	= true,
+	.payload_len	= min_payload_len_v6,
+	.bad_csum	= true,
+	.bad_len	= false,
+};
+
+/* Tests that an ICMPv6 error message with extension and an object length
+ * smaller than the object header, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_small) {
+	.info		= ipv6_info,
+	.with_ext	= true,
+	.payload_len	= min_payload_len_v6,
+	.bad_csum	= false,
+	.bad_len	= true,
+	.smaller_len	= true,
+};
+
+/* Tests that an ICMPv6 error message with extension and an object length that
+ * is too big to fit in the packet, generates an error with the expected offset,
+ * and raises the SO_EE_RFC4884_FLAG_INVALID flag.
+ */
+FIXTURE_VARIANT_ADD(rfc4884, ipv6_invalid_ext_length_large) {
+	.info		= ipv6_info,
+	.with_ext	= true,
+	.payload_len	= min_payload_len_v6,
+	.bad_csum	= false,
+	.bad_len	= true,
+	.smaller_len	= false,
+};
+
+/* Read one message from sock's error queue and check the RFC 4884 flags and
+ * offset of its extended error against what variant v should produce.
+ */
+static void
+check_rfc4884_offset(struct __test_metadata *_metadata, int sock,
+		     const FIXTURE_VARIANT(rfc4884) *v)
+{
+	char rxbuf[1024];
+	char ctrl[1024];
+	struct iovec iov = {
+		.iov_base = rxbuf,
+		.iov_len = sizeof(rxbuf)
+	};
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = ctrl,
+		.msg_controllen = sizeof(ctrl),
+	};
+	bool is_invalid, expected_invalid;
+	struct sock_extended_err *ee;
+	struct cmsghdr *cmsg;
+	int expected_off, recv;
+	uint16_t off;
+
+	ASSERT_EQ(poll_err(sock), 0);
+
+	recv = recvmsg(sock, &msg, MSG_ERRQUEUE);
+	ASSERT_GE(recv, 0) TH_LOG("recvmsg(MSG_ERRQUEUE) failed");
+
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		if (cmsg->cmsg_level == v->info.level &&
+		    cmsg->cmsg_type == v->info.opt1)
+			break;
+		TH_LOG("Unrelated cmsgs were encountered in recvmsg()");
+	}
+	/* Without this the test would pass when no extended error arrived. */
+	ASSERT_NE(NULL, cmsg) TH_LOG("No extended error cmsg in recvmsg()");
+
+	ee = (struct sock_extended_err *)CMSG_DATA(cmsg);
+	off = ee->ee_rfc4884.len;
+	is_invalid = ee->ee_rfc4884.flags & SO_EE_RFC4884_FLAG_INVALID;
+
+	expected_invalid = v->bad_csum || v->bad_len;
+	ASSERT_EQ(is_invalid, expected_invalid) {
+		TH_LOG("Expected invalidity flag to be %d, but got %d",
+		       expected_invalid, is_invalid);
+	}
+
+	/* A non-zero offset needs an extension and a 128+ byte datagram. */
+	expected_off = (v->with_ext && v->payload_len >= v->info.min_payload) ?
+		       v->payload_len : 0;
+	ASSERT_EQ(off, expected_off) {
+		TH_LOG("Expected RFC4884 offset %u, got %u", expected_off, off);
+	}
+}
+
+TEST_F(rfc4884, rfc4884)
+{
+	const typeof(variant) v = variant;
+	struct sockaddr_inet addr;
+	uint8_t pkt[1024];
+	int dgram, raw;	/* dgram's error queue is checked; raw sends the ICMP error */
+	int len, sent;
+	int err;
+
+	dgram = socket(v->info.domain, SOCK_DGRAM, 0);
+	ASSERT_GE(dgram, 0) TH_LOG("Opening datagram socket failed");
+
+	err = bind_and_setsockopt(dgram, &v->info);
+	ASSERT_EQ(err, 0) TH_LOG("Bind failed");	/* also hit when a setsockopt() fails */
+
+	raw = socket(v->info.domain, SOCK_RAW, v->info.proto);
+	ASSERT_GE(raw, 0) TH_LOG("Opening raw socket failed");
+
+	len = v->info.build_func(pkt, sizeof(pkt), v->with_ext, v->payload_len,
+				 v->bad_csum, v->bad_len, v->smaller_len);
+	ASSERT_GT(len, 0) TH_LOG("Building packet failed");
+
+	set_addr(&addr, v->info.domain, 0);	/* loopback destination, port 0 */
+	sent = sendto(raw, pkt, len, 0, &addr.sa, addr.len);
+	ASSERT_EQ(len, sent) TH_LOG("Sending packet failed");
+
+	check_rfc4884_offset(_metadata, dgram, v);	/* asserts on the queued error */
+
+	close(dgram);
+	close(raw);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
index 76e604e4810e..7bfeeb133705 100644
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol)
 
 	ret = io_uring_queue_init(512, &ring, 0);
 	if (ret)
-		error(1, ret, "io_uring: queue init");
+		error(1, -ret, "io_uring: queue init");
 
 	iov.iov_base = payload;
 	iov.iov_len = cfg_payload_len;
 
 	ret = io_uring_register_buffers(&ring, &iov, 1);
 	if (ret)
-		error(1, ret, "io_uring: buffer registration");
+		error(1, -ret, "io_uring: buffer registration");
 
 	tstop = gettimeofday_ms() + cfg_runtime_ms;
 	do {
@@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol)
 
 		ret = io_uring_submit(&ring);
 		if (ret != cfg_nr_reqs)
-			error(1, ret, "submit");
+			error(1, -ret, "submit");
 
 		if (cfg_cork)
 			do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
 		for (i = 0; i < cfg_nr_reqs; i++) {
 			ret = io_uring_wait_cqe(&ring, &cqe);
 			if (ret)
-				error(1, ret, "wait cqe");
+				error(1, -ret, "wait cqe");
 
 			if (cqe->user_data != NONZC_TAG &&
 			    cqe->user_data != ZC_TAG)
-				error(1, -EINVAL, "invalid cqe->user_data");
+				error(1, EINVAL, "invalid cqe->user_data");
 
 			if (cqe->flags & IORING_CQE_F_NOTIF) {
 				if (cqe->flags & IORING_CQE_F_MORE)
-					error(1, -EINVAL, "invalid notif flags");
+					error(1, EINVAL, "invalid notif flags");
 				if (compl_cqes <= 0)
-					error(1, -EINVAL, "notification mismatch");
+					error(1, EINVAL, "notification mismatch");
 				compl_cqes--;
 				i--;
 				io_uring_cqe_seen(&ring);
@@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol)
 			}
 			if (cqe->flags & IORING_CQE_F_MORE) {
 				if (cqe->user_data != ZC_TAG)
-					error(1, cqe->res, "unexpected F_MORE");
+					error(1, -cqe->res, "unexpected F_MORE");
 				compl_cqes++;
 			}
 			if (cqe->res >= 0) {
 				packets++;
 				bytes += cqe->res;
 			} else if (cqe->res != -EAGAIN) {
-				error(1, cqe->res, "send failed");
+				error(1, -cqe->res, "send failed");
 			}
 			io_uring_cqe_seen(&ring);
 		}
@@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol)
 	while (compl_cqes) {
 		ret = io_uring_wait_cqe(&ring, &cqe);
 		if (ret)
-			error(1, ret, "wait cqe");
+			error(1, -ret, "wait cqe");
 		if (cqe->flags & IORING_CQE_F_MORE)
-			error(1, -EINVAL, "invalid notif flags");
+			error(1, EINVAL, "invalid notif flags");
 		if (!(cqe->flags & IORING_CQE_F_NOTIF))
-			error(1, -EINVAL, "missing notif flag");
+			error(1, EINVAL, "missing notif flag");
 
 		io_uring_cqe_seen(&ring);
 		compl_cqes--;
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
index 123439545013..8c3647de9b4c 100755
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
@@ -77,9 +77,13 @@ esac
 
 # Start of state changes: install cleanup handler
 
+old_io_uring_disabled=0
 cleanup() {
 	ip netns del "${NS2}"
 	ip netns del "${NS1}"
+	if [ "$old_io_uring_disabled" -ne 0 ]; then
+		sysctl -w -q kernel.io_uring_disabled="$old_io_uring_disabled" 2>/dev/null || true
+	fi
 }
 
 trap cleanup EXIT
@@ -122,5 +126,10 @@ do_test() {
 	wait
 }
 
+old_io_uring_disabled=$(sysctl -n kernel.io_uring_disabled 2>/dev/null || echo "0")
+if [ "$old_io_uring_disabled" -ne 0 ]; then
+	sysctl -w -q kernel.io_uring_disabled=0
+fi
+
 do_test "${EXTRA_ARGS}"
 echo ok
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index 845c26dd01a9..b2b99889942f 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -273,8 +273,8 @@ setup()
   ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null
   ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null
 
-  ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null
   ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+  ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null
   ip -netns $ioam_node_alpha link set veth0 up &>/dev/null
   ip -netns $ioam_node_alpha link set lo up &>/dev/null
   ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \
diff --git a/tools/testing/selftests/net/ip6_tunnel.sh b/tools/testing/selftests/net/ip6_tunnel.sh
new file mode 100755
index 000000000000..fe081a521819
--- /dev/null
+++ b/tools/testing/selftests/net/ip6_tunnel.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Test that IPv4-over-IPv6 and IPv6-over-IPv6 tunneling works.
+
+source lib.sh
+set -e
+
+setup_prepare() {
+  ip link add transport1 type veth peer name transport2  # veth pair carrying both tunnels
+
+  setup_ns ns1
+  ip link set transport1 netns $ns1
+  ip -n $ns1 address add 2001:db8::1/64 dev transport1 nodad  # local end of tunnel4
+  ip -n $ns1 address add 2001:db8::3/64 dev transport1 nodad  # local end of tunnel6
+  ip -n $ns1 link set transport1 up
+  ip -n $ns1 link add link transport1 name tunnel4 type ip6tnl mode ipip6 local 2001:db8::1 remote 2001:db8::2
+  ip -n $ns1 address add 172.0.0.1/32 peer 172.0.0.2/32 dev tunnel4
+  ip -n $ns1 link set tunnel4 up
+  ip -n $ns1 link add link transport1 name tunnel6 type ip6tnl mode ip6ip6 local 2001:db8::3 remote 2001:db8::4
+  ip -n $ns1 address add 2001:db8:6::1/64 dev tunnel6
+  ip -n $ns1 link set tunnel6 up
+
+  setup_ns ns2
+  ip link set transport2 netns $ns2
+  ip -n $ns2 address add 2001:db8::2/64 dev transport2 nodad  # local end of tunnel4
+  ip -n $ns2 address add 2001:db8::4/64 dev transport2 nodad  # local end of tunnel6
+  ip -n $ns2 link set transport2 up
+  ip -n $ns2 link add link transport2 name tunnel4 type ip6tnl mode ipip6 local 2001:db8::2 remote 2001:db8::1
+  ip -n $ns2 address add 172.0.0.2/32 peer 172.0.0.1/32 dev tunnel4
+  ip -n $ns2 link set tunnel4 up
+  ip -n $ns2 link add link transport2 name tunnel6 type ip6tnl mode ip6ip6 local 2001:db8::4 remote 2001:db8::3
+  ip -n $ns2 address add 2001:db8:6::2/64 dev tunnel6
+  ip -n $ns2 link set tunnel6 up
+}
+
+cleanup() {
+  cleanup_all_ns
+  # transport1 is still in the root namespace if setup failed before moving it
+  ip link delete transport1 &>/dev/null || true
+}
+
+trap cleanup EXIT
+setup_prepare
+ip netns exec $ns1 ping -q -W1 -c1 172.0.0.2 >/dev/null  # IPv4 peer address of tunnel4 (ipip6)
+ip netns exec $ns1 ping -q -W1 -c1 2001:db8:6::2 >/dev/null  # IPv6 address on ns2's tunnel6 (ip6ip6)
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
index 29451d2244b7..e6834a6cfc8f 100644
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -10,7 +10,7 @@
 #include <fcntl.h>
 #include <netinet/ip.h>
 
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 #ifndef IP_LOCAL_PORT_RANGE
 #define IP_LOCAL_PORT_RANGE 51
diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh
index 6c6ad346eaa0..4ff746db1256 100755
--- a/tools/testing/selftests/net/ip_local_port_range.sh
+++ b/tools/testing/selftests/net/ip_local_port_range.sh
@@ -2,4 +2,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ./in_netns.sh \
-  sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range'
+  sh -c 'sysctl -q -w net.mptcp.enabled=1 && \
+         sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && \
+         ./ip_local_port_range'
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 9b44a091802c..89c32c354c00 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -34,7 +34,7 @@
 #include <time.h>
 #include <unistd.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 #define printk(fmt, ...)						\
 	ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)
@@ -43,6 +43,10 @@
 
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER)	__builtin_offsetof(TYPE, MEMBER)
+#endif
+
 #define IPV4_STR_SZ	16	/* xxx.xxx.xxx.xxx is longest + \0 */
 #define MAX_PAYLOAD	2048
 #define XFRM_ALGO_KEY_BUF_SIZE	512
@@ -58,8 +62,6 @@
 #define VETH_FMT	"ktst-%d"
 #define VETH_LEN	12
 
-#define XFRM_ALGO_NR_KEYS 29
-
 static int nsfd_parent	= -1;
 static int nsfd_childa	= -1;
 static int nsfd_childb	= -1;
@@ -92,7 +94,6 @@ struct xfrm_key_entry xfrm_key_entries[] = {
 	{"cbc(cast5)", 128},
 	{"cbc(serpent)", 128},
 	{"hmac(sha1)", 160},
-	{"hmac(rmd160)", 160},
 	{"cbc(des3_ede)", 192},
 	{"hmac(sha256)", 256},
 	{"cbc(aes)", 256},
@@ -809,7 +810,7 @@ static int xfrm_fill_key(char *name, char *buf,
 {
 	int i;
 
-	for (i = 0; i < XFRM_ALGO_NR_KEYS; i++) {
+	for (i = 0; i < ARRAY_SIZE(xfrm_key_entries); i++) {
 		if (strncmp(name, xfrm_key_entries[i].algo_name, ALGO_LEN) == 0)
 			*key_len = xfrm_key_entries[i].key_len;
 	}
@@ -827,13 +828,16 @@ static int xfrm_fill_key(char *name, char *buf,
 static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
 		struct xfrm_desc *desc)
 {
-	struct {
+	union {
 		union {
 			struct xfrm_algo	alg;
 			struct xfrm_algo_aead	aead;
 			struct xfrm_algo_auth	auth;
 		} u;
-		char buf[XFRM_ALGO_KEY_BUF_SIZE];
+		struct {
+			unsigned char __offset_to_FAM[offsetof(struct xfrm_algo_auth, alg_key)];
+			char buf[XFRM_ALGO_KEY_BUF_SIZE];
+		};
 	} alg = {};
 	size_t alen, elen, clen, aelen;
 	unsigned short type;
@@ -2054,8 +2058,7 @@ static int write_desc(int proto, int test_desc_fd,
 int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP };
 char *ah_list[] = {
 	"digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)",
-	"hmac(sha384)", "hmac(sha512)", "hmac(rmd160)",
-	"xcbc(aes)", "cmac(aes)"
+	"hmac(sha384)", "hmac(sha512)", "xcbc(aes)", "cmac(aes)"
 };
 char *comp_list[] = {
 	"deflate",
diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh
new file mode 100755
index 000000000000..bf0243366caa
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test IPv6 force_forwarding interface property
+#
+# This test verifies that the force_forwarding property works correctly:
+# - When global forwarding is disabled, packets are not forwarded normally
+# - When force_forwarding is enabled on an interface, packets are forwarded
+#   regardless of the global forwarding setting
+
+source lib.sh
+
+cleanup() {  # EXIT handler (see the trap below)
+    cleanup_ns $ns1 $ns2 $ns3  # cleanup_ns is not defined here; presumably from lib.sh
+}
+
+trap cleanup EXIT
+
+setup_test() {
+    # Create three namespaces: ns1 = sender, ns2 = router, ns3 = receiver
+    setup_ns ns1 ns2 ns3
+
+    # Create veth pairs: ns1 <-> ns2 <-> ns3
+    ip link add name veth12 type veth peer name veth21
+    ip link add name veth23 type veth peer name veth32
+
+    # Move interfaces to namespaces (the router gets one end of each pair)
+    ip link set veth12 netns $ns1
+    ip link set veth21 netns $ns2
+    ip link set veth23 netns $ns2
+    ip link set veth32 netns $ns3
+
+    # Configure interfaces: 2001:db8:1::/64 on ns1-ns2, 2001:db8:2::/64 on ns2-ns3
+    ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad
+    ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad
+    ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad
+    ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad
+
+    # Bring up interfaces
+    ip -n $ns1 link set veth12 up
+    ip -n $ns2 link set veth21 up
+    ip -n $ns2 link set veth23 up
+    ip -n $ns3 link set veth32 up
+
+    # Route the remote subnet via the router (ns2) in both end namespaces
+    ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2
+    ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1
+
+    # Disable global forwarding on the router (ns2)
+    ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0
+}
+
+test_force_forwarding() {
+    local status=0
+    local iface
+
+    echo "TEST: force_forwarding functionality"
+
+    # Skip when the router interface has no force_forwarding sysctl
+    if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then
+        echo "SKIP: force_forwarding not available"
+        return $ksft_skip
+    fi
+
+    # Test 1: force_forwarding off on both router interfaces, ping must fail
+    for iface in veth21 veth23; do
+        ip netns exec $ns2 sysctl -qw net.ipv6.conf.$iface.force_forwarding=0
+    done
+    if ! ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+        echo "PASS: forwarding disabled correctly"
+    else
+        echo "FAIL: ping succeeded when forwarding disabled"
+        status=1
+    fi
+
+    # Test 2: force_forwarding on for both router interfaces, ping must succeed
+    for iface in veth21 veth23; do
+        ip netns exec $ns2 sysctl -qw net.ipv6.conf.$iface.force_forwarding=1
+    done
+    if ! ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+        echo "FAIL: ping failed with force_forwarding enabled"
+        status=1
+    else
+        echo "PASS: force_forwarding enabled forwarding"
+    fi
+    return $status
+}
+
+echo "IPv6 force_forwarding test"
+echo "=========================="
+
+setup_test
+test_force_forwarding
+ret=$?
+case "$ret" in  # map the test result onto the script's exit status
+0)
+    echo "OK"
+    exit 0 ;;
+"$ksft_skip")
+    echo "SKIP"
+    exit $ksft_skip ;;
+*)
+    echo "FAIL"
+    exit 1 ;;
+esac
diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c
new file mode 100644
index 000000000000..672c9fe086a7
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_fragmentation.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Brett A C Sheffield <bacs@librecast.net>
+ *
+ * Kernel selftest for the IPv6 fragmentation regression which affected stable
+ * kernels:
+ *
+ *   https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com
+ *
+ * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable
+ * without some prerequisite commits.
+ *
+ * This caused a regression when sending IPv6 UDP packets by preventing
+ * fragmentation and instead returning -1 (EMSGSIZE).
+ *
+ * This selftest demonstrates the issue by sending an IPv6 UDP packet to
+ * localhost (::1) on the loopback interface from the autoconfigured link-local
+ * address.
+ *
+ * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1
+ * (EMSGSIZE) when the regression is present.
+ *
+ * The regression was not present in the mainline kernel, but add this test to
+ * catch similar breakage in future.
+ */
+
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "kselftest.h"
+
+#define MTU 1500
+#define LARGER_THAN_MTU 8192
+
+static void setup(void)
+{
+	struct ifreq ifr = {
+		.ifr_name = "lo"
+	};
+	int ctl;
+
+	/* we need to set MTU, so do this in a namespace to play nicely */
+	if (unshare(CLONE_NEWNET) == -1)
+		error(KSFT_FAIL, errno, "unshare");
+
+	ctl = socket(AF_LOCAL, SOCK_STREAM, 0);
+	if (ctl == -1)
+		error(KSFT_FAIL, errno, "socket");
+
+	/* ensure MTU is smaller than what we plan to send */
+	ifr.ifr_mtu = MTU;
+	if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1)
+		error(KSFT_FAIL, errno, "ioctl: set MTU");
+
+	/* bring up interface */
+	if (ioctl(ctl, SIOCGIFFLAGS, &ifr) == -1)
+		error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS");
+	ifr.ifr_flags = ifr.ifr_flags | IFF_UP;
+	if (ioctl(ctl, SIOCSIFFLAGS, &ifr) == -1)
+		error(KSFT_FAIL, errno, "ioctl: bring interface up");
+
+	if (close(ctl) == -1)
+		error(KSFT_FAIL, errno, "close");
+}
+
+int main(void)
+{
+	struct in6_addr addr = {
+		.s6_addr[15] = 0x01,  /* ::1 */
+	};
+	struct sockaddr_in6 sa = {
+		.sin6_family = AF_INET6,
+		.sin6_addr = addr,
+		.sin6_port = htons(9) /* port 9/udp (DISCARD) */
+	};
+	static char buf[LARGER_THAN_MTU] = {0}; /* payload larger than MTU */
+	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_name = (struct sockaddr *)&sa,
+		.msg_namelen = sizeof(sa),
+	};
+	ssize_t rc;
+	int s;
+
+	printf("Testing IPv6 fragmentation\n");
+	setup();
+	s = socket(AF_INET6, SOCK_DGRAM, 0);
+send_again:
+	rc = sendmsg(s, &msg, 0);
+	if (rc == -1) {
+		/* if interface wasn't ready, try again */
+		if (errno == EADDRNOTAVAIL) {
+			usleep(1000);
+			goto send_again;
+		}
+		error(KSFT_FAIL, errno, "sendmsg");
+	} else if (rc != LARGER_THAN_MTU) { /* short send: errno is stale */
+		error(KSFT_FAIL, 0, "sendmsg returned %zi, expected %i",
+				rc, LARGER_THAN_MTU);
+	}
+	printf("[PASS] sendmsg() returned %zi\n", rc);
+	if (close(s) == -1)
+		error(KSFT_FAIL, errno, "close");
+	return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
index a6b2b1f9c641..c6866e42f95c 100755
--- a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
+++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
@@ -69,7 +69,6 @@
 # which can affect the conditions needed to trigger a soft lockup.
 
 source lib.sh
-source net_helper.sh
 
 TEST_DURATION=300
 ROUTING_TABLE_REFRESH_PERIOD=0.01
diff --git a/tools/testing/selftests/net/ipvtap_test.sh b/tools/testing/selftests/net/ipvtap_test.sh
new file mode 100755
index 000000000000..354ca7ce8584
--- /dev/null
+++ b/tools/testing/selftests/net/ipvtap_test.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Simple tests for ipvtap
+
+
+#
+# The testing environment looks this way:
+#
+# |------HNS-------|     |------PHY-------|
+# |      veth<----------------->veth      |
+# |------|--|------|     |----------------|
+#        |  |
+#        |  |            |-----TST0-------|
+#        |  |------------|----ipvlan      |
+#        |               |----------------|
+#        |
+#        |               |-----TST1-------|
+#        |---------------|----ipvlan      |
+#                        |----------------|
+#
+
+ALL_TESTS="
+	test_ip_set
+"
+
+source lib.sh
+
+DEBUG=0
+
+VETH_HOST=vethtst.h
+VETH_PHY=vethtst.p
+
+NS_COUNT=32
+IP_ITERATIONS=1024
+IPSET_TIMEOUT="60s"
+
+ns_run() {	# run "$@" in netns $1 ("global": no netns switch); stdout dropped
+	local ns=$1	# local: callers use their own $ns for ${!ns} lookups
+	shift
+	if [[ "$ns" == "global" ]]; then
+		"$@" >/dev/null
+	else
+		ip netns exec "$ns" "$@" >/dev/null
+	fi
+}
+
+test_ip_setup_env() {
+	setup_ns NS_PHY
+	setup_ns HST_NS
+
+	# setup simulated other-host (phy) and host itself
+	ns_run "$HST_NS" ip link add $VETH_HOST type veth peer name $VETH_PHY \
+		netns "$NS_PHY" >/dev/null
+	ns_run "$HST_NS" ip link set $VETH_HOST up
+	ns_run "$NS_PHY" ip link set $VETH_PHY up
+
+	for ((i=0; i<NS_COUNT; i++)); do
+		setup_ns ipvlan_ns_$i
+		ns="ipvlan_ns_$i"
+		if [ "$DEBUG" = "1" ]; then
+			echo "created NS ${!ns}"
+		fi
+		if ! ns_run "$HST_NS" ip link add netns ${!ns} ipvlan0 \
+		    link $VETH_HOST \
+		    type ipvtap mode l2 bridge; then
+			exit_error "FAIL: Failed to configure ipvlan link."
+		fi
+	done
+}
+
+test_ip_cleanup_env() {
+	ns_run "$HST_NS" ip link del $VETH_HOST
+	cleanup_all_ns
+}
+
+exit_error() {
+	echo "$1"
+	exit $ksft_fail
+}
+
+rnd() {
+	echo $(( RANDOM % 32 + 16 ))
+}
+
+test_ip_set_thread() {
+	# Here we are trying to create some IP conflicts between namespaces.
+	# If we just add/remove an IP, nothing interesting will happen.
+	# But if we add a random IP and then remove another random IP,
+	# eventually conflicts start to appear.
+	ip link set ipvlan0 up
+	for ((i=0; i<IP_ITERATIONS; i++)); do
+		v=$(rnd)
+		ip a a "172.25.0.$v/24" dev ipvlan0 2>/dev/null
+		ip a a "fc00::$v/64" dev ipvlan0 2>/dev/null
+		v=$(rnd)
+		ip a d "172.25.0.$v/24" dev ipvlan0 2>/dev/null
+		ip a d "fc00::$v/64" dev ipvlan0 2>/dev/null
+	done
+}
+
+test_ip_set() {
+	RET=0
+
+	trap test_ip_cleanup_env EXIT
+
+	test_ip_setup_env
+
+	declare -A ns_pids
+	for ((i=0; i<NS_COUNT; i++)); do
+		ns="ipvlan_ns_$i"
+		ns_run ${!ns} timeout "$IPSET_TIMEOUT" \
+			bash -c "$0 test_ip_set_thread"&
+		ns_pids[$i]=$!
+	done
+
+	for ((i=0; i<NS_COUNT; i++)); do
+		wait "${ns_pids[$i]}"
+	done
+
+	declare -A all_ips
+	for ((i=0; i<NS_COUNT; i++)); do
+		ns="ipvlan_ns_$i"
+		ip_output=$(ip netns exec ${!ns} ip a l dev ipvlan0 | grep inet)
+		while IFS= read -r nsip_out; do
+			if [[ -z $nsip_out ]]; then
+				continue;
+			fi
+			nsip=$(awk '{print $2}' <<< "$nsip_out")
+			if [[ -v all_ips[$nsip] ]]; then
+				RET=$ksft_fail
+				log_test "conflict for $nsip"
+				return "$RET"
+			else
+				all_ips[$nsip]=$i
+			fi
+		done <<< "$ip_output"
+	done
+
+	if [ "$DEBUG" = "1" ]; then
+		for key in "${!all_ips[@]}"; do
+			echo "$key: ${all_ips[$key]}"
+		done
+	fi
+
+	trap - EXIT
+	test_ip_cleanup_env
+
+	log_test "test multithreaded ip set"
+}
+
+if [[ "$1" == "-d" ]]; then
+	DEBUG=1
+	shift
+fi
+
+if [[ "$1" == "-t" ]]; then
+	shift
+	TESTS="$*"
+fi
+
+if [[ "$1" == "test_ip_set_thread" ]]; then
+	test_ip_set_thread
+else
+	require_command ip
+
+	tests_run
+fi
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 0bd9a038a1f0..b3827b43782b 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -43,7 +43,7 @@ __ksft_status_merge()
 		weights[$i]=$((weight++))
 	done
 
-	if [[ ${weights[$a]} > ${weights[$b]} ]]; then
+	if [[ ${weights[$a]} -ge ${weights[$b]} ]]; then
 		echo "$a"
 		return 0
 	else
@@ -217,11 +217,74 @@ setup_ns()
 			return $ksft_skip
 		fi
 		ip -n "${!ns_name}" link set lo up
+		ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+		ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 		ns_list+=("${!ns_name}")
 	done
 	NS_LIST+=("${ns_list[@]}")
 }
 
+in_all_ns()	# run "$@" in every namespace of NS_LIST; non-zero if any run failed
+{
+	local ns ret=0	# keep the loop variable out of the sourcing script's scope
+	local ns_list=("${NS_LIST[@]}")
+
+	for ns in "${ns_list[@]}"; do
+		ip netns exec "${ns}" "$@"
+		(( ret = ret || $? ))	# logical OR: ret ends up 0 or 1
+	done
+
+	return "${ret}"
+}
+
+# Create netdevsim with given id and net namespace.
+create_netdevsim() {
+    local id="$1"
+    local ns="$2"
+
+    modprobe netdevsim &> /dev/null
+    udevadm settle
+
+    echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null
+    local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net)
+    ip -netns $ns link set dev $dev name nsim$id
+    ip -netns $ns link set dev nsim$id up
+
+    echo nsim$id
+}
+
+create_netdevsim_port() {
+    local nsim_id="$1"
+    local ns="$2"
+    local port_id="$3"
+    local perm_addr="$4"
+    local orig_dev
+    local new_dev
+    local nsim_path
+
+    nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id"
+
+    echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1
+
+    orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1)
+    orig_dev=$(basename "$orig_dev")
+    new_dev="nsim${nsim_id}p$port_id"
+
+    ip -netns "$ns" link set dev "$orig_dev" name "$new_dev"
+    ip -netns "$ns" link set dev "$new_dev" up
+
+    echo "$new_dev"
+}
+
+# Remove netdevsim with given id.
+cleanup_netdevsim() {
+    local id="$1"
+
+    if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then
+        echo "$id" > /sys/bus/netdevsim/del_device
+    fi
+}
+
 tc_rule_stats_get()
 {
 	local dev=$1; shift
@@ -230,7 +293,8 @@ tc_rule_stats_get()
 	local selector=${1:-.packets}; shift
 
 	tc -j -s filter show dev $dev $dir pref $pref \
-	    | jq ".[1].options.actions[].stats$selector"
+	    | jq ".[] | select(.options.actions) |
+		  .options.actions[].stats$selector"
 }
 
 tc_rule_handle_stats_get()
@@ -245,6 +309,30 @@ tc_rule_handle_stats_get()
 		  .options.actions[0].stats$selector"
 }
 
+# attach a qdisc with two children match/no-match and a flower filter to match
+tc_set_flower_counter() {
+	local -r ns=$1
+	local -r ipver=$2
+	local -r dev=$3
+	local -r flower_expr=$4
+
+	tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \
+			priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+	tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo
+	tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo
+
+	tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \
+			flower $flower_expr classid 1:2
+}
+
+tc_get_flower_counter() {
+	local -r ns=$1
+	local -r dev=$2
+
+	tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets
+}
+
 ret_set_ksft_status()
 {
 	local ksft_status=$1; shift
@@ -261,7 +349,7 @@ log_test_result()
 	local test_name=$1; shift
 	local opt_str=$1; shift
 	local result=$1; shift
-	local retmsg=$1; shift
+	local retmsg=$1
 
 	printf "TEST: %-60s  [%s]\n" "$test_name $opt_str" "$result"
 	if [[ $retmsg ]]; then
@@ -439,7 +527,8 @@ mac_get()
 {
 	local if_name=$1
 
-	ip -j link show dev $if_name | jq -r '.[]["address"]'
+	run_on "$if_name" \
+		ip -j link show dev "$if_name" | jq -r '.[]["address"]'
 }
 
 kill_process()
@@ -450,78 +539,153 @@ kill_process()
 	{ kill $pid && wait $pid; } 2>/dev/null
 }
 
-ip_link_add()
+check_command()
+{
+	local cmd=$1; shift
+
+	if [[ ! -x "$(command -v "$cmd")" ]]; then
+		log_test_skip "$cmd not installed"
+		return $EXIT_STATUS
+	fi
+}
+
+require_command()
+{
+	local cmd=$1; shift
+
+	if ! check_command "$cmd"; then
+		exit $EXIT_STATUS
+	fi
+}
+
+adf_ip_link_add()
 {
 	local name=$1; shift
 
-	ip link add name "$name" "$@"
-	defer ip link del dev "$name"
+	ip link add name "$name" "$@" && \
+		defer ip link del dev "$name"
 }
 
-ip_link_set_master()
+adf_ip_link_set_master()
 {
 	local member=$1; shift
 	local master=$1; shift
 
-	ip link set dev "$member" master "$master"
-	defer ip link set dev "$member" nomaster
+	ip link set dev "$member" master "$master" && \
+		defer ip link set dev "$member" nomaster
 }
 
-ip_link_set_addr()
+adf_ip_link_set_addr()
 {
 	local name=$1; shift
 	local addr=$1; shift
 
 	local old_addr=$(mac_get "$name")
-	ip link set dev "$name" address "$addr"
-	defer ip link set dev "$name" address "$old_addr"
+	ip link set dev "$name" address "$addr" && \
+		defer ip link set dev "$name" address "$old_addr"
 }
 
-ip_link_is_up()
+ip_link_has_flag()
 {
 	local name=$1; shift
+	local flag=$1; shift
 
 	local state=$(ip -j link show "$name" |
-		      jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"')
-	[[ $state == "UP" ]]
+		      jq --arg flag "$flag" 'any(.[].flags[]; . == $flag)')
+	[[ $state == true ]]
+}
+
+ip_link_is_up()
+{
+	ip_link_has_flag "$1" UP
 }
 
-ip_link_set_up()
+adf_ip_link_set_up()
 {
 	local name=$1; shift
 
 	if ! ip_link_is_up "$name"; then
-		ip link set dev "$name" up
-		defer ip link set dev "$name" down
+		ip link set dev "$name" up && \
+			defer ip link set dev "$name" down
 	fi
 }
 
-ip_link_set_down()
+adf_ip_link_set_down()
 {
 	local name=$1; shift
 
 	if ip_link_is_up "$name"; then
-		ip link set dev "$name" down
-		defer ip link set dev "$name" up
+		ip link set dev "$name" down && \
+			defer ip link set dev "$name" up
 	fi
 }
 
-ip_addr_add()
+adf_ip_addr_add()
 {
 	local name=$1; shift
 
-	ip addr add dev "$name" "$@"
-	defer ip addr del dev "$name" "$@"
+	ip addr add dev "$name" "$@" && \
+		defer ip addr del dev "$name" "$@"
+}
+
+adf_ip_route_add()
+{
+	ip route add "$@" && \
+		defer ip route del "$@"
+}
+
+adf_bridge_vlan_add()
+{
+	bridge vlan add "$@" && \
+		defer bridge vlan del "$@"
+}
+
+wait_local_port_listen()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+	local protocol="${3}"
+	local pattern
+	local i
+
+	pattern=":$(printf "%04X" "${port}") "
+
+	# for tcp protocol additionally check the socket state
+	[ ${protocol} = "tcp" ] && pattern="${pattern}0A"
+	for i in $(seq 10); do
+		if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+		   /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+			break
+		fi
+		sleep 0.1
+	done
 }
 
-ip_route_add()
+cmd_jq()
 {
-	ip route add "$@"
-	defer ip route del "$@"
+	local cmd=$1
+	local jq_exp=$2
+	local jq_opts=$3
+	local ret
+	local output
+
+	output="$($cmd)"
+	# if the command fails, return error right away
+	ret=$?
+	if [[ $ret -ne 0 ]]; then
+		return $ret
+	fi
+	output=$(echo "$output" | jq -r $jq_opts "$jq_exp") # quoted: no glob/split of JSON
+	ret=$?
+	if [[ $ret -ne 0 ]]; then
+		return $ret
+	fi
+	echo $output
+	# return success only in case of non-empty output
+	[ ! -z "$output" ]
 }
 
-bridge_vlan_add()
+run_on()
 {
-	bridge vlan add "$@"
-	defer bridge vlan del "$@"
+	shift; "$@"
 }
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
index 1ebc6187f421..6cd2b762af5d 100644
--- a/tools/testing/selftests/net/lib/.gitignore
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -1,2 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 csum
+gro
+xdp_helper
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index c22623b9a2a5..ff83603397d0 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -5,11 +5,18 @@ CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
 # Additional include paths needed by kselftest.h
 CFLAGS += -I../../
 
-TEST_FILES := ../../../../../Documentation/netlink/specs
-TEST_FILES += ../../../../net/ynl
+TEST_FILES := \
+	../../../../net/ynl \
+	../../../../../Documentation/netlink/specs \
+	ksft_setup_loopback.sh \
+# end of TEST_FILES
 
-TEST_GEN_FILES += csum
-TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+TEST_GEN_FILES := \
+	$(patsubst %.c,%.o,$(wildcard *.bpf.c)) \
+	csum \
+	gro \
+	xdp_helper \
+# end of TEST_GEN_FILES
 
 TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
 
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index 27437590eeb5..e28884ce3ab3 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -707,7 +707,7 @@ static uint32_t recv_get_packet_csum_status(struct msghdr *msg)
 			      cm->cmsg_level, cm->cmsg_type);
 
 		if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata)))
-			error(1, 0, "cmsg: len=%lu expected=%lu",
+			error(1, 0, "cmsg: len=%zu expected=%zu",
 			      cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata)));
 
 		aux = (void *)CMSG_DATA(cm);
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/lib/gro.c
index b2184847e388..11b16ae5f0e8 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/net/lib/gro.c
@@ -3,26 +3,52 @@
  * This testsuite provides conformance testing for GRO coalescing.
  *
  * Test cases:
- * 1.data
+ *
+ * data_*:
  *  Data packets of the same size and same header setup with correct
  *  sequence numbers coalesce. The one exception being the last data
  *  packet coalesced: it can be smaller than the rest and coalesced
  *  as long as it is in the same flow.
- * 2.ack
+ *   - data_same:    same size packets coalesce
+ *   - data_lrg_sml:   large then small coalesces
+ *   - data_lrg_1byte: large then 1 byte coalesces (Ethernet padding)
+ *   - data_sml_lrg:   small then large doesn't coalesce
+ *   - data_burst:   two bursts of two, separated by 100ms
+ *
+ * ack:
  *  Pure ACK does not coalesce.
- * 3.flags
- *  Specific test cases: no packets with PSH, SYN, URG, RST set will
- *  be coalesced.
- * 4.tcp
+ *
+ * flags_*:
+ *  No packets with PSH, SYN, URG, RST, CWR set will be coalesced.
+ *   - flags_psh, flags_syn, flags_rst, flags_urg, flags_cwr
+ *
+ * tcp_*:
  *  Packets with incorrect checksum, non-consecutive seqno and
  *  different TCP header options shouldn't coalesce. Nit: given that
  *  some extension headers have paddings, such as timestamp, headers
- *  that are padding differently would not be coalesced.
- * 5.ip:
- *  Packets with different (ECN, TTL, TOS) header, ip options or
- *  ip fragments (ipv6) shouldn't coalesce.
- * 6.large:
+ *  that are padded differently would not be coalesced.
+ *   - tcp_csum: incorrect checksum
+ *   - tcp_seq:  non-consecutive sequence numbers
+ *   - tcp_ts:   different timestamps
+ *   - tcp_opt:  different TCP options
+ *
+ * ip_*:
+ *  Packets with different (ECN, TTL, TOS) header, IP options or
+ *  IP fragments shouldn't coalesce.
+ *   - ip_ecn, ip_tos:            shared between IPv4/IPv6
+ *   - ip_csum:                   IPv4 only, bad IP header checksum
+ *   - ip_ttl, ip_opt, ip_frag4:  IPv4 only
+ *   - ip_id_df*:                 IPv4 IP ID field coalescing tests
+ *   - ip_frag6, ip_v6ext_*:      IPv6 only
+ *
+ * large_*:
  *  Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *   - large_max: exceeding max size
+ *   - large_rem: remainder handling
+ *
+ * single, capacity:
+ *  Boring cases used to test coalescing machinery itself and stats
+ *  more than protocol behavior.
  *
  * MSS is defined as 4096 - header because if it is too small
  * (i.e. 1500 MTU - header), it will result in many packets,
@@ -44,6 +70,7 @@
 #include <linux/filter.h>
 #include <linux/if_packet.h>
 #include <linux/ipv6.h>
+#include <linux/net_tstamp.h>
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <netinet/in.h>
@@ -55,9 +82,11 @@
 #include <stdio.h>
 #include <stdarg.h>
 #include <string.h>
+#include <time.h>
 #include <unistd.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
+#include "ksft.h"
 
 #define DPORT 8000
 #define SPORT 1500
@@ -66,11 +95,12 @@
 #define START_SEQ 100
 #define START_ACK 100
 #define ETH_P_NONE 0
-#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
-#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
-#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
-#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
-#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define ASSUMED_MTU 4096
+#define MAX_MSS (ASSUMED_MTU - sizeof(struct iphdr) - sizeof(struct tcphdr))
+#define MAX_HDR_LEN \
+	(ETH_HLEN + sizeof(struct ipv6hdr) * 2 + sizeof(struct tcphdr))
+#define MAX_LARGE_PKT_CNT ((IP_MAXPACKET - (MAX_HDR_LEN - ETH_HLEN)) /	\
+			   (ASSUMED_MTU - (MAX_HDR_LEN - ETH_HLEN)))
 #define MIN_EXTHDR_SIZE 8
 #define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
 #define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
@@ -78,6 +108,15 @@
 #define ipv6_optlen(p)  (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 
+enum flush_id_case {
+	FLUSH_ID_DF1_INC,
+	FLUSH_ID_DF1_FIXED,
+	FLUSH_ID_DF0_INC,
+	FLUSH_ID_DF0_FIXED,
+	FLUSH_ID_DF1_INC_FIXED,
+	FLUSH_ID_DF1_FIXED_INC,
+};
+
 static const char *addr6_src = "fdaa::2";
 static const char *addr6_dst = "fdaa::1";
 static const char *addr4_src = "192.168.1.200";
@@ -93,7 +132,33 @@ static bool tx_socket = true;
 static int tcp_offset = -1;
 static int total_hdr_len = -1;
 static int ethhdr_proto = -1;
-static const int num_flush_id_cases = 6;
+static bool ipip;
+static bool ip6ip6;
+static uint64_t txtime_ns;
+static int num_flows = 4;
+static bool order_check;
+
+#define CAPACITY_PAYLOAD_LEN 200
+
+#define TXTIME_DELAY_MS 5
+
+/* Max TCP payload that GRO will coalesce. The outer header overhead
+ * varies by encapsulation, reducing the effective max payload.
+ */
+static int max_payload(void)
+{
+	return IP_MAXPACKET - (total_hdr_len - ETH_HLEN);
+}
+
+static int calc_mss(void)
+{
+	return ASSUMED_MTU - (total_hdr_len - ETH_HLEN);
+}
+
+static int num_large_pkt(void)
+{
+	return max_payload() / calc_mss();
+}
 
 static void vlog(const char *fmt, ...)
 {
@@ -112,30 +177,30 @@ static void setup_sock_filter(int fd)
 	const int ethproto_off = offsetof(struct ethhdr, h_proto);
 	int optlen = 0;
 	int ipproto_off, opt_ipproto_off;
-	int next_off;
 
 	if (proto == PF_INET)
-		next_off = offsetof(struct iphdr, protocol);
+		ipproto_off = tcp_offset - sizeof(struct iphdr) +
+			      offsetof(struct iphdr, protocol);
 	else
-		next_off = offsetof(struct ipv6hdr, nexthdr);
-	ipproto_off = ETH_HLEN + next_off;
+		ipproto_off = tcp_offset - sizeof(struct ipv6hdr) +
+			      offsetof(struct ipv6hdr, nexthdr);
 
 	/* Overridden later if exthdrs are used: */
 	opt_ipproto_off = ipproto_off;
 
-	if (strcmp(testname, "ip") == 0) {
-		if (proto == PF_INET)
-			optlen = sizeof(struct ip_timestamp);
-		else {
-			BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
-			BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
-			BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
-
-			/* same size for HBH and Fragment extension header types */
-			optlen = MIN_EXTHDR_SIZE;
-			opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
-				+ offsetof(struct ip6_ext, ip6e_nxt);
-		}
+	if (strcmp(testname, "ip_opt") == 0) {
+		optlen = sizeof(struct ip_timestamp);
+	} else if (strcmp(testname, "ip_frag6") == 0 ||
+		   strcmp(testname, "ip_v6ext_same") == 0 ||
+		   strcmp(testname, "ip_v6ext_diff") == 0) {
+		BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
+		BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
+		BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
+
+		/* same size for HBH and Fragment extension header types */
+		optlen = MIN_EXTHDR_SIZE;
+		opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
+			+ offsetof(struct ip6_ext, ip6e_nxt);
 	}
 
 	/* this filter validates the following:
@@ -244,7 +309,7 @@ static void fill_datalinklayer(void *buf)
 	eth->h_proto = ethhdr_proto;
 }
 
-static void fill_networklayer(void *buf, int payload_len)
+static void fill_networklayer(void *buf, int payload_len, int protocol)
 {
 	struct ipv6hdr *ip6h = buf;
 	struct iphdr *iph = buf;
@@ -254,7 +319,7 @@ static void fill_networklayer(void *buf, int payload_len)
 
 		ip6h->version = 6;
 		ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
-		ip6h->nexthdr = IPPROTO_TCP;
+		ip6h->nexthdr = protocol;
 		ip6h->hop_limit = 8;
 		if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
 			error(1, errno, "inet_pton source ip6");
@@ -266,7 +331,7 @@ static void fill_networklayer(void *buf, int payload_len)
 		iph->version = 4;
 		iph->ihl = 5;
 		iph->ttl = 8;
-		iph->protocol	= IPPROTO_TCP;
+		iph->protocol	= protocol;
 		iph->tot_len = htons(sizeof(struct tcphdr) +
 				payload_len + sizeof(struct iphdr));
 		iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
@@ -299,52 +364,155 @@ static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
 
 static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
 {
+	char control[CMSG_SPACE(sizeof(uint64_t))];
+	struct msghdr msg = {};
+	struct iovec iov = {};
+	struct cmsghdr *cm;
 	int ret = -1;
 
-	ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+	iov.iov_base = buf;
+	iov.iov_len = len;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_name = daddr;
+	msg.msg_namelen = sizeof(*daddr);
+
+	if (txtime_ns) {
+		memset(control, 0, sizeof(control));
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
+
+		cm = CMSG_FIRSTHDR(&msg);
+		cm->cmsg_level = SOL_SOCKET;
+		cm->cmsg_type = SCM_TXTIME;
+		cm->cmsg_len = CMSG_LEN(sizeof(uint64_t));
+		memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(txtime_ns));
+	}
+
+	ret = sendmsg(fd, &msg, 0);
 	if (ret == -1)
-		error(1, errno, "sendto failure");
+		error(1, errno, "sendmsg failure");
 	if (ret != len)
-		error(1, errno, "sendto wrong length");
+		error(1, 0, "sendmsg wrong length: %d vs %d", ret, len);
 }
 
 static void create_packet(void *buf, int seq_offset, int ack_offset,
 			  int payload_len, int fin)
 {
+	int ip_hdr_len = (proto == PF_INET) ?
+			 sizeof(struct iphdr) : sizeof(struct ipv6hdr);
+	int inner_ip_off = tcp_offset - ip_hdr_len;
+
 	memset(buf, 0, total_hdr_len);
 	memset(buf + total_hdr_len, 'a', payload_len);
+
 	fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
 			    payload_len, fin);
-	fill_networklayer(buf + ETH_HLEN, payload_len);
+
+	fill_networklayer(buf + inner_ip_off, payload_len, IPPROTO_TCP);
+	if (inner_ip_off > ETH_HLEN) {
+		int encap_proto = (proto == PF_INET) ?
+				  IPPROTO_IPIP : IPPROTO_IPV6;
+
+		fill_networklayer(buf + ETH_HLEN,
+				  payload_len + ip_hdr_len, encap_proto);
+	}
+
 	fill_datalinklayer(buf);
 }
 
-/* send one extra flag, not first and not last pkt */
-static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
-		       int rst, int urg)
+static void create_capacity_packet(void *buf, int flow_id, int pkt_idx, int psh)
 {
-	static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
-	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
-	int payload_len, pkt_size, flag, i;
+	int seq_offset = pkt_idx * CAPACITY_PAYLOAD_LEN;
 	struct tcphdr *tcph;
 
-	payload_len = PAYLOAD_LEN * psh;
-	pkt_size = total_hdr_len + payload_len;
-	flag = NUM_PACKETS / 2;
+	create_packet(buf, seq_offset, 0, CAPACITY_PAYLOAD_LEN, 0);
 
-	create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+	/* Customize for this flow id */
+	memset(buf + total_hdr_len, 'a' + flow_id, CAPACITY_PAYLOAD_LEN);
 
-	tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+	tcph = buf + tcp_offset;
+	tcph->source = htons(SPORT + flow_id);
+	tcph->psh = psh;
+	tcph->check = 0;
+	tcph->check = tcp_checksum(tcph, CAPACITY_PAYLOAD_LEN);
+}
+
+/* Send a capacity test, 2 packets per flow, all first packets then all second:
+ *  A1 B1 C1 D1 ... A2 B2 C2 D2 ...
+ */
+static void send_capacity(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + CAPACITY_PAYLOAD_LEN];
+	int pkt_size = total_hdr_len + CAPACITY_PAYLOAD_LEN;
+	int i;
+
+	/* Send first packet of each flow (no PSH) */
+	for (i = 0; i < num_flows; i++) {
+		create_capacity_packet(buf, i, 0, 0);
+		write_packet(fd, buf, pkt_size, daddr);
+	}
+
+	/* Send second packet of each flow (with PSH to flush) */
+	for (i = 0; i < num_flows; i++) {
+		create_capacity_packet(buf, i, 1, 1);
+		write_packet(fd, buf, pkt_size, daddr);
+	}
+}
+
+#ifndef TH_CWR
+#define TH_CWR 0x80
+#endif
+static void set_flags(struct tcphdr *tcph, int payload_len, int psh, int syn,
+		      int rst, int urg, int cwr)
+{
 	tcph->psh = psh;
 	tcph->syn = syn;
 	tcph->rst = rst;
 	tcph->urg = urg;
+	if (cwr)
+		tcph->th_flags |= TH_CWR;
+	else
+		tcph->th_flags &= ~TH_CWR;
 	tcph->check = 0;
 	tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+/* send extra flags of the (NUM_PACKETS / 2) and (NUM_PACKETS / 2 - 1)
+ * pkts, not first and not last pkt
+ */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+		       int rst, int urg, int cwr)
+{
+	static char flag_buf[2][MAX_HDR_LEN + PAYLOAD_LEN];
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	int payload_len, pkt_size, i;
+	struct tcphdr *tcph;
+	int flag[2];
+
+	payload_len = PAYLOAD_LEN * (psh || cwr);
+	pkt_size = total_hdr_len + payload_len;
+	flag[0] = NUM_PACKETS / 2;
+	flag[1] = NUM_PACKETS / 2 - 1;
+
+	/* Create and configure packets with flags
+	 */
+	for (i = 0; i < 2; i++) {
+		if (flag[i] > 0) {
+			create_packet(flag_buf[i], flag[i] * payload_len, 0,
+				      payload_len, 0);
+			tcph = (struct tcphdr *)(flag_buf[i] + tcp_offset);
+			set_flags(tcph, payload_len, psh, syn, rst, urg, cwr);
+		}
+	}
 
 	for (i = 0; i < NUM_PACKETS + 1; i++) {
-		if (i == flag) {
-			write_packet(fd, flag_buf, pkt_size, daddr);
+		if (i == flag[0]) {
+			write_packet(fd, flag_buf[0], pkt_size, daddr);
+			continue;
+		} else if (i == flag[1] && cwr) {
+			write_packet(fd, flag_buf[1], pkt_size, daddr);
 			continue;
 		}
 		create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
@@ -371,18 +539,20 @@ static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
  */
 static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
 {
-	static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
-	static char last[TOTAL_HDR_LEN + MSS];
-	static char new_seg[TOTAL_HDR_LEN + MSS];
+	static char pkts[MAX_LARGE_PKT_CNT][MAX_HDR_LEN + MAX_MSS];
+	static char new_seg[MAX_HDR_LEN + MAX_MSS];
+	static char last[MAX_HDR_LEN + MAX_MSS];
+	const int num_pkt = num_large_pkt();
+	const int mss = calc_mss();
 	int i;
 
-	for (i = 0; i < NUM_LARGE_PKT; i++)
-		create_packet(pkts[i], i * MSS, 0, MSS, 0);
-	create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
-	create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+	for (i = 0; i < num_pkt; i++)
+		create_packet(pkts[i], i * mss, 0, mss, 0);
+	create_packet(last, num_pkt * mss, 0, remainder, 0);
+	create_packet(new_seg, (num_pkt + 1) * mss, 0, remainder, 0);
 
-	for (i = 0; i < NUM_LARGE_PKT; i++)
-		write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+	for (i = 0; i < num_pkt; i++)
+		write_packet(fd, pkts[i], total_hdr_len + mss, daddr);
 	write_packet(fd, last, total_hdr_len + remainder, daddr);
 	write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
 }
@@ -402,8 +572,7 @@ static void send_ack(int fd, struct sockaddr_ll *daddr)
 static void recompute_packet(char *buf, char *no_ext, int extlen)
 {
 	struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
-	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
-	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+	int off;
 
 	memmove(buf, no_ext, total_hdr_len);
 	memmove(buf + total_hdr_len + extlen,
@@ -413,11 +582,22 @@ static void recompute_packet(char *buf, char *no_ext, int extlen)
 	tcphdr->check = 0;
 	tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
 	if (proto == PF_INET) {
-		iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
-		iph->check = 0;
-		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+		for (off = ETH_HLEN; off < tcp_offset;
+		     off += sizeof(struct iphdr)) {
+			struct iphdr *iph = (struct iphdr *)(buf + off);
+
+			iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+			iph->check = 0;
+			iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+		}
 	} else {
-		ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+		for (off = ETH_HLEN; off < tcp_offset;
+		     off += sizeof(struct ipv6hdr)) {
+			struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + off);
+
+			ip6h->payload_len =
+				htons(ntohs(ip6h->payload_len) + extlen);
+		}
 	}
 }
 
@@ -506,6 +686,33 @@ static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
 	write_packet(fd, buf, pkt_size, daddr);
 }
 
+/* Packets with incorrect IPv4 header checksum don't coalesce.
+ *
+ * Sends three PAYLOAD_LEN segments; only the second one has the checksum of
+ * the IPv4 header located at ETH_HLEN corrupted. The "ip_csum" receiver
+ * expects three separate PAYLOAD_LEN packets.
+ */
+static void send_changed_ip_checksum(int fd, struct sockaddr_ll *daddr)
+{
+	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+	int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+	/* Flip one bit rather than subtracting 1: 0x0000 - 1 wraps to 0xffff,
+	 * which is the same value in one's-complement arithmetic and would
+	 * leave the header checksum valid.
+	 */
+	iph->check ^= htons(1);
+	write_packet(fd, buf, pkt_size, daddr);
+
+	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+	write_packet(fd, buf, pkt_size, daddr);
+}
+
  /* Packets with non-consecutive sequence number don't coalesce.*/
 static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
 {
@@ -627,7 +825,8 @@ static void fix_ip4_checksum(struct iphdr *iph)
 	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
 }
 
-static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
+static void send_flush_id_case(int fd, struct sockaddr_ll *daddr,
+			       enum flush_id_case tcase)
 {
 	static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
 	static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
@@ -646,7 +845,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
 	create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
 
 	switch (tcase) {
-	case 0: /* DF=1, Incrementing - should coalesce */
+	case FLUSH_ID_DF1_INC: /* DF=1, Incrementing - should coalesce */
 		iph1->frag_off |= htons(IP_DF);
 		iph1->id = htons(8);
 
@@ -654,7 +853,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
 		iph2->id = htons(9);
 		break;
 
-	case 1: /* DF=1, Fixed - should coalesce */
+	case FLUSH_ID_DF1_FIXED: /* DF=1, Fixed - should coalesce */
 		iph1->frag_off |= htons(IP_DF);
 		iph1->id = htons(8);
 
@@ -662,7 +861,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
 		iph2->id = htons(8);
 		break;
 
-	case 2: /* DF=0, Incrementing - should coalesce */
+	case FLUSH_ID_DF0_INC: /* DF=0, Incrementing - should coalesce */
 		iph1->frag_off &= ~htons(IP_DF);
 		iph1->id = htons(8);
 
@@ -670,7 +869,7 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
 		iph2->id = htons(9);
 		break;
 
-	case 3: /* DF=0, Fixed - should not coalesce */
+	case FLUSH_ID_DF0_FIXED: /* DF=0, Fixed - should coalesce */
 		iph1->frag_off &= ~htons(IP_DF);
 		iph1->id = htons(8);
 
@@ -678,9 +877,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
 		iph2->id = htons(8);
 		break;
 
-	case 4: /* DF=1, two packets incrementing, and one fixed - should
-		 * coalesce only the first two packets
-		 */
+	case FLUSH_ID_DF1_INC_FIXED: /* DF=1, two packets incrementing, and
+				      * one fixed - should coalesce only the
+				      * first two packets
+				      */
 		iph1->frag_off |= htons(IP_DF);
 		iph1->id = htons(8);
 
@@ -692,9 +892,10 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
 		send_three = true;
 		break;
 
-	case 5: /* DF=1, two packets fixed, and one incrementing - should
-		 * coalesce only the first two packets
-		 */
+	case FLUSH_ID_DF1_FIXED_INC: /* DF=1, two packets fixed, and one
+				      * incrementing - should coalesce only
+				      * the first two packets
+				      */
 		iph1->frag_off |= htons(IP_DF);
 		iph1->id = htons(8);
 
@@ -718,27 +919,21 @@ static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
 	}
 }
 
-static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
-{
-	for (int i = 0; i < num_flush_id_cases; i++) {
-		sleep(1);
-		send_flush_id_case(fd, daddr, i);
-		sleep(1);
-		write_packet(fd, fin_pkt, total_hdr_len, daddr);
-	}
-}
-
+/* Send two PAYLOAD_LEN segments, each with an IPv6 destination options
+ * extension header built by add_ipv6_exthdr(); the first header carries
+ * ext_data1 and the second ext_data2.
+ */
 static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
 {
 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
 	static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
 
 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
-	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1);
+	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1);
 	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
 
 	create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
-	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2);
+	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2);
 	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
 }
 
@@ -777,7 +968,7 @@ static void send_fragment4(int fd, struct sockaddr_ll *daddr)
 	 */
 	memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
 	fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
-	fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+	fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP);
 	fill_datalinklayer(buf);
 
 	iph->frag_off = htons(0x6000); // DF = 1, MF = 1
@@ -905,6 +1096,40 @@ static void set_timeout(int fd)
 		error(1, errno, "cannot set timeout, setsockopt failed");
 }
 
+/* Request a 1 MB receive buffer (SO_RCVBUF) on @fd; exits on failure. */
+static void set_rcvbuf(int fd)
+{
+	int bufsize = 1 * 1024 * 1024; /* 1 MB */
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bufsize, sizeof(bufsize)))
+		error(1, errno, "cannot set rcvbuf size, setsockopt failed");
+}
+
+/* Report a failed recv() and exit.
+ *
+ * Prints the packet socket's PACKET_STATISTICS counters to stderr, then
+ * exits with status 1 reporting @rcv_errno, the errno value the caller
+ * passes in for the failed recv().
+ */
+static void recv_error(int fd, int rcv_errno)
+{
+	struct tpacket_stats stats;
+	socklen_t len;
+
+	len = sizeof(stats);
+	/* The stats are only a debugging aid: if they cannot be read, warn
+	 * and still report the receive failure instead of replacing it with
+	 * the getsockopt() error.
+	 */
+	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len)) {
+		error(0, errno, "can't get stats");
+	} else {
+		fprintf(stderr, "Socket stats: packets=%u, drops=%u\n",
+			stats.tp_packets, stats.tp_drops);
+	}
+	error(1, rcv_errno, "could not receive");
+}
+
 static void check_recv_pkts(int fd, int *correct_payload,
 			    int correct_num_pkts)
 {
@@ -929,11 +1142,12 @@ static void check_recv_pkts(int fd, int *correct_payload,
 		ip_ext_len = 0;
 		pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
 		if (pkt_size < 0)
-			error(1, errno, "could not receive");
+			recv_error(fd, errno);
 
 		if (iph->version == 4)
 			ip_ext_len = (iph->ihl - 5) * 4;
-		else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+		else if (ip6h->version == 6 && !ip6ip6 &&
+			 ip6h->nexthdr != IPPROTO_TCP)
 			ip_ext_len = MIN_EXTHDR_SIZE;
 
 		tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
@@ -967,8 +1181,153 @@ static void check_recv_pkts(int fd, int *correct_payload,
 	printf("Test succeeded\n\n");
 }
 
+/* Receiver side of the "capacity" test.
+ *
+ * Reads packets from @fd until one with the TCP FIN flag arrives. Each
+ * packet is attributed to a flow by its TCP source port
+ * (flow_id = sport - SPORT) and is expected to carry
+ * CAPACITY_PAYLOAD_LEN * 2 bytes of payload. With order_check set, the
+ * arrival order of the flows is validated as well. Once the FIN is seen,
+ * prints the verdict and a "STATS:" line; on failure exits through error()
+ * with the first failure reason recorded.
+ */
+static void check_capacity_pkts(int fd)
+{
+	static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+	struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+	int num_pkt = 0, num_coal = 0, pkt_idx;
+	const char *fail_reason = NULL;
+	int flow_order[num_flows * 2];
+	int coalesced[num_flows];
+	struct tcphdr *tcph;
+	int ip_ext_len = 0;
+	int total_data = 0;
+	int pkt_size = -1;
+	int data_len = 0;
+	int flow_id;
+	int sport;
+
+	memset(coalesced, 0, sizeof(coalesced));
+	/* -1 marks flow_order slots that no packet has filled. */
+	memset(flow_order, -1, sizeof(flow_order));
+
+	while (1) {
+		ip_ext_len = 0;
+		pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+		if (pkt_size < 0)
+			recv_error(fd, errno);
+
+		/* Account for IPv4 options or, for non-tunnelled IPv6, one
+		 * MIN_EXTHDR_SIZE extension header before the TCP header.
+		 */
+		if (iph->version == 4)
+			ip_ext_len = (iph->ihl - 5) * 4;
+		else if (ip6h->version == 6 && !ip6ip6 &&
+			 ip6h->nexthdr != IPPROTO_TCP)
+			ip_ext_len = MIN_EXTHDR_SIZE;
+
+		tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+		/* A FIN packet terminates the receive loop. */
+		if (tcph->fin)
+			break;
+
+		sport = ntohs(tcph->source);
+		flow_id = sport - SPORT;
+
+		if (flow_id < 0 || flow_id >= num_flows) {
+			vlog("Invalid flow_id %d from sport %d\n",
+			     flow_id, sport);
+			/* "?:" keeps the first failure reason that was set. */
+			fail_reason = fail_reason ?: "invalid packet";
+			continue;
+		}
+
+		/* Calculate payload length. For an IPv4 frame of exactly
+		 * ETH_ZLEN bytes, use the IP total length rather than the
+		 * frame size (presumably the frame may be padded).
+		 */
+		if (pkt_size == ETH_ZLEN && iph->version == 4) {
+			data_len = ntohs(iph->tot_len)
+				- sizeof(struct tcphdr) - sizeof(struct iphdr);
+		} else {
+			data_len = pkt_size - total_hdr_len - ip_ext_len;
+		}
+
+		/* Record arrival order for the first num_flows * 2 packets;
+		 * flag the overflow once, when the limit is first reached.
+		 */
+		if (num_pkt < num_flows * 2) {
+			flow_order[num_pkt] = flow_id;
+		} else if (num_pkt == num_flows * 2) {
+			vlog("More packets than expected (%d)\n",
+			     num_flows * 2);
+			fail_reason = fail_reason ?: "too many packets";
+		}
+		/* Remember the last payload length seen for this flow. */
+		coalesced[flow_id] = data_len;
+
+		if (data_len == CAPACITY_PAYLOAD_LEN * 2) {
+			num_coal++;
+		} else {
+			vlog("Pkt %d: flow %d, sport %d, len %d (expected %d)\n",
+			     num_pkt, flow_id, sport, data_len,
+			     CAPACITY_PAYLOAD_LEN * 2);
+			fail_reason = fail_reason ?: "not coalesced";
+		}
+
+		num_pkt++;
+		total_data += data_len;
+	}
+
+	/* Check flow ordering. We expect to see all non-coalesced first segs
+	 * then interleaved coalesced and non-coalesced second frames.
+	 */
+	pkt_idx = 0;
+	for (flow_id = 0; order_check && flow_id < num_flows; flow_id++) {
+		bool coaled = coalesced[flow_id] > CAPACITY_PAYLOAD_LEN;
+
+		if (coaled)
+			continue;
+
+		if (flow_order[pkt_idx] != flow_id) {
+			vlog("Flow order mismatch (non-coalesced) at position %d: expected flow %d, got flow %d\n",
+			     pkt_idx, flow_id, flow_order[pkt_idx]);
+			fail_reason = fail_reason ?: "bad packet order (1)";
+		}
+		pkt_idx++;
+	}
+	for (flow_id = 0; order_check && flow_id < num_flows; flow_id++) {
+		bool coaled = coalesced[flow_id] > CAPACITY_PAYLOAD_LEN;
+
+		if (flow_order[pkt_idx] != flow_id) {
+			vlog("Flow order mismatch at position %d: expected flow %d, got flow %d, coalesced: %d\n",
+			     pkt_idx, flow_id, flow_order[pkt_idx], coaled);
+			fail_reason = fail_reason ?: "bad packet order (2)";
+		}
+		pkt_idx++;
+	}
+
+	if (!fail_reason) {
+		vlog("All %d flows coalesced correctly\n", num_flows);
+		printf("Test succeeded\n\n");
+	} else {
+		printf("FAILED\n");
+	}
+
+	/* Always print stats for external validation */
+	printf("STATS: received=%d wire=%d coalesced=%d\n",
+	       num_pkt, num_pkt + num_coal, num_coal);
+
+	if (fail_reason)
+		error(1, 0, "capacity test failed %s", fail_reason);
+}
+
 static void gro_sender(void)
 {
+	int bufsize = 4 * 1024 * 1024; /* 4 MB */
+	const int fin_delay_us = 100 * 1000;
 	static char fin_pkt[MAX_HDR_LEN];
 	struct sockaddr_ll daddr = {};
 	int txfd = -1;
@@ -977,6 +1313,27 @@ static void gro_sender(void)
 	if (txfd < 0)
 		error(1, errno, "socket creation");
 
+	if (setsockopt(txfd, SOL_SOCKET, SO_SNDBUF, &bufsize, sizeof(bufsize)))
+		error(1, errno, "cannot set sndbuf size, setsockopt failed");
+
+	/* Enable SO_TXTIME unless test case generates more than one flow.
+	 * SO_TXTIME could result in qdisc layer sorting the packets at sender.
+	 */
+	if (strcmp(testname, "single") && strcmp(testname, "capacity")) {
+		struct sock_txtime so_txtime = { .clockid = CLOCK_MONOTONIC, };
+		struct timespec ts;
+
+		if (setsockopt(txfd, SOL_SOCKET, SO_TXTIME,
+			       &so_txtime, sizeof(so_txtime)))
+			error(1, errno, "setsockopt SO_TXTIME");
+
+		if (clock_gettime(CLOCK_MONOTONIC, &ts))
+			error(1, errno, "clock_gettime");
+
+		txtime_ns = ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+		txtime_ns += TXTIME_DELAY_MS * 1000000ULL;
+	}
+
 	memset(&daddr, 0, sizeof(daddr));
 	daddr.sll_ifindex = if_nametoindex(ifname);
 	if (daddr.sll_ifindex == 0)
@@ -986,101 +1343,164 @@ static void gro_sender(void)
 	daddr.sll_halen = ETH_ALEN;
 	create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
 
-	if (strcmp(testname, "data") == 0) {
+	/* data sub-tests */
+	if (strcmp(testname, "data_same") == 0) {
 		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+	} else if (strcmp(testname, "data_lrg_sml") == 0) {
 		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+	} else if (strcmp(testname, "data_lrg_1byte") == 0) {
+		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, 1);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "data_sml_lrg") == 0) {
 		send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "data_burst") == 0) {
+		static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+
+		create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+		write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr);
+		create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+		write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr);
+
+		usleep(100 * 1000); /* 100ms */
+		create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+		write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr);
+		create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+		write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr);
+
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+	/* ack test */
 	} else if (strcmp(testname, "ack") == 0) {
 		send_ack(txfd, &daddr);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-	} else if (strcmp(testname, "flags") == 0) {
-		send_flags(txfd, &daddr, 1, 0, 0, 0);
-		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
 
-		send_flags(txfd, &daddr, 0, 1, 0, 0);
+	/* flags sub-tests */
+	} else if (strcmp(testname, "flags_psh") == 0) {
+		send_flags(txfd, &daddr, 1, 0, 0, 0, 0);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
-		send_flags(txfd, &daddr, 0, 0, 1, 0);
+	} else if (strcmp(testname, "flags_syn") == 0) {
+		send_flags(txfd, &daddr, 0, 1, 0, 0, 0);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
-		send_flags(txfd, &daddr, 0, 0, 0, 1);
+	} else if (strcmp(testname, "flags_rst") == 0) {
+		send_flags(txfd, &daddr, 0, 0, 1, 0, 0);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-	} else if (strcmp(testname, "tcp") == 0) {
-		send_changed_checksum(txfd, &daddr);
+	} else if (strcmp(testname, "flags_urg") == 0) {
+		send_flags(txfd, &daddr, 0, 0, 0, 1, 0);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "flags_cwr") == 0) {
+		send_flags(txfd, &daddr, 0, 0, 0, 0, 1);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
 
+	/* tcp sub-tests */
+	} else if (strcmp(testname, "tcp_csum") == 0) {
+		send_changed_checksum(txfd, &daddr);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "tcp_seq") == 0) {
 		send_changed_seq(txfd, &daddr);
+		usleep(fin_delay_us);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+	} else if (strcmp(testname, "tcp_ts") == 0) {
 		send_changed_ts(txfd, &daddr);
+		usleep(fin_delay_us);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+	} else if (strcmp(testname, "tcp_opt") == 0) {
 		send_diff_opt(txfd, &daddr);
+		usleep(fin_delay_us);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-	} else if (strcmp(testname, "ip") == 0) {
+
+	/* ip sub-tests - shared between IPv4 and IPv6 */
+	} else if (strcmp(testname, "ip_ecn") == 0) {
 		send_changed_ECN(txfd, &daddr);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
+	} else if (strcmp(testname, "ip_tos") == 0) {
 		send_changed_tos(txfd, &daddr);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-		if (proto == PF_INET) {
-			/* Modified packets may be received out of order.
-			 * Sleep function added to enforce test boundaries
-			 * so that fin pkts are not received prior to other pkts.
-			 */
-			sleep(1);
-			send_changed_ttl(txfd, &daddr);
-			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
-			sleep(1);
-			send_ip_options(txfd, &daddr);
-			sleep(1);
-			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
-			sleep(1);
-			send_fragment4(txfd, &daddr);
-			sleep(1);
-			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
-			test_flush_id(txfd, &daddr, fin_pkt);
-		} else if (proto == PF_INET6) {
-			sleep(1);
-			send_fragment6(txfd, &daddr);
-			sleep(1);
-			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
-			sleep(1);
-			/* send IPv6 packets with ext header with same payload */
-			send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
-			sleep(1);
-			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
-			sleep(1);
-			/* send IPv6 packets with ext header with different payload */
-			send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
-			sleep(1);
-			write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-		}
-	} else if (strcmp(testname, "large") == 0) {
-		/* 20 is the difference between min iphdr size
-		 * and min ipv6hdr size. Like MAX_HDR_SIZE,
-		 * MAX_PAYLOAD is defined with the larger header of the two.
-		 */
-		int offset = proto == PF_INET ? 20 : 0;
-		int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+	/* ip sub-tests - IPv4 only */
+	} else if (strcmp(testname, "ip_csum") == 0) {
+		send_changed_ip_checksum(txfd, &daddr);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_ttl") == 0) {
+		send_changed_ttl(txfd, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_opt") == 0) {
+		send_ip_options(txfd, &daddr);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_frag4") == 0) {
+		send_fragment4(txfd, &daddr);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_id_df1_inc") == 0) {
+		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
+		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_id_df0_inc") == 0) {
+		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_INC);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
+		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_FIXED);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
+		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC_FIXED);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
+		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED_INC);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+	/* ip sub-tests - IPv6 only */
+	} else if (strcmp(testname, "ip_frag6") == 0) {
+		send_fragment6(txfd, &daddr);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_v6ext_same") == 0) {
+		send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "ip_v6ext_diff") == 0) {
+		send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+	/* large sub-tests */
+	} else if (strcmp(testname, "large_max") == 0) {
+		int remainder = max_payload() % calc_mss();
 
 		send_large(txfd, &daddr, remainder);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "large_rem") == 0) {
+		int remainder = max_payload() % calc_mss();
 
 		send_large(txfd, &daddr, remainder + 1);
 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+	/* machinery sub-tests */
+	} else if (strcmp(testname, "single") == 0) {
+		static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+
+		create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+		write_packet(txfd, buf, total_hdr_len + PAYLOAD_LEN, &daddr);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+	} else if (strcmp(testname, "capacity") == 0) {
+		send_capacity(txfd, &daddr);
+		usleep(fin_delay_us);
+		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
 	} else {
-		error(1, 0, "Unknown testcase");
+		error(1, 0, "Unknown testcase: %s", testname);
 	}
 
 	if (close(txfd))
@@ -1097,146 +1517,207 @@ static void gro_receiver(void)
 		error(1, 0, "socket creation");
 	setup_sock_filter(rxfd);
 	set_timeout(rxfd);
+	set_rcvbuf(rxfd);
 	bind_packetsocket(rxfd);
 
+	ksft_ready();
+
 	memset(correct_payload, 0, sizeof(correct_payload));
 
-	if (strcmp(testname, "data") == 0) {
+	/* data sub-tests */
+	if (strcmp(testname, "data_same") == 0) {
 		printf("pure data packet of same size: ");
 		correct_payload[0] = PAYLOAD_LEN * 2;
 		check_recv_pkts(rxfd, correct_payload, 1);
-
+	} else if (strcmp(testname, "data_lrg_sml") == 0) {
 		printf("large data packets followed by a smaller one: ");
 		correct_payload[0] = PAYLOAD_LEN * 1.5;
 		check_recv_pkts(rxfd, correct_payload, 1);
-
+	} else if (strcmp(testname, "data_lrg_1byte") == 0) {
+		printf("large data packet followed by a 1 byte one: ");
+		correct_payload[0] = PAYLOAD_LEN + 1;
+		check_recv_pkts(rxfd, correct_payload, 1);
+	} else if (strcmp(testname, "data_sml_lrg") == 0) {
 		printf("small data packets followed by a larger one: ");
 		correct_payload[0] = PAYLOAD_LEN / 2;
 		correct_payload[1] = PAYLOAD_LEN;
 		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "data_burst") == 0) {
+		printf("two bursts of two data packets: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+	/* ack test */
 	} else if (strcmp(testname, "ack") == 0) {
 		printf("duplicate ack and pure ack: ");
 		check_recv_pkts(rxfd, correct_payload, 3);
-	} else if (strcmp(testname, "flags") == 0) {
+
+	/* flags sub-tests */
+	} else if (strcmp(testname, "flags_psh") == 0) {
 		correct_payload[0] = PAYLOAD_LEN * 3;
 		correct_payload[1] = PAYLOAD_LEN * 2;
-
 		printf("psh flag ends coalescing: ");
 		check_recv_pkts(rxfd, correct_payload, 2);
-
+	} else if (strcmp(testname, "flags_syn") == 0) {
 		correct_payload[0] = PAYLOAD_LEN * 2;
 		correct_payload[1] = 0;
 		correct_payload[2] = PAYLOAD_LEN * 2;
 		printf("syn flag ends coalescing: ");
 		check_recv_pkts(rxfd, correct_payload, 3);
-
+	} else if (strcmp(testname, "flags_rst") == 0) {
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = 0;
+		correct_payload[2] = PAYLOAD_LEN * 2;
 		printf("rst flag ends coalescing: ");
 		check_recv_pkts(rxfd, correct_payload, 3);
-
+	} else if (strcmp(testname, "flags_urg") == 0) {
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = 0;
+		correct_payload[2] = PAYLOAD_LEN * 2;
 		printf("urg flag ends coalescing: ");
 		check_recv_pkts(rxfd, correct_payload, 3);
-	} else if (strcmp(testname, "tcp") == 0) {
+	} else if (strcmp(testname, "flags_cwr") == 0) {
 		correct_payload[0] = PAYLOAD_LEN;
-		correct_payload[1] = PAYLOAD_LEN;
-		correct_payload[2] = PAYLOAD_LEN;
-		correct_payload[3] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN * 2;
+		correct_payload[2] = PAYLOAD_LEN * 2;
+		printf("cwr flag ends coalescing: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
 
+	/* tcp sub-tests */
+	} else if (strcmp(testname, "tcp_csum") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
 		printf("changed checksum does not coalesce: ");
 		check_recv_pkts(rxfd, correct_payload, 2);
-
+	} else if (strcmp(testname, "tcp_seq") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
 		printf("Wrong Seq number doesn't coalesce: ");
 		check_recv_pkts(rxfd, correct_payload, 2);
-
-		printf("Different timestamp doesn't coalesce: ");
+	} else if (strcmp(testname, "tcp_ts") == 0) {
 		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = PAYLOAD_LEN;
+		correct_payload[2] = PAYLOAD_LEN;
+		correct_payload[3] = PAYLOAD_LEN;
+		printf("Different timestamp doesn't coalesce: ");
 		check_recv_pkts(rxfd, correct_payload, 4);
-
-		printf("Different options doesn't coalesce: ");
+	} else if (strcmp(testname, "tcp_opt") == 0) {
 		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = PAYLOAD_LEN;
+		printf("Different options doesn't coalesce: ");
 		check_recv_pkts(rxfd, correct_payload, 2);
-	} else if (strcmp(testname, "ip") == 0) {
+
+	/* ip sub-tests - shared between IPv4 and IPv6 */
+	} else if (strcmp(testname, "ip_ecn") == 0) {
 		correct_payload[0] = PAYLOAD_LEN;
 		correct_payload[1] = PAYLOAD_LEN;
-
 		printf("different ECN doesn't coalesce: ");
 		check_recv_pkts(rxfd, correct_payload, 2);
-
+	} else if (strcmp(testname, "ip_tos") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
 		printf("different tos doesn't coalesce: ");
 		check_recv_pkts(rxfd, correct_payload, 2);
 
-		if (proto == PF_INET) {
-			printf("different ttl doesn't coalesce: ");
-			check_recv_pkts(rxfd, correct_payload, 2);
-
-			printf("ip options doesn't coalesce: ");
-			correct_payload[2] = PAYLOAD_LEN;
-			check_recv_pkts(rxfd, correct_payload, 3);
-
-			printf("fragmented ip4 doesn't coalesce: ");
-			check_recv_pkts(rxfd, correct_payload, 2);
-
-			/* is_atomic checks */
-			printf("DF=1, Incrementing - should coalesce: ");
-			correct_payload[0] = PAYLOAD_LEN * 2;
-			check_recv_pkts(rxfd, correct_payload, 1);
-
-			printf("DF=1, Fixed - should coalesce: ");
-			correct_payload[0] = PAYLOAD_LEN * 2;
-			check_recv_pkts(rxfd, correct_payload, 1);
-
-			printf("DF=0, Incrementing - should coalesce: ");
-			correct_payload[0] = PAYLOAD_LEN * 2;
-			check_recv_pkts(rxfd, correct_payload, 1);
-
-			printf("DF=0, Fixed - should not coalesce: ");
-			correct_payload[0] = PAYLOAD_LEN;
-			correct_payload[1] = PAYLOAD_LEN;
-			check_recv_pkts(rxfd, correct_payload, 2);
-
-			printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
-			correct_payload[0] = PAYLOAD_LEN * 2;
-			correct_payload[1] = PAYLOAD_LEN;
-			check_recv_pkts(rxfd, correct_payload, 2);
-
-			printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
-			correct_payload[0] = PAYLOAD_LEN * 2;
-			correct_payload[1] = PAYLOAD_LEN;
-			check_recv_pkts(rxfd, correct_payload, 2);
-		} else if (proto == PF_INET6) {
-			/* GRO doesn't check for ipv6 hop limit when flushing.
-			 * Hence no corresponding test to the ipv4 case.
-			 */
-			printf("fragmented ip6 doesn't coalesce: ");
-			correct_payload[0] = PAYLOAD_LEN * 2;
-			correct_payload[1] = PAYLOAD_LEN;
-			correct_payload[2] = PAYLOAD_LEN;
-			check_recv_pkts(rxfd, correct_payload, 3);
-
-			printf("ipv6 with ext header does coalesce: ");
-			correct_payload[0] = PAYLOAD_LEN * 2;
-			check_recv_pkts(rxfd, correct_payload, 1);
-
-			printf("ipv6 with ext header with different payloads doesn't coalesce: ");
-			correct_payload[0] = PAYLOAD_LEN;
-			correct_payload[1] = PAYLOAD_LEN;
-			check_recv_pkts(rxfd, correct_payload, 2);
-		}
-	} else if (strcmp(testname, "large") == 0) {
-		int offset = proto == PF_INET ? 20 : 0;
-		int remainder = (MAX_PAYLOAD + offset) % MSS;
+	/* ip sub-tests - IPv4 only */
+	} else if (strcmp(testname, "ip_csum") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+		correct_payload[2] = PAYLOAD_LEN;
+		printf("bad ip checksum doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "ip_ttl") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+		printf("different ttl doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ip_opt") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+		correct_payload[2] = PAYLOAD_LEN;
+		printf("ip options doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "ip_frag4") == 0) {
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+		printf("fragmented ip4 doesn't coalesce: ");
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ip_id_df1_inc") == 0) {
+		printf("DF=1, Incrementing - should coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 1);
+	} else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
+		printf("DF=1, Fixed - should coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 1);
+	} else if (strcmp(testname, "ip_id_df0_inc") == 0) {
+		printf("DF=0, Incrementing - should coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 1);
+	} else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
+		printf("DF=0, Fixed - should coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 1);
+	} else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
+		printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = PAYLOAD_LEN;
+		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
+		printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = PAYLOAD_LEN;
+		check_recv_pkts(rxfd, correct_payload, 2);
+
+	/* ip sub-tests - IPv6 only */
+	} else if (strcmp(testname, "ip_frag6") == 0) {
+		/* GRO doesn't check for ipv6 hop limit when flushing.
+		 * Hence no corresponding test to the ipv4 case.
+		 */
+		printf("fragmented ip6 doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		correct_payload[1] = PAYLOAD_LEN;
+		correct_payload[2] = PAYLOAD_LEN;
+		check_recv_pkts(rxfd, correct_payload, 3);
+	} else if (strcmp(testname, "ip_v6ext_same") == 0) {
+		printf("ipv6 with ext header does coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN * 2;
+		check_recv_pkts(rxfd, correct_payload, 1);
+	} else if (strcmp(testname, "ip_v6ext_diff") == 0) {
+		printf("ipv6 with ext header with different payloads doesn't coalesce: ");
+		correct_payload[0] = PAYLOAD_LEN;
+		correct_payload[1] = PAYLOAD_LEN;
+		check_recv_pkts(rxfd, correct_payload, 2);
 
-		correct_payload[0] = (MAX_PAYLOAD + offset);
+	/* large sub-tests */
+	} else if (strcmp(testname, "large_max") == 0) {
+		int remainder = max_payload() % calc_mss();
+
+		correct_payload[0] = max_payload();
 		correct_payload[1] = remainder;
 		printf("Shouldn't coalesce if exceed IP max pkt size: ");
 		check_recv_pkts(rxfd, correct_payload, 2);
+	} else if (strcmp(testname, "large_rem") == 0) {
+		int remainder = max_payload() % calc_mss();
 
 		/* last segment sent individually, doesn't start new segment */
-		correct_payload[0] = correct_payload[0] - remainder;
+		correct_payload[0] = max_payload() - remainder;
 		correct_payload[1] = remainder + 1;
 		correct_payload[2] = remainder + 1;
+		printf("last segment sent individually: ");
 		check_recv_pkts(rxfd, correct_payload, 3);
+
+	/* machinery sub-tests */
+	} else if (strcmp(testname, "single") == 0) {
+		printf("single data packet: ");
+		correct_payload[0] = PAYLOAD_LEN;
+		check_recv_pkts(rxfd, correct_payload, 1);
+	} else if (strcmp(testname, "capacity") == 0) {
+		check_capacity_pkts(rxfd);
+
 	} else {
-		error(1, 0, "Test case error, should never trigger");
+		error(1, 0, "Test case error: unknown testname %s", testname);
 	}
 
 	if (close(rxfd))
@@ -1251,16 +1732,20 @@ static void parse_args(int argc, char **argv)
 		{ "iface", required_argument, NULL, 'i' },
 		{ "ipv4", no_argument, NULL, '4' },
 		{ "ipv6", no_argument, NULL, '6' },
+		{ "ipip", no_argument, NULL, 'e' },
+		{ "ip6ip6", no_argument, NULL, 'E' },
+		{ "num-flows", required_argument, NULL, 'n' },
 		{ "rx", no_argument, NULL, 'r' },
 		{ "saddr", required_argument, NULL, 's' },
 		{ "smac", required_argument, NULL, 'S' },
 		{ "test", required_argument, NULL, 't' },
+		{ "order-check", no_argument, NULL, 'o' },
 		{ "verbose", no_argument, NULL, 'v' },
 		{ 0, 0, 0, 0 }
 	};
 	int c;
 
-	while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) {
+	while ((c = getopt_long(argc, argv, "46d:D:eEi:n:rs:S:t:ov", opts, NULL)) != -1) {
 		switch (c) {
 		case '4':
 			proto = PF_INET;
@@ -1270,6 +1755,16 @@ static void parse_args(int argc, char **argv)
 			proto = PF_INET6;
 			ethhdr_proto = htons(ETH_P_IPV6);
 			break;
+		case 'e':
+			ipip = true;
+			proto = PF_INET;
+			ethhdr_proto = htons(ETH_P_IP);
+			break;
+		case 'E':
+			ip6ip6 = true;
+			proto = PF_INET6;
+			ethhdr_proto = htons(ETH_P_IPV6);
+			break;
 		case 'd':
 			addr4_dst = addr6_dst = optarg;
 			break;
@@ -1279,6 +1774,9 @@ static void parse_args(int argc, char **argv)
 		case 'i':
 			ifname = optarg;
 			break;
+		case 'n':
+			num_flows = atoi(optarg);
+			break;
 		case 'r':
 			tx_socket = false;
 			break;
@@ -1291,6 +1789,9 @@ static void parse_args(int argc, char **argv)
 		case 't':
 			testname = optarg;
 			break;
+		case 'o':
+			order_check = true;
+			break;
 		case 'v':
 			verbose = true;
 			break;
@@ -1305,12 +1806,18 @@ int main(int argc, char **argv)
 {
 	parse_args(argc, argv);
 
-	if (proto == PF_INET) {
+	if (ipip) {
+		tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2;
+		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+	} else if (ip6ip6) {
+		tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr) * 2;
+		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+	} else if (proto == PF_INET) {
 		tcp_offset = ETH_HLEN + sizeof(struct iphdr);
 		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
 	} else if (proto == PF_INET6) {
 		tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
-		total_hdr_len = MAX_HDR_LEN;
+		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
 	} else {
 		error(1, 0, "Protocol family is not ipv4 or ipv6");
 	}
@@ -1318,11 +1825,13 @@ int main(int argc, char **argv)
 	read_MAC(src_mac, smac);
 	read_MAC(dst_mac, dmac);
 
-	if (tx_socket)
+	if (tx_socket) {
 		gro_sender();
-	else
+	} else {
+		/* Only the receiver exit status determines test success. */
 		gro_receiver();
+		fprintf(stderr, "Gro::%s test passed.\n", testname);
+	}
 
-	fprintf(stderr, "Gro::%s test passed.\n", testname);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h
new file mode 100644
index 000000000000..03912902a6d3
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(__NET_KSFT_H__)
+#define __NET_KSFT_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Tell the test harness that this process finished its setup.
+ *
+ * Writes "ready\n" to the descriptor named by the KSFT_READY_FD environment
+ * variable, or to stdout when the variable is not set. A descriptor taken
+ * from the environment is closed afterwards; stdout is left open.
+ */
+static inline void ksft_ready(void)
+{
+	const char msg[7] = "ready\n";
+	char *env_str;
+	int fd;
+
+	env_str = getenv("KSFT_READY_FD");
+	if (env_str) {
+		fd = atoi(env_str);
+		/* atoi() yields 0 on garbage; a negative fd can't be valid */
+		if (fd <= 0) {
+			fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
+				env_str);
+			return;
+		}
+	} else {
+		fd = STDOUT_FILENO;
+	}
+
+	/* Don't send the string's terminating NUL along with the text */
+	if (write(fd, msg, sizeof(msg) - 1) < 0)
+		perror("write()");
+	if (fd != STDOUT_FILENO)
+		close(fd);
+}
+
+/*
+ * Block until the harness writes to, or closes, the wait descriptor.
+ *
+ * Reads a single byte from the descriptor named by the KSFT_WAIT_FD
+ * environment variable, or from stdin when the variable is not set. A
+ * descriptor taken from the environment is closed afterwards.
+ */
+static inline void ksft_wait(void)
+{
+	char *env_str;
+	char byte;
+	int fd;
+
+	env_str = getenv("KSFT_WAIT_FD");
+	if (env_str) {
+		fd = atoi(env_str);
+		if (fd <= 0) {
+			fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
+				env_str);
+			return;
+		}
+	} else {
+		/* Not running in KSFT env, wait for input from STDIN instead */
+		fd = STDIN_FILENO;
+	}
+
+	if (read(fd, &byte, sizeof(byte)) < 0)
+		perror("read()");
+	if (fd != STDIN_FILENO)
+		close(fd);
+}
+
+#endif
diff --git a/tools/testing/selftests/net/lib/ksft_setup_loopback.sh b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
new file mode 100755
index 000000000000..3defbb1919c5
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Setup script for running ksft tests over a real interface in loopback mode.
+# This script replaces the historical setup_loopback.sh. It puts
+# a (presumably) real hardware interface into loopback mode, creates macvlan
+# interfaces on top and places them in a network namespace for isolation.
+#
+# NETIF env variable must be exported to indicate the real target device.
+# Note that the test will override NETIF with one of the macvlans, the
+# actual ksft test will only see the macvlans.
+#
+# Example use:
+#   export NETIF=eth0
+#   ./net/lib/ksft_setup_loopback.sh ./drivers/net/gro.py
+
+if [ -z "$NETIF" ]; then
+    echo "Error: NETIF variable not set"
+    exit 1
+fi
+if ! [ -d "/sys/class/net/$NETIF" ]; then
+    echo "Error: Can't find $NETIF, invalid netdevice"
+    exit 1
+fi
+
+# Save original settings for cleanup
+readonly FLUSH_PATH="/sys/class/net/${NETIF}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${NETIF}/napi_defer_hard_irqs"
+FLUSH_TIMEOUT="$(< "${FLUSH_PATH}")"
+readonly FLUSH_TIMEOUT
+HARD_IRQS="$(< "${IRQ_PATH}")"
+readonly HARD_IRQS
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+readonly SERVER_NS
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+readonly CLIENT_NS
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+# ksft expects addresses to communicate with remote
+export  LOCAL_V6=2001:db8:1::1
+export REMOTE_V6=2001:db8:1::2
+
+cleanup() {
+    local exit_code=$?
+
+    # cleanup() ends in exit, which would fire the EXIT trap again when we
+    # got here via INT/TERM; drop the traps so the teardown runs only once.
+    trap - EXIT INT TERM
+
+    echo "Cleaning up..."
+
+    # Remove macvlan interfaces and namespaces
+    ip -netns "${SERVER_NS}" link del dev server 2>/dev/null || true
+    ip netns del "${SERVER_NS}" 2>/dev/null || true
+    ip -netns "${CLIENT_NS}" link del dev client 2>/dev/null || true
+    ip netns del "${CLIENT_NS}" 2>/dev/null || true
+
+    # Disable loopback
+    ethtool -K "${NETIF}" loopback off 2>/dev/null || true
+    sleep 1
+
+    echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+    echo "${HARD_IRQS}" >"${IRQ_PATH}"
+
+    exit $exit_code
+}
+
+trap cleanup EXIT INT TERM
+
+# Enable loopback mode
+echo "Enabling loopback on ${NETIF}..."
+ethtool -K "${NETIF}" loopback on || {
+    echo "Failed to enable loopback mode"
+    exit 1
+}
+# The interface may need time to get carrier back, but selftests
+# will wait for carrier, so no need to wait / sleep here.
+
+# Use timer on host to trigger the network stack
+# Also disable device interrupt to not depend on NIC interrupt
+# Reduce test flakiness caused by unexpected interrupts
+echo 100000 >"${FLUSH_PATH}"
+echo 50 >"${IRQ_PATH}"
+
+# Create server namespace with macvlan
+ip netns add "${SERVER_NS}"
+ip link add link "${NETIF}" dev server address "${SERVER_MAC}" type macvlan
+ip link set dev server netns "${SERVER_NS}"
+ip -netns "${SERVER_NS}" link set dev server up
+ip -netns "${SERVER_NS}" addr add "${LOCAL_V6}/64" dev server
+ip -netns "${SERVER_NS}" link set dev lo up
+
+# Create client namespace with macvlan
+ip netns add "${CLIENT_NS}"
+ip link add link "${NETIF}" dev client address "${CLIENT_MAC}" type macvlan
+ip link set dev client netns "${CLIENT_NS}"
+ip -netns "${CLIENT_NS}" link set dev client up
+ip -netns "${CLIENT_NS}" addr add "${REMOTE_V6}/64" dev client
+ip -netns "${CLIENT_NS}" link set dev lo up
+
+echo "Setup complete!"
+echo "  Device: ${NETIF}"
+echo "  Server NS: ${SERVER_NS}"
+echo "  Client NS: ${CLIENT_NS}"
+echo ""
+
+# Setup environment variables for tests
+export NETIF=server
+export REMOTE_TYPE=netns
+export REMOTE_ARGS="${CLIENT_NS}"
+
+# Run the command
+ip netns exec "${SERVER_NS}" "$@"
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 54d8f5eba810..7c81d86a7e97 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -1,9 +1,37 @@
 # SPDX-License-Identifier: GPL-2.0
 
+"""
+Python selftest helpers for netdev.
+"""
+
 from .consts import KSRC
-from .ksft import *
-from .netns import NetNS
-from .nsim import *
-from .utils import *
-from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
-from .ynl import NetshaperFamily
+from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \
+    ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \
+    ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \
+    ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit, \
+    ksft_variants, KsftNamedVariant
+from .netns import NetNS, NetNSEnter
+from .nsim import NetdevSim, NetdevSimDev
+from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \
+    bpftool, ip, ethtool, bpftrace, rand_port, rand_ports, wait_port_listen, \
+    wait_file, tool
+from .bpf import bpf_map_set, bpf_map_dump, bpf_prog_map_ids
+from .ynl import NlError, NlctrlFamily, YnlFamily, \
+    EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
+from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily, Netlink
+
+__all__ = ["KSRC",
+           "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq",
+           "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in",
+           "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises",
+           "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup",
+           "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant",
+           "NetNS", "NetNSEnter",
+           "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer",
+           "bpftool", "ip", "ethtool", "bpftrace", "rand_port", "rand_ports",
+           "wait_port_listen", "wait_file", "tool",
+           "bpf_map_set", "bpf_map_dump", "bpf_prog_map_ids",
+           "NetdevSim", "NetdevSimDev",
+           "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError",
+           "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily",
+           "NlctrlFamily", "RtnlAddrFamily", "Netlink"]
diff --git a/tools/testing/selftests/net/lib/py/bpf.py b/tools/testing/selftests/net/lib/py/bpf.py
new file mode 100644
index 000000000000..beb6bf2896a8
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/bpf.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+BPF helper utilities for kernel selftests.
+
+Provides common operations for interacting with BPF maps and programs
+via bpftool, used by XDP and other BPF-based test files.
+"""
+
+from .utils import bpftool
+
+def _format_hex_bytes(value):
+    """
+    Helper function that converts an integer into a formatted hexadecimal byte string.
+
+    Args:
+        value: An integer representing the number to be converted.
+
+    Returns:
+        A string representing hexadecimal equivalent of value, with bytes separated by spaces.
+    """
+    hex_str = value.to_bytes(4, byteorder='little', signed=True)
+    return ' '.join(f'{byte:02x}' for byte in hex_str)
+
+
+def bpf_map_set(map_name, key, value):
+    """
+    Updates an XDP map with a given key-value pair using bpftool.
+
+    Args:
+        map_name: The name of the XDP map to update.
+        key: The key to update, as an integer (encoded as 4 little-endian bytes).
+        value: The value to store, as an integer (encoded as 4 little-endian bytes).
+    """
+    key_formatted = _format_hex_bytes(key)
+    value_formatted = _format_hex_bytes(value)
+    bpftool(
+        f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}"
+    )
+
+def bpf_map_dump(map_id):
+    """Dump all entries of a BPF array map.
+
+    Args:
+        map_id: Numeric map ID (as returned by bpftool prog show).
+
+    Returns:
+        A dict mapping formatted key (int) to formatted value (int).
+    """
+    raw = bpftool(f"map dump id {map_id}", json=True)
+    return {e["formatted"]["key"]: e["formatted"]["value"] for e in raw}
+
+
+def bpf_prog_map_ids(prog_id):
+    """Get the map name-to-ID mapping for a loaded BPF program.
+
+    Args:
+        prog_id: Numeric program ID.
+
+    Returns:
+        A dict mapping map name (str) to map ID (int).
+    """
+    map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"]
+    maps = {}
+    for mid in map_ids:
+        name = bpftool(f"map show id {mid}", json=True)["name"]
+        maps[name] = mid
+    return maps
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 3efe005436cd..81287c2daff0 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -1,13 +1,17 @@
 # SPDX-License-Identifier: GPL-2.0
 
-import builtins
+import fnmatch
 import functools
+import getopt
 import inspect
+import os
+import signal
 import sys
 import time
 import traceback
+from collections import namedtuple
 from .consts import KSFT_MAIN_NAME
-from .utils import global_defer_queue
+from . import utils
 
 KSFT_RESULT = None
 KSFT_RESULT_ALL = True
@@ -26,8 +30,67 @@ class KsftXfailEx(Exception):
     pass
 
 
+class KsftTerminate(KeyboardInterrupt):
+    pass
+
+
+class _KsftArgs:
+    def __init__(self):
+        self.list_tests = False
+        self.filters = []
+
+        try:
+            opts, _ = getopt.getopt(sys.argv[1:], 'hlt:T:')
+        except getopt.GetoptError as e:
+            print(e, file=sys.stderr)
+            sys.exit(1)
+
+        for opt, val in opts:
+            if opt == '-h':
+                print(f"Usage: {sys.argv[0]} [-h|-l] [-t|-T name]\n"
+                      f"\t-h       print help\n"
+                      f"\t-l       list tests (filtered, if filters were specified)\n"
+                      f"\t-t name  include test\n"
+                      f"\t-T name  exclude test",
+                      file=sys.stderr)
+                sys.exit(0)
+            elif opt == '-l':
+                self.list_tests = True
+            elif opt == '-t':
+                self.filters.append((True, val))
+            elif opt == '-T':
+                self.filters.append((False, val))
+
+
+@functools.lru_cache()
+def _ksft_supports_color():
+    if os.environ.get("NO_COLOR") is not None:
+        return False
+    if not hasattr(sys.stdout, "isatty") or not sys.stdout.isatty():
+        return False
+    if os.environ.get("TERM") == "dumb":
+        return False
+    return True
+
+
 def ksft_pr(*objs, **kwargs):
-    print("#", *objs, **kwargs)
+    """
+    Print logs to stdout.
+
+    Behaves like print() but log lines will be prefixed
+    with # to prevent breaking the TAP output formatting.
+
+    Extra arguments (on top of what print() supports):
+      line_pfx - add extra string before each line
+    """
+    sep = kwargs.pop("sep", " ")
+    pfx = kwargs.pop("line_pfx", "")
+    pfx = "#" + (" " + pfx if pfx else "")
+    kwargs["flush"] = True
+
+    text = sep.join(str(obj) for obj in objs)
+    prefixed = f"\n{pfx} ".join(text.split('\n'))
+    print(pfx, prefixed, **kwargs)
 
 
 def _fail(*args):
@@ -66,11 +129,21 @@ def ksft_true(a, comment=""):
         _fail("Check failed", a, "does not eval to True", comment)
 
 
+def ksft_not_none(a, comment=""):
+    if a is None:
+        _fail("Check failed", a, "is None", comment)
+
+
 def ksft_in(a, b, comment=""):
     if a not in b:
         _fail("Check failed", a, "not in", b, comment)
 
 
+def ksft_not_in(a, b, comment=""):
+    if a in b:
+        _fail("Check failed", a, "in", b, comment)
+
+
 def ksft_is(a, b, comment=""):
     if a is not b:
         _fail("Check failed", a, "is not", b, comment)
@@ -81,6 +154,11 @@ def ksft_ge(a, b, comment=""):
         _fail("Check failed", a, "<", b, comment)
 
 
+def ksft_gt(a, b, comment=""):
+    if a <= b:
+        _fail("Check failed", a, "<=", b, comment)
+
+
 def ksft_lt(a, b, comment=""):
     if a >= b:
         _fail("Check failed", a, ">=", b, comment)
@@ -115,7 +193,7 @@ def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""):
         time.sleep(sleep)
 
 
-def ktap_result(ok, cnt=1, case="", comment=""):
+def ktap_result(ok, cnt=1, case_name="", comment=""):
     global KSFT_RESULT_ALL
     KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok
 
@@ -125,31 +203,46 @@ def ktap_result(ok, cnt=1, case="", comment=""):
     res += "ok "
     res += str(cnt) + " "
     res += KSFT_MAIN_NAME
-    if case:
-        res += "." + str(case.__name__)
+    if case_name:
+        res += "." + case_name
     if comment:
         res += " # " + comment
-    print(res)
+    if _ksft_supports_color():
+        if comment.startswith(("SKIP", "XFAIL")):
+            color = "\033[33m"
+        elif ok:
+            color = "\033[32m"
+        else:
+            color = "\033[31m"
+        res = color + res + "\033[0m"
+    print(res, flush=True)
+
+
+def _ksft_defer_arm(state):
+    """ Allow or disallow the use of defer() """
+    utils.GLOBAL_DEFER_ARMED = state
 
 
 def ksft_flush_defer():
     global KSFT_RESULT
 
     i = 0
-    qlen_start = len(global_defer_queue)
-    while global_defer_queue:
+    qlen_start = len(utils.GLOBAL_DEFER_QUEUE)
+    while utils.GLOBAL_DEFER_QUEUE:
         i += 1
-        entry = global_defer_queue.pop()
+        entry = utils.GLOBAL_DEFER_QUEUE.pop()
         try:
             entry.exec_only()
-        except:
+        except Exception:
             ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
-            tb = traceback.format_exc()
-            for line in tb.strip().split('\n'):
-                ksft_pr("Defer Exception|", line)
+            ksft_pr(traceback.format_exc(), line_pfx="Defer Exception|")
             KSFT_RESULT = False
 
 
+KsftCaseFunction = namedtuple("KsftCaseFunction",
+                              ['name', 'original_func', 'variants'])
+
+
 def ksft_disruptive(func):
     """
     Decorator that marks the test as disruptive (e.g. the test
@@ -160,11 +253,47 @@ def ksft_disruptive(func):
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
         if not KSFT_DISRUPTIVE:
-            raise KsftSkipEx(f"marked as disruptive")
+            raise KsftSkipEx("marked as disruptive")
         return func(*args, **kwargs)
     return wrapper
 
 
+class KsftNamedVariant:
+    """ Named string name + argument list tuple for @ksft_variants """
+
+    def __init__(self, name, *params):
+        self.params = params
+        self.name = name or "_".join([str(x) for x in self.params])
+
+
+def ksft_variants(params):
+    """
+    Decorator defining the sets of inputs for a test.
+    The parameters will be included in the name of the resulting sub-case.
+    Parameters can be either single object, tuple or a KsftNamedVariant.
+    The argument can be a list or a generator.
+
+    Example:
+
+    @ksft_variants([
+        (1, "a"),
+        (2, "b"),
+        KsftNamedVariant("three", 3, "c"),
+    ])
+    def my_case(cfg, a, b):
+        pass # ...
+
+    ksft_run(cases=[my_case], args=(cfg, ))
+
+    Will generate cases:
+        my_case.1_a
+        my_case.2_b
+        my_case.three
+    """
+
+    return lambda func: KsftCaseFunction(func.__name__, func, params)
+
+
 def ksft_setup(env):
     """
     Setup test framework global state from the environment.
@@ -178,7 +307,7 @@ def ksft_setup(env):
             return False
         try:
             return bool(int(value))
-        except:
+        except Exception:
             raise Exception(f"failed to parse {name}")
 
     if "DISRUPTIVE" in env:
@@ -188,9 +317,42 @@ def ksft_setup(env):
     return env
 
 
-def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+def _ksft_intr(signum, frame):
+    # ksft runner.sh sends 2 SIGTERMs in a row on a timeout
+    # if we don't ignore the second one it will stop us from handling cleanup
+    global term_cnt
+    term_cnt += 1
+    if term_cnt == 1:
+        raise KsftTerminate()
+    else:
+        ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
+
+
+def _ksft_name_matches(name, pattern):
+    if '*' in pattern or '?' in pattern or '[' in pattern:
+        return fnmatch.fnmatchcase(name, pattern)
+    return name == pattern
+
+
+def _ksft_test_enabled(name, filters):
+    has_positive = False
+    for include, pattern in filters:
+        has_positive |= include
+        if _ksft_name_matches(name, pattern):
+            return include
+    return not has_positive
+
+
+def _ksft_generate_test_cases(cases, globs, case_pfx, args, cli_args):
+    """Generate a filtered list of (func, args, name) tuples.
+
+    If -l is given, prints matching test names and exits.
+    """
+
     cases = cases or []
+    test_cases = []
 
+    # If using the globs method find all relevant functions
     if globs and case_pfx:
         for key, value in globs.items():
             if not callable(value):
@@ -200,22 +362,62 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
                     cases.append(value)
                     break
 
-    totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
+    for func in cases:
+        if isinstance(func, KsftCaseFunction):
+            # Parametrized test - create case for each param
+            for param in func.variants:
+                if not isinstance(param, KsftNamedVariant):
+                    if not isinstance(param, tuple):
+                        param = (param, )
+                    param = KsftNamedVariant(None, *param)
+
+                test_cases.append((func.original_func,
+                                   (*args, *param.params),
+                                   func.name + "." + param.name))
+        else:
+            test_cases.append((func, args, func.__name__))
 
-    print("KTAP version 1")
-    print("1.." + str(len(cases)))
+    if cli_args.filters:
+        test_cases = [tc for tc in test_cases
+                      if _ksft_test_enabled(tc[2], cli_args.filters)]
+
+    if cli_args.list_tests:
+        for _, _, name in test_cases:
+            print(name)
+        sys.exit(0)
+
+    return test_cases
+
+
+def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+    cli_args = _KsftArgs()
+    test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args,
+                                           cli_args)
+
+    global term_cnt
+    term_cnt = 0
+    prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
+
+    totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
 
     global KSFT_RESULT
+    if KSFT_RESULT is not None:
+        raise RuntimeError("ksft_run() can't be called multiple times.")
+
+    print("TAP version 13", flush=True)
+    print("1.." + str(len(test_cases)), flush=True)
+
     cnt = 0
     stop = False
-    for case in cases:
+    for func, args, name in test_cases:
         KSFT_RESULT = True
         cnt += 1
         comment = ""
         cnt_key = ""
 
+        _ksft_defer_arm(True)
         try:
-            case(*args)
+            func(*args)
         except KsftSkipEx as e:
             comment = "SKIP " + str(e)
             cnt_key = 'skip'
@@ -224,25 +426,38 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
             cnt_key = 'xfail'
         except BaseException as e:
             stop |= isinstance(e, KeyboardInterrupt)
-            tb = traceback.format_exc()
-            for line in tb.strip().split('\n'):
-                ksft_pr("Exception|", line)
+            ksft_pr(traceback.format_exc(), line_pfx="Exception|")
             if stop:
-                ksft_pr("Stopping tests due to KeyboardInterrupt.")
+                ksft_pr(f"Stopping tests due to {type(e).__name__}.")
             KSFT_RESULT = False
             cnt_key = 'fail'
+        _ksft_defer_arm(False)
 
-        ksft_flush_defer()
+        try:
+            ksft_flush_defer()
+        except BaseException as e:
+            ksft_pr(traceback.format_exc(), line_pfx="Exception|")
+            if isinstance(e, KeyboardInterrupt):
+                ksft_pr()
+                ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.")
+                ksft_pr("      Attempting to finish cleanup before exiting.")
+                ksft_pr("      Interrupt again to exit immediately.")
+                ksft_pr()
+                stop = True
+            # Flush was interrupted, try to finish the job best we can
+            ksft_flush_defer()
 
         if not cnt_key:
             cnt_key = 'pass' if KSFT_RESULT else 'fail'
 
-        ktap_result(KSFT_RESULT, cnt, case, comment=comment)
+        ktap_result(KSFT_RESULT, cnt, name, comment=comment)
         totals[cnt_key] += 1
 
         if stop:
             break
 
+    signal.signal(signal.SIGTERM, prev_sigterm)
+
     print(
         f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
     )
diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py
index ecff85f9074f..8e9317044eef 100644
--- a/tools/testing/selftests/net/lib/py/netns.py
+++ b/tools/testing/selftests/net/lib/py/netns.py
@@ -1,9 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 
 from .utils import ip
+import ctypes
 import random
 import string
 
+libc = ctypes.cdll.LoadLibrary('libc.so.6')
+
 
 class NetNS:
     def __init__(self, name=None):
@@ -29,3 +32,37 @@ class NetNS:
 
     def __repr__(self):
         return f"NetNS({self.name})"
+
+
+class NetNSEnter:
+    """
+    Context manager which switches the calling thread into the network
+    namespace @ns_name (looked up under /run/netns/) and switches back to
+    the original namespace on exit.
+
+    Raises OSError if either namespace switch is rejected by the kernel.
+    """
+    def __init__(self, ns_name):
+        self.ns_path = f"/run/netns/{ns_name}"
+        self.saved = None
+
+    def __enter__(self):
+        # Hold the current namespace open so that __exit__ can return to it
+        saved = open("/proc/thread-self/ns/net")
+        try:
+            with open(self.ns_path) as ns_file:
+                # setns() reports failure via its return value, not by raising
+                if libc.setns(ns_file.fileno(), 0) != 0:
+                    raise OSError(f"setns() into {self.ns_path} failed")
+        except BaseException:
+            saved.close()
+            raise
+        self.saved = saved
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        try:
+            if libc.setns(self.saved.fileno(), 0) != 0:
+                raise OSError("setns() back to the original netns failed")
+        finally:
+            self.saved.close()
diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py
index 1a8cbe9acc48..7c640ed64c0b 100644
--- a/tools/testing/selftests/net/lib/py/nsim.py
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -27,7 +27,7 @@ class NetdevSim:
         self.port_index = port_index
         self.ns = ns
         self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
-        ret = ip("-j link show dev %s" % ifname, ns=ns)
+        ret = ip("-d -j link show dev %s" % ifname, ns=ns)
         self.dev = json.loads(ret.stdout)[0]
         self.ifindex = self.dev["ifindex"]
 
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 9e3bcddcf3e8..6c44a3d2bbf7 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -1,80 +1,193 @@
 # SPDX-License-Identifier: GPL-2.0
 
-import errno
 import json as _json
-import random
+import os
 import re
+import select
 import socket
 import subprocess
 import time
 
 
+class CmdInitFailure(Exception):
+    """ Command failed to start. Only raised by bkg(). """
+    def __init__(self, msg, cmd_obj):
+        super().__init__(msg + "\n" + repr(cmd_obj))
+        self.cmd = cmd_obj
+
+
 class CmdExitFailure(Exception):
+    """ Command failed (returned non-zero exit code). """
     def __init__(self, msg, cmd_obj):
-        super().__init__(msg)
+        super().__init__(msg + "\n" + repr(cmd_obj))
         self.cmd = cmd_obj
 
 
+def fd_read_timeout(fd, timeout):
+    rlist, _, _ = select.select([fd], [], [], timeout)
+    if rlist:
+        return os.read(fd, 1024)
+    raise TimeoutError("Timeout waiting for fd read")
+
+
 class cmd:
-    def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5):
+    """
+    Execute a command on local or remote host.
+
+    @shell defaults to false, and class will try to split @comm into a list
+    if it's a string with spaces.
+
+    Use bkg() instead to run a command in the background.
+    """
+    def __init__(self, comm, shell=None, fail=True, ns=None, background=False,
+                 host=None, timeout=5, ksft_ready=None, ksft_wait=None):
         if ns:
             comm = f'ip netns exec {ns} ' + comm
 
         self.stdout = None
         self.stderr = None
         self.ret = None
+        self.ksft_term_fd = None
 
+        self.host = host
         self.comm = comm
+
         if host:
             self.proc = host.cmd(comm)
         else:
+            # If user doesn't explicitly request shell try to avoid it.
+            if shell is None and isinstance(comm, str) and ' ' in comm:
+                comm = comm.split()
+
+            # ksft_wait lets us wait for the background process to fully start,
+            # we pass an FD to the child process, and wait for it to write back.
+            # Similarly term_fd tells child it's time to exit.
+            pass_fds = []
+            env = os.environ.copy()
+            if ksft_wait is not None:
+                wait_fd, self.ksft_term_fd = os.pipe()
+                pass_fds.append(wait_fd)
+                env["KSFT_WAIT_FD"]  = str(wait_fd)
+                ksft_ready = True  # ksft_wait implies ready
+            if ksft_ready is not None:
+                rfd, ready_fd = os.pipe()
+                pass_fds.append(ready_fd)
+                env["KSFT_READY_FD"] = str(ready_fd)
+
             self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
-                                         stderr=subprocess.PIPE)
+                                         stderr=subprocess.PIPE, pass_fds=pass_fds,
+                                         env=env)
+            if ksft_wait is not None:
+                os.close(wait_fd)
+            if ksft_ready is not None:
+                os.close(ready_fd)
+                msg = fd_read_timeout(rfd, ksft_wait)
+                os.close(rfd)
+                if not msg:
+                    terminate = self.proc.poll() is None
+                    self._process_terminate(terminate=terminate, timeout=1)
+                    raise CmdInitFailure("Did not receive ready message", self)
         if not background:
             self.process(terminate=False, fail=fail, timeout=timeout)
 
-    def process(self, terminate=True, fail=None, timeout=5):
-        if fail is None:
-            fail = not terminate
-
+    def _process_terminate(self, terminate, timeout):
         if terminate:
             self.proc.terminate()
-        stdout, stderr = self.proc.communicate(timeout)
+        stdout, stderr = self.proc.communicate(timeout=timeout)
         self.stdout = stdout.decode("utf-8")
         self.stderr = stderr.decode("utf-8")
         self.proc.stdout.close()
         self.proc.stderr.close()
         self.ret = self.proc.returncode
 
+        return stdout, stderr
+
+    def process(self, terminate=True, fail=None, timeout=5):
+        if fail is None:
+            fail = not terminate
+
+        if self.ksft_term_fd:
+            os.write(self.ksft_term_fd, b"1")
+
+        stdout, stderr = self._process_terminate(terminate=terminate,
+                                                 timeout=timeout)
         if self.proc.returncode != 0 and fail:
             if len(stderr) > 0 and stderr[-1] == "\n":
                 stderr = stderr[:-1]
-            raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
-                                 (self.proc.args, stdout, stderr), self)
+            raise CmdExitFailure("Command failed", self)
+
+    def __repr__(self):
+        def str_fmt(name, s):
+            name += ': '
+            return (name + s.strip().replace('\n', '\n' + ' ' * len(name)))
+
+        ret = "CMD"
+        if self.host:
+            ret += "[remote]"
+        if self.ret is None:
+            ret += f" (unterminated): {self.comm}\n"
+        elif self.ret == 0:
+            ret += f" (success): {self.comm}\n"
+        else:
+            ret += f": {self.comm}\n"
+            ret += f"  EXIT: {self.ret}\n"
+        if self.stdout:
+            ret += str_fmt("  STDOUT", self.stdout) + "\n"
+        if self.stderr:
+            ret += str_fmt("  STDERR", self.stderr) + "\n"
+        return ret.strip()
 
 
 class bkg(cmd):
-    def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
-                 exit_wait=False):
+    """
+    Run a command in the background.
+
+    Examples usage:
+
+    Run a command on remote host, and wait for it to finish.
+    This is usually paired with wait_port_listen() to make sure
+    the command has initialized:
+
+        with bkg("socat ...", exit_wait=True, host=cfg.remote) as nc:
+            ...
+
+    Run a command and expect it to let us know that it's ready
+    by writing to a special file descriptor passed via KSFT_READY_FD.
+    Command will be terminated when we exit the context manager:
+
+        with bkg("my_binary", ksft_wait=5):
+    """
+    def __init__(self, comm, shell=None, fail=None, ns=None, host=None,
+                 exit_wait=False, ksft_ready=None, ksft_wait=None):
         super().__init__(comm, background=True,
-                         shell=shell, fail=fail, ns=ns, host=host)
-        self.terminate = not exit_wait
+                         shell=shell, fail=fail, ns=ns, host=host,
+                         ksft_ready=ksft_ready, ksft_wait=ksft_wait)
+        self.terminate = not exit_wait and not ksft_wait
+        self._exit_wait = exit_wait
         self.check_fail = fail
 
+        if shell and self.terminate:
+            print("# Warning: combining shell and terminate is risky!")
+            print("#          SIGTERM may not reach the child on zsh/ksh!")
+
     def __enter__(self):
         return self
 
     def __exit__(self, ex_type, ex_value, ex_tb):
-        return self.process(terminate=self.terminate, fail=self.check_fail)
+        terminate = self.terminate
+        # Force termination on exception, but only if bkg() didn't already exit
+        # since forcing termination silences failures with fail=None
+        if self.proc.poll() is None:
+            terminate = terminate or (self._exit_wait and ex_type is not None)
+        return self.process(terminate=terminate, fail=self.check_fail)
 
 
-global_defer_queue = []
+GLOBAL_DEFER_QUEUE = []
+GLOBAL_DEFER_ARMED = False
 
 
 class defer:
     def __init__(self, func, *args, **kwargs):
-        global global_defer_queue
-
         if not callable(func):
             raise Exception("defer created with un-callable object, did you call the function instead of passing its name?")
 
@@ -82,7 +195,9 @@ class defer:
         self.args = args
         self.kwargs = kwargs
 
-        self._queue =  global_defer_queue
+        if not GLOBAL_DEFER_ARMED:
+            raise Exception("defer queue not armed, did you use defer() outside of a test case?")
+        self._queue = GLOBAL_DEFER_QUEUE
         self._queue.append(self)
 
     def __enter__(self):
@@ -113,6 +228,10 @@ def tool(name, args, json=None, ns=None, host=None):
     return cmd_obj
 
 
+def bpftool(args, json=None, ns=None, host=None):
+    return tool('bpftool', args, json=json, ns=ns, host=host)
+
+
 def ip(args, json=None, ns=None, host=None):
     if ns:
         args = f'-netns {ns} ' + args
@@ -123,20 +242,67 @@ def ethtool(args, json=None, ns=None, host=None):
     return tool('ethtool', args, json=json, ns=ns, host=host)
 
 
-def rand_port():
+def bpftrace(expr, json=None, ns=None, host=None, timeout=None):
+    """
+    Run bpftrace and return map data (if json=True).
+    The output of bpftrace is inconvenient, so the helper converts
+    to a dict indexed by map name with the leading '@' stripped, e.g.:
+     {
+       "":     { ... },
+       "map2": { ... },
+     }
+    """
+    cmd_arr = ['bpftrace']
+    # Throw in --quiet if json, otherwise the output has two objects
+    if json:
+        cmd_arr += ['-f', 'json', '-q']
+    if timeout:
+        expr += ' interval:s:' + str(timeout) + ' { exit(); }'
+        timeout += 20
+    cmd_arr += ['-e', expr]
+    cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False, timeout=timeout)
+    if json:
+        # bpftrace prints objects as lines
+        ret = {}
+        for l in cmd_obj.stdout.split('\n'):
+            if not l.strip():
+                continue
+            one = _json.loads(l)
+            if one.get('type') != 'map':
+                continue
+            for k, v in one["data"].items():
+                if k.startswith('@'):
+                    k = k.lstrip('@')
+                ret[k] = v
+        return ret
+    return cmd_obj
+
+
+def rand_port(stype=socket.SOCK_STREAM):
+    """
+    Get a random unprivileged port.
     """
-    Get a random unprivileged port, try to make sure it's not already used.
+    return rand_ports(1, stype)[0]
+
+
+def rand_ports(count, stype=socket.SOCK_STREAM):
     """
-    for _ in range(1000):
-        port = random.randint(10000, 65535)
-        try:
-            with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
-                s.bind(("", port))
-            return port
-        except OSError as e:
-            if e.errno != errno.EADDRINUSE:
-                raise
-    raise Exception("Can't find any free unprivileged port")
+    Get a unique set of random unprivileged ports.
+    """
+    sockets = []
+    ports = []
+
+    try:
+        for _ in range(count):
+            s = socket.socket(socket.AF_INET6, stype)
+            sockets.append(s)
+            s.bind(("", 0))
+            ports.append(s.getsockname()[1])
+    finally:
+        for s in sockets:
+            s.close()
+
+    return ports
 
 
 def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):
@@ -155,3 +321,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin
         if time.monotonic() > end:
             raise Exception("Waiting for port listen timed out")
         time.sleep(sleep)
+
+
+def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'):
+    """
+    Wait for file contents on the local system to satisfy a condition.
+    test_fn() should take one argument (file contents) and return whether
+    condition is met.
+    """
+    end = time.monotonic() + deadline
+
+    with open(fname, "r", encoding=encoding) as fp:
+        while True:
+            if test_fn(fp.read()):
+                break
+            fp.seek(0)
+            if time.monotonic() > end:
+                raise TimeoutError("Wait for file contents failed", fname)
+            time.sleep(sleep)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index ad1e36baee2a..2e567062aa6c 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -13,20 +13,27 @@ try:
         SPEC_PATH = KSFT_DIR / "net/lib/specs"
 
         sys.path.append(tools_full_path.as_posix())
-        from net.lib.ynl.pyynl.lib import YnlFamily, NlError
+        from net.lib.ynl.pyynl.lib import YnlFamily, NlError, NlPolicy, Netlink
     else:
         # Running in tree
         tools_full_path = KSRC / "tools"
         SPEC_PATH = KSRC / "Documentation/netlink/specs"
 
         sys.path.append(tools_full_path.as_posix())
-        from net.ynl.pyynl.lib import YnlFamily, NlError
+        from net.ynl.pyynl.lib import YnlFamily, NlError, NlPolicy, Netlink
 except ModuleNotFoundError as e:
     ksft_pr("Failed importing `ynl` library from kernel sources")
     ksft_pr(str(e))
     ktap_result(True, comment="SKIP")
     sys.exit(4)
 
+__all__ = [
+    "NlError", "NlPolicy", "Netlink", "YnlFamily", "SPEC_PATH",
+    "EthtoolFamily", "RtnlFamily", "RtnlAddrFamily",
+    "NetdevFamily", "NetshaperFamily", "NlctrlFamily", "DevlinkFamily",
+    "PSPFamily",
+]
+
 #
 # Wrapper classes, loading the right specs
 # Set schema='' to avoid jsonschema validation, it's slow
@@ -39,9 +46,13 @@ class EthtoolFamily(YnlFamily):
 
 class RtnlFamily(YnlFamily):
     def __init__(self, recv_size=0):
-        super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+        super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(),
                          schema='', recv_size=recv_size)
 
+class RtnlAddrFamily(YnlFamily):
+    def __init__(self, recv_size=0):
+        super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(),
+                         schema='', recv_size=recv_size)
 
 class NetdevFamily(YnlFamily):
     def __init__(self, recv_size=0):
@@ -52,3 +63,20 @@ class NetshaperFamily(YnlFamily):
     def __init__(self, recv_size=0):
         super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
                          schema='', recv_size=recv_size)
+
+
+class NlctrlFamily(YnlFamily):
+    def __init__(self, recv_size=0):
+        super().__init__((SPEC_PATH / Path('nlctrl.yaml')).as_posix(),
+                         schema='', recv_size=recv_size)
+
+
+class DevlinkFamily(YnlFamily):
+    def __init__(self, recv_size=0):
+        super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(),
+                         schema='', recv_size=recv_size)
+
+class PSPFamily(YnlFamily):
+    def __init__(self, recv_size=0):
+        super().__init__((SPEC_PATH / Path('psp.yaml')).as_posix(),
+                         schema='', recv_size=recv_size)
diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh
index 082f5d38321b..47ab78c4d465 100644
--- a/tools/testing/selftests/net/lib/sh/defer.sh
+++ b/tools/testing/selftests/net/lib/sh/defer.sh
@@ -1,6 +1,10 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# Whether to pause and allow debugging when an executed deferred command has a
+# non-zero exit code.
+: "${DEFER_PAUSE_ON_FAIL:=no}"
+
 # map[(scope_id,track,cleanup_id) -> cleanup_command]
 # track={d=default | p=priority}
 declare -A __DEFER__JOBS
@@ -38,8 +42,20 @@ __defer__run()
 	local track=$1; shift
 	local defer_ix=$1; shift
 	local defer_key=$(__defer__defer_key $track $defer_ix)
+	local ret
+	# Quoted so the ${@@Q}-escaped job reaches eval without word splitting.
+	eval "${__DEFER__JOBS[$defer_key]}"
+	ret=$?
+
+	if [[ "$DEFER_PAUSE_ON_FAIL" == yes && "$ret" -ne 0 ]]; then
+		echo "Deferred command (track $track index $defer_ix):"
+		echo "	${__DEFER__JOBS[$defer_key]}"
+		echo "... ended with an exit status of $ret"
+		echo "Hit enter to continue, 'q' to quit"
+		read a
+		[[ "$a" == q ]] && exit 1
+	fi
 
-	${__DEFER__JOBS[$defer_key]}
 	unset __DEFER__JOBS[$defer_key]
 }
 
@@ -49,7 +65,7 @@ __defer__schedule()
 	local ndefers=$(__defer__ndefers $track)
 	local ndefers_key=$(__defer__ndefer_key $track)
 	local defer_key=$(__defer__defer_key $track $ndefers)
-	local defer="$@"
+	local defer="${@@Q}"
 
 	__DEFER__JOBS[$defer_key]="$defer"
 	__DEFER__NJOBS[$ndefers_key]=$((ndefers + 1))
diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
index d988b2e0cee8..e73fab3edd9f 100644
--- a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
@@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx)
 	return XDP_PASS;
 }
 
+SEC("xdp.frags")
+int xdp_dummy_prog_frags(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/lib/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c
new file mode 100644
index 000000000000..eb025a9f35b1
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_helper.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <linux/if_xdp.h>
+#include <linux/if_link.h>
+#include <net/if.h>
+#include <inttypes.h>
+
+#include "ksft.h"
+
+#define UMEM_SZ (1U << 16)
+#define NUM_DESC (UMEM_SZ / 2048)
+
+/* Print command-line usage for this helper to stderr. */
+static void print_usage(const char *bin)
+{
+	fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n"
+		"where:\n\t-z: force zerocopy mode\n", bin);
+}
+
+/* this is a simple helper program that creates an XDP socket and does the
+ * minimum necessary to get bind() to succeed.
+ *
+ * this test program is not intended to actually process packets, but could be
+ * extended in the future if that is actually needed.
+ *
+ * it is used by queues.py to ensure the xsk netlink attribute is set
+ * correctly.
+ */
+int main(int argc, char **argv)
+{
+	struct xdp_umem_reg umem_reg = { 0 };
+	struct sockaddr_xdp sxdp = { 0 };
+	int num_desc = NUM_DESC;
+	void *umem_area;
+	int retry = 0;
+	int ifindex;
+	int sock_fd;
+	int queue;
+
+	if (argc != 3 && argc != 4) {
+		print_usage(argv[0]);
+		return 1;
+	}
+
+	sock_fd = socket(AF_XDP, SOCK_RAW, 0);
+	if (sock_fd < 0) {
+		perror("socket creation failed");
+		/* if the kernel doesn't support AF_XDP, let the test program
+		 * know with -1. All other error paths return 1.
+		 */
+		if (errno == EAFNOSUPPORT)
+			return -1;
+		return 1;
+	}
+
+	/* "Probing mode", just checking if AF_XDP sockets are supported */
+	if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) {
+		printf("AF_XDP support detected\n");
+		close(sock_fd);
+		return 0;
+	}
+
+	ifindex = atoi(argv[1]);
+	queue = atoi(argv[2]);
+
+	umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+			MAP_ANONYMOUS, -1, 0);
+	if (umem_area == MAP_FAILED) {
+		perror("mmap failed");
+		return 1;
+	}
+
+	umem_reg.addr = (uintptr_t)umem_area;
+	umem_reg.len = UMEM_SZ;
+	umem_reg.chunk_size = 2048;
+	umem_reg.headroom = 0;
+
+	setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg,
+		   sizeof(umem_reg));
+	setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc,
+		   sizeof(num_desc));
+	setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc,
+		   sizeof(num_desc));
+	setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc));
+
+	sxdp.sxdp_family = AF_XDP;
+	sxdp.sxdp_ifindex = ifindex;
+	sxdp.sxdp_queue_id = queue;
+	sxdp.sxdp_flags = 0;
+
+	if (argc > 3) {
+		if (!strcmp(argv[3], "-z")) {
+			sxdp.sxdp_flags = XDP_ZEROCOPY;
+		} else {
+			print_usage(argv[0]);
+			return 1;
+		}
+	}
+
+	while (1) {
+		if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0)
+			break;
+
+		if (errno == EBUSY && retry < 3) {
+			retry++;
+			sleep(1);
+			continue;
+		} else {
+			perror("bind failed");
+			munmap(umem_area, UMEM_SZ);
+			close(sock_fd);
+			return 1;
+		}
+	}
+
+	ksft_ready();
+	ksft_wait();
+
+	/* parent program will write a byte to stdin when it's ready for this
+	 * helper to exit
+	 */
+
+	close(sock_fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/lib/xdp_metadata.bpf.c b/tools/testing/selftests/net/lib/xdp_metadata.bpf.c
new file mode 100644
index 000000000000..f71f59215239
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_metadata.bpf.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+enum {
+	XDP_PORT = 1,
+	XDP_PROTO = 4,
+} xdp_map_setup_keys;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 5);
+	__type(key, __u32);
+	__type(value, __s32);
+} map_xdp_setup SEC(".maps");
+
+/* RSS hash results: key 0 = hash, key 1 = hash type,
+ * key 2 = packet count, key 3 = error count.
+ */
+enum {
+	RSS_KEY_HASH = 0,
+	RSS_KEY_TYPE = 1,
+	RSS_KEY_PKT_CNT = 2,
+	RSS_KEY_ERR_CNT = 3,
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 4);
+} map_rss SEC(".maps");
+
+/* Mirror of enum xdp_rss_hash_type from include/net/xdp.h.
+ * Needed because the enum is not part of UAPI headers.
+ */
+enum xdp_rss_hash_type {
+	XDP_RSS_L3_IPV4 = 1U << 0,
+	XDP_RSS_L3_IPV6 = 1U << 1,
+	XDP_RSS_L3_DYNHDR = 1U << 2,
+	XDP_RSS_L4 = 1U << 3,
+	XDP_RSS_L4_TCP = 1U << 4,
+	XDP_RSS_L4_UDP = 1U << 5,
+	XDP_RSS_L4_SCTP = 1U << 6,
+	XDP_RSS_L4_IPSEC = 1U << 7,
+	XDP_RSS_L4_ICMP = 1U << 8,
+};
+
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+				    enum xdp_rss_hash_type *rss_type) __ksym;
+
+static __always_inline __u16 get_dest_port(void *l4, void *data_end,
+					   __u8 protocol)
+{
+	if (protocol == IPPROTO_UDP) {
+		struct udphdr *udp = l4;
+
+		if ((void *)(udp + 1) > data_end)
+			return 0;
+		return udp->dest;
+	} else if (protocol == IPPROTO_TCP) {
+		struct tcphdr *tcp = l4;
+
+		if ((void *)(tcp + 1) > data_end)
+			return 0;
+		return tcp->dest;
+	}
+
+	return 0;
+}
+
+SEC("xdp")
+int xdp_rss_hash(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	enum xdp_rss_hash_type rss_type = 0;
+	struct ethhdr *eth = data;
+	__u8 l4_proto = 0;
+	__u32 hash = 0;
+	__u32 key, val;
+	void *l4 = NULL;
+	__u32 *cnt;
+	int ret;
+
+	if ((void *)(eth + 1) > data_end)
+		return XDP_PASS;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = (void *)(eth + 1);
+
+		if ((void *)(iph + 1) > data_end)
+			return XDP_PASS;
+		l4_proto = iph->protocol;
+		l4 = (void *)(iph + 1);
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h = (void *)(eth + 1);
+
+		if ((void *)(ip6h + 1) > data_end)
+			return XDP_PASS;
+		l4_proto = ip6h->nexthdr;
+		l4 = (void *)(ip6h + 1);
+	}
+
+	if (!l4)
+		return XDP_PASS;
+
+	/* Filter on the configured protocol (map_xdp_setup key XDP_PROTO).
+	 * When set, only process packets matching the requested L4 protocol.
+	 */
+	key = XDP_PROTO;
+	__s32 *proto_cfg = bpf_map_lookup_elem(&map_xdp_setup, &key);
+
+	if (proto_cfg && *proto_cfg != 0 && l4_proto != (__u8)*proto_cfg)
+		return XDP_PASS;
+
+	/* Filter on the configured port (map_xdp_setup key XDP_PORT).
+	 * Only applies to protocols with ports (UDP, TCP).
+	 */
+	key = XDP_PORT;
+	__s32 *port_cfg = bpf_map_lookup_elem(&map_xdp_setup, &key);
+
+	if (port_cfg && *port_cfg != 0) {
+		__u16 dest = get_dest_port(l4, data_end, l4_proto);
+
+		if (!dest || bpf_ntohs(dest) != (__u16)*port_cfg)
+			return XDP_PASS;
+	}
+
+	ret = bpf_xdp_metadata_rx_hash(ctx, &hash, &rss_type);
+	if (ret < 0) {
+		key = RSS_KEY_ERR_CNT;
+		cnt = bpf_map_lookup_elem(&map_rss, &key);
+		if (cnt)
+			__sync_fetch_and_add(cnt, 1);
+		return XDP_PASS;
+	}
+
+	key = RSS_KEY_HASH;
+	bpf_map_update_elem(&map_rss, &key, &hash, BPF_ANY);
+
+	key = RSS_KEY_TYPE;
+	val = (__u32)rss_type;
+	bpf_map_update_elem(&map_rss, &key, &val, BPF_ANY);
+
+	key = RSS_KEY_PKT_CNT;
+	cnt = bpf_map_lookup_elem(&map_rss, &key);
+	if (cnt)
+		__sync_fetch_and_add(cnt, 1);
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
new file mode 100644
index 000000000000..64f05229ab24
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ADJST_OFFSET 256
+#define MAX_PAYLOAD_LEN 5000
+#define MAX_HDR_LEN 64
+
+extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak;
+
+enum {
+	XDP_MODE = 0,
+	XDP_PORT = 1,
+	XDP_ADJST_OFFSET = 2,
+	XDP_ADJST_TAG = 3,
+} xdp_map_setup_keys;
+
+enum {
+	XDP_MODE_PASS = 0,
+	XDP_MODE_DROP = 1,
+	XDP_MODE_TX = 2,
+	XDP_MODE_TAIL_ADJST = 3,
+	XDP_MODE_HEAD_ADJST = 4,
+} xdp_map_modes;
+
+enum {
+	STATS_RX = 0,
+	STATS_PASS = 1,
+	STATS_DROP = 2,
+	STATS_TX = 3,
+	STATS_ABORT = 4,
+} xdp_stats;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 5);
+	__type(key, __u32);
+	__type(value, __s32);
+} map_xdp_setup SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 5);
+	__type(key, __u32);
+	__type(value, __u64);
+} map_xdp_stats SEC(".maps");
+
+static __u32 min(__u32 a, __u32 b)
+{
+	return a < b ? a : b;
+}
+
+static void record_stats(struct xdp_md *ctx, __u32 stat_type)
+{
+	__u64 *count;
+
+	count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type);
+
+	if (count)
+		__sync_fetch_and_add(count, 1);
+}
+
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+	struct udphdr *udph = NULL;
+	void *data, *data_end;
+	struct ethhdr *eth;
+	int err;
+
+	err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+	if (err)
+		return NULL;
+
+	data_end = (void *)(long)ctx->data_end;
+	data = eth = (void *)(long)ctx->data;
+
+	if (data + sizeof(*eth) > data_end)
+		return NULL;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph;
+
+		err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+					     sizeof(*udph));
+		if (err)
+			return NULL;
+
+		data_end = (void *)(long)ctx->data_end;
+		data = (void *)(long)ctx->data;
+
+		iph = data + sizeof(*eth);
+
+		if (iph + 1 > (struct iphdr *)data_end ||
+		    iph->protocol != IPPROTO_UDP)
+			return NULL;
+
+		udph = data + sizeof(*iph) + sizeof(*eth);
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ipv6h;
+
+		err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+					     sizeof(*udph));
+		if (err)
+			return NULL;
+
+		data_end = (void *)(long)ctx->data_end;
+		data = (void *)(long)ctx->data;
+
+		ipv6h = data + sizeof(*eth);
+
+		if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+		    ipv6h->nexthdr != IPPROTO_UDP)
+			return NULL;
+
+		udph = data + sizeof(*ipv6h) + sizeof(*eth);
+	} else {
+		return NULL;
+	}
+
+	if (udph + 1 > (struct udphdr *)data_end)
+		return NULL;
+
+	if (udph->dest != bpf_htons(port))
+		return NULL;
+
+	record_stats(ctx, STATS_RX);
+
+	return udph;
+}
+
+static int xdp_mode_pass(struct xdp_md *ctx, __u16 port)
+{
+	struct udphdr *udph = NULL;
+
+	udph = filter_udphdr(ctx, port);
+	if (!udph)
+		return XDP_PASS;
+
+	record_stats(ctx, STATS_PASS);
+
+	return XDP_PASS;
+}
+
+static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port)
+{
+	struct udphdr *udph = NULL;
+
+	udph = filter_udphdr(ctx, port);
+	if (!udph)
+		return XDP_PASS;
+
+	record_stats(ctx, STATS_DROP);
+
+	return XDP_DROP;
+}
+
+static void swap_machdr(void *data)
+{
+	struct ethhdr *eth = data;
+	__u8 tmp_mac[ETH_ALEN];
+
+	__builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN);
+	__builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
+	__builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN);
+}
+
+static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
+{
+	struct udphdr *udph = NULL;
+	void *data, *data_end;
+	struct ethhdr *eth;
+	int err;
+
+	err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+	if (err)
+		return XDP_PASS;
+
+	data_end = (void *)(long)ctx->data_end;
+	data = eth = (void *)(long)ctx->data;
+
+	if (data + sizeof(*eth) > data_end)
+		return XDP_PASS;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph;
+		__be32 tmp_ip;
+
+		err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+					     sizeof(*udph));
+		if (err)
+			return XDP_PASS;
+
+		data_end = (void *)(long)ctx->data_end;
+		data = (void *)(long)ctx->data;
+
+		iph = data + sizeof(*eth);
+
+		if (iph + 1 > (struct iphdr *)data_end ||
+		    iph->protocol != IPPROTO_UDP)
+			return XDP_PASS;
+
+		udph = data + sizeof(*iph) + sizeof(*eth);
+
+		if (udph + 1 > (struct udphdr *)data_end)
+			return XDP_PASS;
+		if (udph->dest != bpf_htons(port))
+			return XDP_PASS;
+
+		record_stats(ctx, STATS_RX);
+		eth = data;
+		swap_machdr((void *)eth);
+
+		tmp_ip = iph->saddr;
+		iph->saddr = iph->daddr;
+		iph->daddr = tmp_ip;
+
+		record_stats(ctx, STATS_TX);
+
+		return XDP_TX;
+
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct in6_addr tmp_ipv6;
+		struct ipv6hdr *ipv6h;
+
+		err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+					     sizeof(*udph));
+		if (err)
+			return XDP_PASS;
+
+		data_end = (void *)(long)ctx->data_end;
+		data = (void *)(long)ctx->data;
+
+		ipv6h = data + sizeof(*eth);
+
+		if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+		    ipv6h->nexthdr != IPPROTO_UDP)
+			return XDP_PASS;
+
+		udph = data + sizeof(*ipv6h) + sizeof(*eth);
+
+		if (udph + 1 > (struct udphdr *)data_end)
+			return XDP_PASS;
+		if (udph->dest != bpf_htons(port))
+			return XDP_PASS;
+
+		record_stats(ctx, STATS_RX);
+		eth = data;
+		swap_machdr((void *)eth);
+
+		__builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));
+		__builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr,
+				 sizeof(tmp_ipv6));
+		__builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6));
+
+		record_stats(ctx, STATS_TX);
+
+		return XDP_TX;
+	}
+
+	return XDP_PASS;
+}
+
+static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct udphdr *udph = NULL;
+	struct ethhdr *eth = data;
+	__u32 len, len_new;
+
+	if (data + sizeof(*eth) > data_end)
+		return NULL;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = data + sizeof(*eth);
+		__u16 total_len;
+
+		if (iph + 1 > (struct iphdr *)data_end)
+			return NULL;
+
+		iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset);
+
+		udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+		if (!udph || udph + 1 > (struct udphdr *)data_end)
+			return NULL;
+
+		len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+	} else if (eth->h_proto  == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ipv6h = data + sizeof(*eth);
+		__u16 payload_len;
+
+		if (ipv6h + 1 > (struct ipv6hdr *)data_end)
+			return NULL;
+
+		udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+		if (!udph || udph + 1 > (struct udphdr *)data_end)
+			return NULL;
+
+		*udp_csum = ~((__u32)udph->check);
+
+		len = ipv6h->payload_len;
+		len_new = bpf_htons(bpf_ntohs(len) + offset);
+		ipv6h->payload_len = len_new;
+
+		*udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+					  sizeof(len_new), *udp_csum);
+
+		len = udph->len;
+		len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+		*udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+					  sizeof(len_new), *udp_csum);
+	} else {
+		return NULL;
+	}
+
+	udph->len = len_new;
+
+	return udph;
+}
+
+static __u16 csum_fold_helper(__u32 csum)
+{
+	return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff;
+}
+
+static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
+				     unsigned long hdr_len)
+{
+	char tmp_buff[MAX_ADJST_OFFSET];
+	__u32 buff_pos, udp_csum = 0;
+	struct udphdr *udph = NULL;
+	__u32 buff_len;
+
+	udph = update_pkt(ctx, 0 - offset, &udp_csum);
+	if (!udph)
+		return -1;
+
+	buff_len = bpf_xdp_get_buff_len(ctx);
+
+	offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+				     offset & 0xff;
+	if (offset == 0)
+		return -1;
+
+	/* Make sure we have enough data to avoid eating the header */
+	if (buff_len - offset < hdr_len)
+		return -1;
+
+	buff_pos = buff_len - offset;
+	if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0)
+		return -1;
+
+	udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+	udph->check = (__u16)csum_fold_helper(udp_csum);
+
+	if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset)
+{
+	char tmp_buff[MAX_ADJST_OFFSET];
+	__u32 buff_pos, udp_csum = 0;
+	__u32 buff_len, hdr_len, key;
+	struct udphdr *udph;
+	__s32 *val;
+	__u8 tag;
+
+	/* Proceed to update the packet headers before attempting to adjust
+	 * the tail. Once the tail is adjusted we lose access to the offset
+	 * amount of data at the end of the packet which is crucial to update
+	 * the checksum.
+	 * Since any failure beyond this would abort the packet, we should
+	 * not worry about passing a packet up the stack with wrong headers
+	 */
+	udph = update_pkt(ctx, offset, &udp_csum);
+	if (!udph)
+		return -1;
+
+	key = XDP_ADJST_TAG;
+	val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!val)
+		return -1;
+
+	tag = (__u8)(*val);
+
+	for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+		__builtin_memcpy(&tmp_buff[i], &tag, 1);
+
+	offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+				     offset & 0xff;
+	if (offset == 0)
+		return -1;
+
+	udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum);
+	udph->check = (__u16)csum_fold_helper(udp_csum);
+
+	buff_len = bpf_xdp_get_buff_len(ctx);
+
+	if (bpf_xdp_adjust_tail(ctx, offset) < 0) {
+		bpf_printk("Failed to adjust tail\n");
+		return -1;
+	}
+
+	if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
+{
+	struct udphdr *udph = NULL;
+	__s32 *adjust_offset, *val;
+	unsigned long hdr_len;
+	void *offset_ptr;
+	__u32 key;
+	__u8 tag;
+	int ret;
+
+	udph = filter_udphdr(ctx, port);
+	if (!udph)
+		return XDP_PASS;
+
+	hdr_len = (void *)udph - (void *)(long)ctx->data +
+		  sizeof(struct udphdr);
+	key = XDP_ADJST_OFFSET;
+	adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!adjust_offset)
+		return XDP_PASS;
+
+	if (*adjust_offset < 0)
+		ret = xdp_adjst_tail_shrnk_data(ctx,
+						(__u16)(0 - *adjust_offset),
+						hdr_len);
+	else
+		ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset));
+	if (ret)
+		goto abort_pkt;
+
+	record_stats(ctx, STATS_PASS);
+	return XDP_PASS;
+
+abort_pkt:
+	record_stats(ctx, STATS_ABORT);
+	return XDP_ABORTED;
+}
+
+static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len,
+				     __u32 offset)
+{
+	char tmp_buff[MAX_ADJST_OFFSET];
+	struct udphdr *udph;
+	void *offset_ptr;
+	__u32 udp_csum = 0;
+
+	/* Update the length information in the IP and UDP headers before
+	 * adjusting the headroom. This simplifies accessing the relevant
+	 * fields in the IP and UDP headers for fragmented packets. Any
+	 * failure beyond this point will result in the packet being aborted,
+	 * so we don't need to worry about incorrect length information for
+	 * passed packets.
+	 */
+	udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum);
+	if (!udph)
+		return -1;
+
+	offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+				     offset & 0xff;
+	if (offset == 0)
+		return -1;
+
+	if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0)
+		return -1;
+
+	udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+
+	udph->check = (__u16)csum_fold_helper(udp_csum);
+
+	if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0)
+		return -1;
+
+	if (bpf_xdp_adjust_head(ctx, offset) < 0)
+		return -1;
+
+	if (offset > MAX_ADJST_OFFSET)
+		return -1;
+
+	if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+		return -1;
+
+	/* Added here to handle clang complain about negative value */
+	hdr_len = hdr_len & 0xff;
+
+	if (hdr_len == 0)
+		return -1;
+
+	if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len,
+				    __u32 offset)
+{
+	char hdr_buff[MAX_HDR_LEN];
+	char data_buff[MAX_ADJST_OFFSET];
+	void *offset_ptr;
+	__s32 *val;
+	__u32 key;
+	__u8 tag;
+	__u32 udp_csum = 0;
+	struct udphdr *udph;
+
+	udph = update_pkt(ctx, (__s16)(offset), &udp_csum);
+	if (!udph)
+		return -1;
+
+	key = XDP_ADJST_TAG;
+	val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!val)
+		return -1;
+
+	tag = (__u8)(*val);
+	for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+		__builtin_memcpy(&data_buff[i], &tag, 1);
+
+	offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+				     offset & 0xff;
+	if (offset == 0)
+		return -1;
+
+	udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum);
+	udph->check = (__u16)csum_fold_helper(udp_csum);
+
+	if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+		return -1;
+
+	/* Added here to handle clang complain about negative value */
+	hdr_len = hdr_len & 0xff;
+
+	if (hdr_len == 0)
+		return -1;
+
+	if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+		return -1;
+
+	if (offset > MAX_ADJST_OFFSET)
+		return -1;
+
+	if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0)
+		return -1;
+
+	if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+		return -1;
+
+	if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0)
+		return -1;
+
+	return 0;
+}
+
+static int xdp_head_adjst(struct xdp_md *ctx, __u16 port)
+{
+	struct udphdr *udph_ptr = NULL;
+	__u32 key, size, hdr_len;
+	__s32 *val;
+	int res;
+
+	/* Filter packets based on UDP port */
+	udph_ptr = filter_udphdr(ctx, port);
+	if (!udph_ptr)
+		return XDP_PASS;
+
+	hdr_len = (void *)udph_ptr - (void *)(long)ctx->data +
+		  sizeof(struct udphdr);
+
+	key = XDP_ADJST_OFFSET;
+	val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!val)
+		return XDP_PASS;
+
+	switch (*val) {
+	case -16:
+	case 16:
+		size = 16;
+		break;
+	case -32:
+	case 32:
+		size = 32;
+		break;
+	case -64:
+	case 64:
+		size = 64;
+		break;
+	case -128:
+	case 128:
+		size = 128;
+		break;
+	case -256:
+	case 256:
+		size = 256;
+		break;
+	default:
+		bpf_printk("Invalid adjustment offset: %d\n", *val);
+		goto abort;
+	}
+
+	if (*val < 0)
+		res = xdp_adjst_head_grow_data(ctx, hdr_len, size);
+	else
+		res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size);
+
+	if (res)
+		goto abort;
+
+	record_stats(ctx, STATS_PASS);
+	return XDP_PASS;
+
+abort:
+	record_stats(ctx, STATS_ABORT);
+	return XDP_ABORTED;
+}
+
+static int xdp_prog_common(struct xdp_md *ctx)
+{
+	__u32 key, *port;
+	__s32 *mode;
+
+	key = XDP_MODE;
+	mode = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!mode)
+		return XDP_PASS;
+
+	key = XDP_PORT;
+	port = bpf_map_lookup_elem(&map_xdp_setup, &key);
+	if (!port)
+		return XDP_PASS;
+
+	switch (*mode) {
+	case XDP_MODE_PASS:
+		return xdp_mode_pass(ctx, (__u16)(*port));
+	case XDP_MODE_DROP:
+		return xdp_mode_drop_handler(ctx, (__u16)(*port));
+	case XDP_MODE_TX:
+		return xdp_mode_tx_handler(ctx, (__u16)(*port));
+	case XDP_MODE_TAIL_ADJST:
+		return xdp_adjst_tail(ctx, (__u16)(*port));
+	case XDP_MODE_HEAD_ADJST:
+		return xdp_head_adjst(ctx, (__u16)(*port));
+	}
+
+	/* Default action is to simply pass */
+	return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_prog(struct xdp_md *ctx)
+{
+	return xdp_prog_common(ctx);
+}
+
+SEC("xdp.frags")
+int xdp_prog_frags(struct xdp_md *ctx)
+{
+	return xdp_prog_common(ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py
new file mode 100755
index 000000000000..aab043c59d69
--- /dev/null
+++ b/tools/testing/selftests/net/link_netns.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+
+from lib.py import ksft_run, ksft_exit, ksft_true
+from lib.py import ip
+from lib.py import NetNS, NetNSEnter
+from lib.py import RtnlFamily
+
+
+LINK_NETNSID = 100
+
+
+def test_event() -> None:
+    with NetNS() as ns1, NetNS() as ns2:
+        with NetNSEnter(str(ns2)):
+            rtnl = RtnlFamily()
+
+        rtnl.ntf_subscribe("rtnlgrp-link")
+
+        ip(f"netns set {ns2} {LINK_NETNSID}", ns=str(ns1))
+        ip(f"link add netns {ns1} link-netnsid {LINK_NETNSID} dummy1 type dummy")
+        ip(f"link add netns {ns1} dummy2 type dummy", ns=str(ns2))
+
+        ip("link del dummy1", ns=str(ns1))
+        ip("link del dummy2", ns=str(ns1))
+
+        time.sleep(1)
+        rtnl.check_ntf()
+        ksft_true(rtnl.async_msg_queue.empty(),
+                  "Received unexpected link notification")
+
+
+def validate_link_netns(netns, ifname, link_netnsid) -> bool:
+    link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True)
+    if not link_info:
+        return False
+    return link_info[0].get("link_netnsid") == link_netnsid
+
+
+def test_link_net() -> None:
+    configs = [
+        # type, common args, type args, fallback to dev_net
+        ("ipvlan", "link dummy1", "", False),
+        ("macsec", "link dummy1", "", False),
+        ("macvlan", "link dummy1", "", False),
+        ("macvtap", "link dummy1", "", False),
+        ("vlan", "link dummy1", "id 100", False),
+        ("gre", "", "local 192.0.2.1", True),
+        ("vti", "", "local 192.0.2.1", True),
+        ("ipip", "", "local 192.0.2.1", True),
+        ("ip6gre", "", "local 2001:db8::1", True),
+        ("ip6tnl", "", "local 2001:db8::1", True),
+        ("vti6", "", "local 2001:db8::1", True),
+        ("sit", "", "local 192.0.2.1", True),
+        ("xfrm", "", "if_id 1", True),
+    ]
+
+    with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3:
+        net1, net2, net3 = str(ns1), str(ns2), str(ns3)
+
+        # prepare link netnsid and a dummy link needed by certain drivers
+        ip(f"netns set {net3} {LINK_NETNSID}", ns=net2)
+        ip("link add dummy1 type dummy", ns=net3)
+
+        cases = [
+            # source, "netns", "link-netns", exp1 (dev_net fallback), exp2
+            (net3, None, None, None, None),
+            (net3, net2, None, None, LINK_NETNSID),
+            (net2, None, net3, LINK_NETNSID, LINK_NETNSID),
+            (net1, net2, net3, LINK_NETNSID, LINK_NETNSID),
+        ]
+
+        for src_net, netns, link_netns, exp1, exp2 in cases:
+            tgt_net = netns or src_net
+            for typ, cargs, targs, fb_dev_net in configs:
+                cmd = "link add"
+                if netns:
+                    cmd += f" netns {netns}"
+                if link_netns:
+                    cmd += f" link-netns {link_netns}"
+                cmd += f" {cargs} foo type {typ} {targs}"
+                ip(cmd, ns=src_net)
+                if fb_dev_net:
+                    ksft_true(validate_link_netns(tgt_net, "foo", exp1),
+                              f"{typ} link_netns validation failed")
+                else:
+                    ksft_true(validate_link_netns(tgt_net, "foo", exp2),
+                              f"{typ} link_netns validation failed")
+                ip("link del foo", ns=tgt_net)
+
+
+def test_peer_net() -> None:
+    types = [
+        "vxcan",
+        "netkit",
+        "veth",
+    ]
+
+    with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3, NetNS() as ns4:
+        net1, net2, net3, net4 = str(ns1), str(ns2), str(ns3), str(ns4)
+
+        ip(f"netns set {net3} {LINK_NETNSID}", ns=net2)
+
+        cases = [
+            # source, "netns", "link-netns", "peer netns", expected
+            (net1, None, None, None, None),
+            (net1, net2, None, None, None),
+            (net2, None, net3, None, LINK_NETNSID),
+            (net1, net2, net3, None, None),
+            (net2, None, None, net3, LINK_NETNSID),
+            (net1, net2, None, net3, LINK_NETNSID),
+            (net2, None, net2, net3, LINK_NETNSID),
+            (net1, net2, net4, net3, LINK_NETNSID),
+        ]
+
+        for src_net, netns, link_netns, peer_netns, exp in cases:
+            tgt_net = netns or src_net
+            for typ in types:
+                cmd = "link add"
+                if netns:
+                    cmd += f" netns {netns}"
+                if link_netns:
+                    cmd += f" link-netns {link_netns}"
+                cmd += f" foo type {typ}"
+                if peer_netns:
+                    cmd += f" peer netns {peer_netns}"
+                ip(cmd, ns=src_net)
+                ksft_true(validate_link_netns(tgt_net, "foo", exp),
+                          f"{typ} peer_netns validation failed")
+                ip("link del foo", ns=tgt_net)
+
+
+def main() -> None:
+    ksft_run([test_event, test_link_net, test_peer_net])
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
new file mode 100755
index 000000000000..881eb399798f
--- /dev/null
+++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# WARNING
+# -------
+# This is just a dummy script that triggers encap cases with possible dst cache
+# reference loops in affected lwt users (see list below). Some cases are
+# pathological configurations for simplicity, others are valid. Overall, we
+# don't want this issue to happen, no matter what. In order to catch any
+# reference loops, kmemleak MUST be used. The results alone are always blindly
+# successful, don't rely on them. Note that the following tests may crash the
+# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is
+# not present.
+#
+# Affected lwt users so far (please update accordingly if needed):
+#  - ila_lwt (output only)
+#  - ioam6_iptunnel (output only)
+#  - rpl_iptunnel (both input and output)
+#  - seg6_iptunnel (both input and output)
+
+source lib.sh
+
+check_compatibility()
+{
+	setup_ns tmp_node &>/dev/null
+	if [ $? != 0 ]; then
+		echo "SKIP: Cannot create netns."
+		exit $ksft_skip
+	fi
+
+	ip link add name veth0 netns $tmp_node type veth \
+		peer name veth1 netns $tmp_node &>/dev/null
+	local ret=$?
+
+	ip -netns $tmp_node link set veth0 up &>/dev/null
+	ret=$((ret + $?))
+
+	ip -netns $tmp_node link set veth1 up &>/dev/null
+	ret=$((ret + $?))
+
+	if [ $ret != 0 ]; then
+		echo "SKIP: Cannot configure links."
+		cleanup_ns $tmp_node
+		exit $ksft_skip
+	fi
+
+	lsmod 2>/dev/null | grep -q "ila"
+	ila_lsmod=$?
+	[ $ila_lsmod != 0 ] && modprobe ila &>/dev/null
+
+	ip -netns $tmp_node route add 2001:db8:1::/64 \
+		encap ila 1:2:3:4 csum-mode no-action ident-type luid \
+			hook-type output \
+		dev veth0 &>/dev/null
+
+	ip -netns $tmp_node route add 2001:db8:2::/64 \
+		encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \
+		dev veth0 &>/dev/null
+
+	ip -netns $tmp_node route add 2001:db8:3::/64 \
+		encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null
+
+	ip -netns $tmp_node route add 2001:db8:4::/64 \
+		encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null
+
+	ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila"
+	skip_ila=$?
+
+	ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6"
+	skip_ioam6=$?
+
+	ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl"
+	skip_rpl=$?
+
+	ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6"
+	skip_seg6=$?
+
+	cleanup_ns $tmp_node
+}
+
+setup()
+{
+	setup_ns alpha beta gamma &>/dev/null
+
+	ip link add name veth-alpha netns $alpha type veth \
+		peer name veth-betaL netns $beta &>/dev/null
+
+	ip link add name veth-betaR netns $beta type veth \
+		peer name veth-gamma netns $gamma &>/dev/null
+
+	ip -netns $alpha link set veth-alpha name veth0 &>/dev/null
+	ip -netns $beta link set veth-betaL name veth0 &>/dev/null
+	ip -netns $beta link set veth-betaR name veth1 &>/dev/null
+	ip -netns $gamma link set veth-gamma name veth0 &>/dev/null
+
+	ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+	ip -netns $alpha link set veth0 up &>/dev/null
+	ip -netns $alpha link set lo up &>/dev/null
+	ip -netns $alpha route add 2001:db8:2::/64 \
+		via 2001:db8:1::1 dev veth0 &>/dev/null
+
+	ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null
+	ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null
+	ip -netns $beta link set veth0 up &>/dev/null
+	ip -netns $beta link set veth1 up &>/dev/null
+	ip -netns $beta link set lo up &>/dev/null
+	ip -netns $beta route del 2001:db8:2::/64
+	ip -netns $beta route add 2001:db8:2::/64 dev veth1
+	ip netns exec $beta \
+		sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null
+
+	ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null
+	ip -netns $gamma link set veth0 up &>/dev/null
+	ip -netns $gamma link set lo up &>/dev/null
+	ip -netns $gamma route add 2001:db8:1::/64 \
+		via 2001:db8:2::1 dev veth0 &>/dev/null
+
+	sleep 1
+
+	ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null
+	if [ $? != 0 ]; then
+		echo "SKIP: Setup failed."
+		exit $ksft_skip
+	fi
+
+	sleep 1
+}
+
+cleanup()
+{
+	cleanup_ns $alpha $beta $gamma
+	[ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null
+}
+
+run_ila()
+{
+	if [ $skip_ila != 0 ]; then
+		echo "SKIP: ila (output)"
+		return
+	fi
+
+	ip -netns $beta route del 2001:db8:2::/64
+	ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \
+		encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \
+			hook-type output \
+		dev veth1 &>/dev/null
+	sleep 1
+
+	echo "TEST: ila (output)"
+	ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+
+	ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128
+	ip -netns $beta route add 2001:db8:2::/64 dev veth1
+	sleep 1
+}
+
+run_ioam6()
+{
+	if [ $skip_ioam6 != 0 ]; then
+		echo "SKIP: ioam6 (output)"
+		return
+	fi
+
+	ip -netns $beta route change 2001:db8:2::/64 \
+		encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \
+		dev veth1 &>/dev/null
+	sleep 1
+
+	echo "TEST: ioam6 (output)"
+	ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+}
+
+run_rpl()
+{
+	if [ $skip_rpl != 0 ]; then
+		echo "SKIP: rpl (input)"
+		echo "SKIP: rpl (output)"
+		return
+	fi
+
+	ip -netns $beta route change 2001:db8:2::/64 \
+		encap rpl segs 2001:db8:2::2 \
+		dev veth1 &>/dev/null
+	sleep 1
+
+	echo "TEST: rpl (input)"
+	ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+
+	echo "TEST: rpl (output)"
+	ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+}
+
+run_seg6()
+{
+	if [ $skip_seg6 != 0 ]; then
+		echo "SKIP: seg6 (input)"
+		echo "SKIP: seg6 (output)"
+		return
+	fi
+
+	ip -netns $beta route change 2001:db8:2::/64 \
+		encap seg6 mode inline segs 2001:db8:2::2 \
+		dev veth1 &>/dev/null
+	sleep 1
+
+	echo "TEST: seg6 (input)"
+	ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+
+	echo "TEST: seg6 (output)"
+	ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+	sleep 1
+}
+
+run()
+{
+	run_ila
+	run_ioam6
+	run_rpl
+	run_seg6
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges."
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool."
+	exit $ksft_skip
+fi
+
+check_compatibility
+
+trap cleanup EXIT
+
+setup
+run
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/macvlan_mcast_shared_mac.sh b/tools/testing/selftests/net/macvlan_mcast_shared_mac.sh
new file mode 100755
index 000000000000..ff5b89347247
--- /dev/null
+++ b/tools/testing/selftests/net/macvlan_mcast_shared_mac.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test multicast delivery to macvlan bridge ports when the source MAC
+# matches the macvlan's own MAC address (e.g., VRRP virtual MAC shared
+# across multiple hosts).
+#
+# Topology:
+#
+#   NS_SRC                          NS_BRIDGE
+#   veth_src (SHARED_MAC) <----->  veth_dst
+#                                     |
+#                                     +-- macvlan0 (bridge mode, SHARED_MAC)
+#
+# A multicast packet sent from NS_SRC with source MAC equal to
+# macvlan0's MAC must still be delivered to macvlan0.
+
+source lib.sh
+
+SHARED_MAC="00:00:5e:00:01:01"
+MCAST_ADDR="239.0.0.1"
+
+setup() {
+	setup_ns NS_SRC NS_BRIDGE
+
+	ip -net "${NS_BRIDGE}" link add veth_dst type veth \
+		peer name veth_src netns "${NS_SRC}"
+
+	ip -net "${NS_SRC}" link set veth_src address "${SHARED_MAC}"
+	ip -net "${NS_SRC}" link set veth_src up
+	ip -net "${NS_SRC}" addr add 192.168.1.1/24 dev veth_src
+
+	ip -net "${NS_BRIDGE}" link set veth_dst up
+
+	ip -net "${NS_BRIDGE}" link add macvlan0 link veth_dst \
+		type macvlan mode bridge
+	ip -net "${NS_BRIDGE}" link set macvlan0 address "${SHARED_MAC}"
+	ip -net "${NS_BRIDGE}" link set macvlan0 up
+	ip -net "${NS_BRIDGE}" addr add 192.168.1.2/24 dev macvlan0
+
+	# Accept all multicast so the mc_filter passes for any group.
+	ip -net "${NS_BRIDGE}" link set macvlan0 allmulticast on
+}
+
+cleanup() {
+	rm -f "${CAPFILE}" "${CAPOUT}"
+	cleanup_ns "${NS_SRC}" "${NS_BRIDGE}"
+}
+
+test_macvlan_mcast_shared_mac() {
+	CAPFILE=$(mktemp)
+	CAPOUT=$(mktemp)
+
+	echo "Testing multicast delivery to macvlan with shared source MAC"
+
+	# Listen for one ICMP packet on macvlan0.
+	timeout 5s ip netns exec "${NS_BRIDGE}" \
+		tcpdump -i macvlan0 -c 1 -w "${CAPFILE}" icmp &> "${CAPOUT}" &
+	local pid=$!
+	if ! slowwait 1 grep -qs "listening" "${CAPOUT}"; then
+		echo "[FAIL] tcpdump did not start listening"
+		return "${ksft_fail}"
+	fi
+
+	# Send multicast ping from NS_SRC; source MAC equals macvlan0's MAC.
+	ip netns exec "${NS_SRC}" \
+		ping -W 0.1 -c 3 -I veth_src "${MCAST_ADDR}" &> /dev/null
+
+	wait "${pid}"
+
+	local count
+	count=$(tcpdump -r "${CAPFILE}" 2>/dev/null | wc -l)
+	if [[ "${count}" -ge 1 ]]; then
+		echo "[ OK ]"
+		return "${ksft_pass}"
+	else
+		echo "[FAIL] expected at least 1 ICMP packet on macvlan0," \
+			"got ${count}"
+		return "${ksft_fail}"
+	fi
+}
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+	echo "SKIP: Could not run test without tcpdump tool"
+	exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+test_macvlan_mcast_shared_mac
+
+exit $?
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index 49daae73c41e..833279fb34e2 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 mptcp_connect
+mptcp_diag
 mptcp_inq
 mptcp_sockopt
 pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index c76525fe2b84..22ba0da2adb8 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -3,15 +3,36 @@
 top_srcdir = ../../../../..
 
 CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
-
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
-	      simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
-
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
-
-TEST_FILES := mptcp_lib.sh settings
-
-TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) ../net_helper.sh
+CFLAGS += -I$(top_srcdir)/tools/include
+
+TEST_PROGS := \
+	diag.sh \
+	mptcp_connect.sh \
+	mptcp_connect_checksum.sh \
+	mptcp_connect_mmap.sh \
+	mptcp_connect_sendfile.sh \
+	mptcp_connect_splice.sh \
+	mptcp_join.sh \
+	mptcp_sockopt.sh \
+	pm_netlink.sh \
+	simult_flows.sh \
+	userspace_pm.sh \
+# end of TEST_PROGS
+
+TEST_GEN_FILES := \
+	mptcp_connect \
+	mptcp_diag \
+	mptcp_inq \
+	mptcp_sockopt \
+	pm_nl_ctl \
+# end of TEST_GEN_FILES
+
+TEST_FILES := \
+	mptcp_lib.sh \
+	settings \
+# end of TEST_FILES
+
+TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh)
 
 EXTRA_CLEAN := *.pcap
 
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 4f80014cae49..59051ee2a986 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,34 +1,36 @@
-CONFIG_KALLSYMS=y
-CONFIG_MPTCP=y
-CONFIG_IPV6=y
-CONFIG_MPTCP_IPV6=y
 CONFIG_INET_DIAG=m
 CONFIG_INET_MPTCP_DIAG=m
-CONFIG_VETH=y
-CONFIG_NET_SCH_NETEM=m
-CONFIG_SYN_COOKIES=y
-CONFIG_NETFILTER=y
-CONFIG_NETFILTER_ADVANCED=y
-CONFIG_NETFILTER_NETLINK=m
-CONFIG_NF_TABLES=m
-CONFIG_NFT_COMPAT=m
-CONFIG_NETFILTER_XTABLES=m
-CONFIG_NETFILTER_XT_MATCH_BPF=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NF_TABLES_INET=y
-CONFIG_NFT_TPROXY=m
-CONFIG_NFT_SOCKET=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
 CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_IP_MULTIPLE_TABLES=y
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IPV6=y
 CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IP6_NF_FILTER=m
+CONFIG_KALLSYMS=y
+CONFIG_MPTCP=y
+CONFIG_MPTCP_IPV6=y
 CONFIG_NET_ACT_CSUM=m
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_CLS_FW=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
 CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_TPROXY=m
+CONFIG_SYN_COOKIES=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 2bd0c1eb70c5..d847ff1737c3 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -28,7 +28,7 @@ flush_pids()
 }
 
 # This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
 cleanup()
 {
 	ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
@@ -200,6 +200,62 @@ chk_msk_cestab()
 		 "${expected}" "${msg}" ""
 }
 
+chk_dump_one()
+{
+	local ss_token
+	local token
+	local msg
+
+	ss_token="$(ss -inmHMN $ns |
+		    mptcp_lib_get_info_value "token" "token")"
+
+	token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\
+		 awk -F':[ \t]+' '/^token/ {print $2}')"
+
+	msg="....chk dump_one"
+
+	mptcp_lib_print_title "$msg"
+	if [ -n "$ss_token" ] && [ "$ss_token" = "$token" ]; then
+		mptcp_lib_pr_ok
+		mptcp_lib_result_pass "${msg}"
+	else
+		mptcp_lib_pr_fail "expected $ss_token found $token"
+		mptcp_lib_result_fail "${msg}"
+		ret=${KSFT_FAIL}
+	fi
+}
+
+chk_dump_subflow()
+{
+	local inet_diag_token
+	local subflow_line
+	local ss_output
+	local ss_token
+	local msg
+
+	ss_output=$(ss -tniN $ns)
+
+	subflow_line=$(echo "$ss_output" | \
+		       grep -m1 -Eo '[0-9.]+:[0-9.]+ +[0-9.]+:[0-9.]+')
+
+	ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+')
+
+	inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \
+			  grep -Eo 'token:[^ ]+')
+
+	msg="....chk dump_subflow"
+
+	mptcp_lib_print_title "$msg"
+	if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then
+		mptcp_lib_pr_ok
+		mptcp_lib_result_pass "${msg}"
+	else
+		mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token"
+		mptcp_lib_result_fail "${msg}"
+		ret=${KSFT_FAIL}
+	fi
+}
+
 msk_info_get_value()
 {
 	local port="${1}"
@@ -290,6 +346,8 @@ chk_msk_remote_key_nr 2 "....chk remote_key"
 chk_msk_fallback_nr 0 "....chk no fallback"
 chk_msk_inuse 2
 chk_msk_cestab 2
+chk_dump_one
+chk_dump_subflow
 flush_pids
 
 chk_msk_inuse 0 "2->0"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index d240d02fa443..cbe573c4ab3a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -33,6 +33,7 @@
 #include <linux/tcp.h>
 #include <linux/time_types.h>
 #include <linux/sockios.h>
+#include <linux/compiler.h>
 
 extern int optind;
 
@@ -51,6 +52,7 @@ enum cfg_mode {
 	CFG_MODE_POLL,
 	CFG_MODE_MMAP,
 	CFG_MODE_SENDFILE,
+	CFG_MODE_SPLICE,
 };
 
 enum cfg_peek {
@@ -123,7 +125,7 @@ static void die_usage(void)
 	fprintf(stderr, "\t-j     -- add additional sleep at connection start and tear down "
 		"-- for MPJ tests\n");
 	fprintf(stderr, "\t-l     -- listens mode, accepts incoming connection\n");
-	fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
+	fprintf(stderr, "\t-m [poll|mmap|sendfile|splice] -- use poll(default)/mmap+write/sendfile/splice\n");
 	fprintf(stderr, "\t-M mark -- set socket packet mark\n");
 	fprintf(stderr, "\t-o option -- test sockopt <option>\n");
 	fprintf(stderr, "\t-p num -- use port num\n");
@@ -140,7 +142,7 @@ static void die_usage(void)
 	exit(1);
 }
 
-static void xerror(const char *fmt, ...)
+static void __noreturn xerror(const char *fmt, ...)
 {
 	va_list ap;
 
@@ -180,13 +182,27 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
 }
 
 static void xgetaddrinfo(const char *node, const char *service,
-			 const struct addrinfo *hints,
+			 struct addrinfo *hints,
 			 struct addrinfo **res)
 {
-	int err = getaddrinfo(node, service, hints, res);
+	int err;
 
+again:
+	err = getaddrinfo(node, service, hints, res);
 	if (err) {
-		const char *errstr = getxinfo_strerr(err);
+		const char *errstr;
+
+		/* glibc starts to support MPTCP since v2.42.
+		 * For older versions, use IPPROTO_TCP to resolve,
+		 * and use TCP/MPTCP to create socket. Retry only once.
+		 * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82
+		 */
+		if (err == EAI_SOCKTYPE && hints->ai_protocol != IPPROTO_TCP) {
+			hints->ai_protocol = IPPROTO_TCP;
+			goto again;
+		}
+
+		errstr = getxinfo_strerr(err);
 
 		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
 			node ? node : "", service ? service : "", errstr);
@@ -243,7 +259,7 @@ static void set_transparent(int fd, int pf)
 	}
 }
 
-static void set_mptfo(int fd, int pf)
+static void set_mptfo(int fd)
 {
 	int qlen = 25;
 
@@ -292,7 +308,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
 {
 	int sock = -1;
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
 	};
@@ -320,7 +336,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
 			set_transparent(sock, pf);
 
 		if (cfg_sockopt_types.mptfo)
-			set_mptfo(sock, pf);
+			set_mptfo(sock);
 
 		if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
 			break; /* success */
@@ -356,7 +372,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 			      int infd, struct wstate *winfo)
 {
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 	};
 	struct addrinfo *a, *addr;
@@ -391,21 +407,18 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 				*peer = a;
 				break; /* success */
 			}
+			perror("sendto()");
 		} else {
 			if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
 				*peer = a;
 				break; /* success */
 			}
-		}
-		if (cfg_sockopt_types.mptfo) {
-			perror("sendto()");
-			close(sock);
-			sock = -1;
-		} else {
 			perror("connect()");
-			close(sock);
-			sock = -1;
 		}
+
+		/* error */
+		close(sock);
+		sock = -1;
 	}
 
 	freeaddrinfo(addr);
@@ -696,8 +709,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
 
 				bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
 				if (bw < 0) {
-					if (cfg_rcv_trunc)
-						return 0;
+					/* expected reset, continue to read */
+					if (cfg_rcv_trunc &&
+					    (errno == ECONNRESET ||
+					     errno == EPIPE)) {
+						fds.events &= ~POLLOUT;
+						continue;
+					}
+
 					perror("write");
 					return 111;
 				}
@@ -723,8 +742,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
 		}
 
 		if (fds.revents & (POLLERR | POLLNVAL)) {
-			if (cfg_rcv_trunc)
-				return 0;
+			if (cfg_rcv_trunc) {
+				fds.events &= ~(POLLERR | POLLNVAL);
+				continue;
+			}
 			fprintf(stderr, "Unexpected revents: "
 				"POLLERR/POLLNVAL(%x)\n", fds.revents);
 			return 5;
@@ -912,6 +933,71 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
 	return err;
 }
 
+static int do_splice(const int infd, const int outfd, const size_t len,
+		     struct wstate *winfo)
+{
+	ssize_t in_bytes, out_bytes;
+	int pipefd[2];
+	int err;
+
+	err = pipe(pipefd);
+	if (err) {
+		perror("pipe");
+		return 2;
+	}
+
+again:
+	in_bytes = splice(infd, NULL, pipefd[1], NULL, len - winfo->total_len,
+			  SPLICE_F_MOVE | SPLICE_F_MORE);
+	if (in_bytes < 0) {
+		perror("splice in");
+		err = 3;
+	} else if (in_bytes > 0) {
+		out_bytes = splice(pipefd[0], NULL, outfd, NULL, in_bytes,
+				   SPLICE_F_MOVE | SPLICE_F_MORE);
+		if (out_bytes < 0) {
+			perror("splice out");
+			err = 4;
+		} else if (in_bytes != out_bytes) {
+			fprintf(stderr, "Unexpected transfer: %zd vs %zd\n",
+				in_bytes, out_bytes);
+			err = 5;
+		} else {
+			goto again;
+		}
+	}
+
+	close(pipefd[0]);
+	close(pipefd[1]);
+
+	return err;
+}
+
+static int copyfd_io_splice(int infd, int peerfd, int outfd, unsigned int size,
+			    bool *in_closed_after_out, struct wstate *winfo)
+{
+	int err;
+
+	if (listen_mode) {
+		err = do_splice(peerfd, outfd, size, winfo);
+		if (err)
+			return err;
+
+		err = do_splice(infd, peerfd, size, winfo);
+	} else {
+		err = do_splice(infd, peerfd, size, winfo);
+		if (err)
+			return err;
+
+		shut_wr(peerfd);
+
+		err = do_splice(peerfd, outfd, size, winfo);
+		*in_closed_after_out = true;
+	}
+
+	return err;
+}
+
 static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
 {
 	bool in_closed_after_out = false;
@@ -944,6 +1030,14 @@ static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct
 					 &in_closed_after_out, winfo);
 		break;
 
+	case CFG_MODE_SPLICE:
+		file_size = get_infd_size(infd);
+		if (file_size < 0)
+			return file_size;
+		ret = copyfd_io_splice(infd, peerfd, outfd, file_size,
+				       &in_closed_after_out, winfo);
+		break;
+
 	default:
 		fprintf(stderr, "Invalid mode %d\n", cfg_mode);
 
@@ -1050,6 +1144,8 @@ static void check_getpeername_connect(int fd)
 	socklen_t salen = sizeof(ss);
 	char a[INET6_ADDRSTRLEN];
 	char b[INET6_ADDRSTRLEN];
+	const char *iface;
+	size_t len;
 
 	if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) {
 		perror("getpeername");
@@ -1059,7 +1155,13 @@ static void check_getpeername_connect(int fd)
 	xgetnameinfo((struct sockaddr *)&ss, salen,
 		     a, sizeof(a), b, sizeof(b));
 
-	if (strcmp(cfg_host, a) || strcmp(cfg_port, b))
+	iface = strchr(cfg_host, '%');
+	if (iface)
+		len = iface - cfg_host;
+	else
+		len = strlen(cfg_host) + 1;
+
+	if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b))
 		fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__,
 			cfg_host, a, cfg_port, b);
 }
@@ -1079,6 +1181,7 @@ int main_loop_s(int listensock)
 	struct pollfd polls;
 	socklen_t salen;
 	int remotesock;
+	int err = 0;
 	int fd = 0;
 
 again:
@@ -1111,7 +1214,7 @@ again:
 		SOCK_TEST_TCPULP(remotesock, 0);
 
 		memset(&winfo, 0, sizeof(winfo));
-		copyfd_io(fd, remotesock, 1, true, &winfo);
+		err = copyfd_io(fd, remotesock, 1, true, &winfo);
 	} else {
 		perror("accept");
 		return 1;
@@ -1120,10 +1223,10 @@ again:
 	if (cfg_input)
 		close(fd);
 
-	if (--cfg_repeat > 0)
+	if (!err && --cfg_repeat > 0)
 		goto again;
 
-	return 0;
+	return err;
 }
 
 static void init_rng(void)
@@ -1233,7 +1336,7 @@ void xdisconnect(int fd)
 	else
 		xerror("bad family");
 
-	strcpy(cmd, "ss -M | grep -q ");
+	strcpy(cmd, "ss -Mnt | grep -q ");
 	cmdlen = strlen(cmd);
 	if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen],
 		       sizeof(cmd) - cmdlen))
@@ -1243,7 +1346,7 @@ void xdisconnect(int fd)
 
 	/*
 	 * wait until the pending data is completely flushed and all
-	 * the MPTCP sockets reached the closed status.
+	 * the sockets reached the closed status.
 	 * disconnect will bypass/ignore/drop any pending data.
 	 */
 	for (i = 0; ; i += msec_sleep) {
@@ -1264,13 +1367,13 @@ void xdisconnect(int fd)
 
 int main_loop(void)
 {
+	struct addrinfo *peer = NULL;
 	int fd = 0, ret, fd_in = 0;
-	struct addrinfo *peer;
 	struct wstate winfo;
 
 	if (cfg_input && cfg_sockopt_types.mptfo) {
 		fd_in = open(cfg_input, O_RDONLY);
-		if (fd < 0)
+		if (fd_in < 0)
 			xerror("can't open %s:%d", cfg_input, errno);
 	}
 
@@ -1293,13 +1396,13 @@ again:
 
 	if (cfg_input && !cfg_sockopt_types.mptfo) {
 		fd_in = open(cfg_input, O_RDONLY);
-		if (fd < 0)
+		if (fd_in < 0)
 			xerror("can't open %s:%d", cfg_input, errno);
 	}
 
 	ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (cfg_truncate > 0) {
 		shutdown(fd, SHUT_WR);
@@ -1320,7 +1423,10 @@ again:
 		close(fd);
 	}
 
-	return 0;
+out:
+	if (cfg_input)
+		close(fd_in);
+	return ret;
 }
 
 int parse_proto(const char *proto)
@@ -1345,12 +1451,15 @@ int parse_mode(const char *mode)
 		return CFG_MODE_MMAP;
 	if (!strcasecmp(mode, "sendfile"))
 		return CFG_MODE_SENDFILE;
+	if (!strcasecmp(mode, "splice"))
+		return CFG_MODE_SPLICE;
 
 	fprintf(stderr, "Unknown test mode: %s\n", mode);
 	fprintf(stderr, "Supported modes are:\n");
 	fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n");
 	fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n");
 	fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n");
+	fprintf(stderr, "\t\t\"splice\" - send entire input file (splice), then read response (-l will read input first)\n");
 
 	die_usage();
 
@@ -1415,7 +1524,7 @@ static void parse_opts(int argc, char **argv)
 			 */
 			if (cfg_truncate < 0) {
 				cfg_rcv_trunc = true;
-				signal(SIGPIPE, handle_signal);
+				signal(SIGPIPE, SIG_IGN);
 			}
 			break;
 		case 'j':
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 5e3c56253274..a6447f7a31fe 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -134,7 +134,7 @@ ns4=""
 TEST_GROUP=""
 
 # This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
 cleanup()
 {
 	rm -f "$cin_disconnect"
@@ -211,6 +211,11 @@ if $checksum; then
 	done
 fi
 
+if $capture; then
+	rndh="${ns1:4}"
+	mptcp_lib_pr_info "Packet capture files will have this prefix: ${rndh}-"
+fi
+
 set_ethtool_flags() {
 	local ns="$1"
 	local dev="$2"
@@ -361,7 +366,6 @@ do_transfer()
 
 	if $capture; then
 		local capuser
-		local rndh="${connector_ns:4}"
 		if [ -z $SUDO_USER ] ; then
 			capuser=""
 		else
@@ -371,81 +375,75 @@ do_transfer()
 		local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
 		local capopt="-i any -s 65535 -B 32768 ${capuser}"
 
-		ip netns exec ${listener_ns}  tcpdump ${capopt} -w "${capfile}-listener.pcap"  >> "${capout}" 2>&1 &
+		ip netns exec ${listener_ns} tcpdump ${capopt} \
+			-w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
 		local cappid_listener=$!
 
-		ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
-		local cappid_connector=$!
+		if [ ${listener_ns} != ${connector_ns} ]; then
+			ip netns exec ${connector_ns} tcpdump ${capopt} \
+				-w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+			local cappid_connector=$!
+		fi
 
 		sleep 1
 	fi
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat -n
+	mptcp_lib_nstat_init "${listener_ns}"
 	if [ ${listener_ns} != ${connector_ns} ]; then
-		NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-			nstat -n
-	fi
-
-	local stat_synrx_last_l
-	local stat_ackrx_last_l
-	local stat_cookietx_last
-	local stat_cookierx_last
-	local stat_csum_err_s
-	local stat_csum_err_c
-	local stat_tcpfb_last_l
-	stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
-	stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
-	stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
-	stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
-	stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
-	stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
-	stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
-	timeout ${timeout_test} \
-		ip netns exec ${listener_ns} \
-			./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-				$extra_args $local_addr < "$sin" > "$sout" &
+		mptcp_lib_nstat_init "${connector_ns}"
+	fi
+
+	ip netns exec ${listener_ns} \
+		./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+			$extra_args $local_addr < "$sin" > "$sout" &
 	local spid=$!
 
 	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
 
 	local start
 	start=$(date +%s%3N)
-	timeout ${timeout_test} \
-		ip netns exec ${connector_ns} \
-			./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-				$extra_args $connect_addr < "$cin" > "$cout" &
+	ip netns exec ${connector_ns} \
+		./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+			$extra_args $connect_addr < "$cin" > "$cout" &
 	local cpid=$!
 
+	mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+		"${connector_ns}" "${port}" "${cpid}" "${spid}" &
+	local timeout_pid=$!
+
 	wait $cpid
 	local retc=$?
 	wait $spid
 	local rets=$?
 
+	if kill -0 $timeout_pid; then
+		# Finished before the timeout: kill the background job
+		mptcp_lib_kill_group_wait $timeout_pid
+		timeout_pid=0
+	fi
+
 	local stop
 	stop=$(date +%s%3N)
 
 	if $capture; then
 		sleep 1
 		kill ${cappid_listener}
-		kill ${cappid_connector}
+		if [ ${listener_ns} != ${connector_ns} ]; then
+			kill ${cappid_connector}
+		fi
 	fi
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat | grep Tcp > /tmp/${listener_ns}.out
+	mptcp_lib_nstat_get "${listener_ns}"
 	if [ ${listener_ns} != ${connector_ns} ]; then
-		NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-			nstat | grep Tcp > /tmp/${connector_ns}.out
+		mptcp_lib_nstat_get "${connector_ns}"
 	fi
 
 	local duration
 	duration=$((stop-start))
 	printf "(duration %05sms) " "${duration}"
-	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
 		mptcp_lib_pr_fail "client exit code $retc, server $rets"
-		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
-			"/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
+		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
 
 		echo
 		cat "$capout"
@@ -459,38 +457,38 @@ do_transfer()
 	rets=$?
 
 	local extra=""
-	local stat_synrx_now_l
-	local stat_ackrx_now_l
-	local stat_cookietx_now
-	local stat_cookierx_now
-	local stat_ooo_now
-	local stat_tcpfb_now_l
-	stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
-	stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
-	stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
-	stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
-	stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
-	stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
-	expect_synrx=$((stat_synrx_last_l))
-	expect_ackrx=$((stat_ackrx_last_l))
+	local stat_synrx
+	local stat_ackrx
+	local stat_cookietx
+	local stat_cookierx
+	local stat_ooo
+	local stat_tcpfb
+	stat_synrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+	stat_ackrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+	stat_cookietx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+	stat_cookierx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+	stat_ooo=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+	stat_tcpfb=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
+
+	expect_synrx=0
+	expect_ackrx=0
 
 	cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies)
 	cookies=${cookies##*=}
 
 	if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
-		expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
-		expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
+		expect_synrx=${connect_per_transfer}
+		expect_ackrx=${connect_per_transfer}
 	fi
 
-	if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
-		mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+	if [ ${stat_synrx} -lt ${expect_synrx} ]; then
+		mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx})" \
 				  "than expected (${expect_synrx})"
 		retc=1
 	fi
-	if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
-		if [ ${stat_ooo_now} -eq 0 ]; then
-			mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+	if [ ${stat_ackrx} -lt ${expect_ackrx} ]; then
+		if [ ${stat_ooo} -eq 0 ]; then
+			mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx})" \
 					  "than expected (${expect_ackrx})"
 			rets=1
 		else
@@ -504,47 +502,45 @@ do_transfer()
 		csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
 		csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
 
-		local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
-		if [ $csum_err_s_nr -gt 0 ]; then
-			mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
+		if [ $csum_err_s -gt 0 ]; then
+			mptcp_lib_pr_fail "server got ${csum_err_s} data checksum error[s]"
 			rets=1
 		fi
 
-		local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
-		if [ $csum_err_c_nr -gt 0 ]; then
-			mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
+		if [ $csum_err_c -gt 0 ]; then
+			mptcp_lib_pr_fail "client got ${csum_err_c} data checksum error[s]"
 			retc=1
 		fi
 	fi
 
-	if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+	if [ ${stat_ooo} -eq 0 ] && [ ${stat_tcpfb} -gt 0 ]; then
 		mptcp_lib_pr_fail "unexpected fallback to TCP"
 		rets=1
 	fi
 
 	if [ $cookies -eq 2 ];then
-		if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+		if [ $stat_cookietx -eq 0 ] ;then
 			extra+=" WARN: CookieSent: did not advance"
 		fi
-		if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
+		if [ $stat_cookierx -eq 0 ] ;then
 			extra+=" WARN: CookieRecv: did not advance"
 		fi
 	else
-		if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
+		if [ $stat_cookietx -gt 0 ] ;then
 			extra+=" WARN: CookieSent: changed"
 		fi
-		if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
+		if [ $stat_cookierx -gt 0 ] ;then
 			extra+=" WARN: CookieRecv: changed"
 		fi
 	fi
 
-	if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
+	if [ ${stat_synrx} -gt ${expect_synrx} ]; then
 		extra+=" WARN: SYNRX: expect ${expect_synrx},"
-		extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
+		extra+=" got ${stat_synrx} (probably retransmissions)"
 	fi
-	if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
+	if [ ${stat_ackrx} -gt ${expect_ackrx} ]; then
 		extra+=" WARN: ACKRX: expect ${expect_ackrx},"
-		extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+		extra+=" got ${stat_ackrx} (probably retransmissions)"
 	fi
 
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
new file mode 100755
index 000000000000..ce93ec2f107f
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+	"$(dirname "${0}")/mptcp_connect.sh" -C "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
new file mode 100755
index 000000000000..5dd30f9394af
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+	"$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
new file mode 100755
index 000000000000..1d16fb1cc9bb
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+	"$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh
new file mode 100755
index 000000000000..241254a966c9
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_splice.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+	"$(dirname "${0}")/mptcp_connect.sh" -m splice "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
new file mode 100644
index 000000000000..5e222ba977e4
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -0,0 +1,439 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025, Kylin Software */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+
+#include <arpa/inet.h>
+
+#include <netinet/in.h>
+
+#include <linux/compiler.h>
+#include <linux/inet_diag.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sock_diag.h>
+#include <linux/tcp.h>
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
+#define parse_rtattr_nested(tb, max, rta) \
+	(parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \
+			    NLA_F_NESTED))
+
+struct params {
+	__u32 target_token;		/* -t <token>, parsed as hexadecimal */
+	char subflow_addrs[1024];	/* -s "<saddr>:<sport> <daddr>:<dport>" */
+};
+
+struct mptcp_info {
+	__u8	mptcpi_subflows;
+	__u8	mptcpi_add_addr_signal;
+	__u8	mptcpi_add_addr_accepted;
+	__u8	mptcpi_subflows_max;
+	__u8	mptcpi_add_addr_signal_max;
+	__u8	mptcpi_add_addr_accepted_max;
+	__u32	mptcpi_flags;
+	__u32	mptcpi_token;
+	__u64	mptcpi_write_seq;
+	__u64	mptcpi_snd_una;
+	__u64	mptcpi_rcv_nxt;
+	__u8	mptcpi_local_addr_used;
+	__u8	mptcpi_local_addr_max;
+	__u8	mptcpi_csum_enabled;
+	__u32	mptcpi_retransmits;
+	__u64	mptcpi_bytes_retrans;
+	__u64	mptcpi_bytes_sent;
+	__u64	mptcpi_bytes_received;
+	__u64	mptcpi_bytes_acked;
+	__u8	mptcpi_subflows_total;
+	__u8	reserved[3];
+	__u32	mptcpi_last_data_sent;
+	__u32	mptcpi_last_data_recv;
+	__u32	mptcpi_last_ack_recv;
+};
+
+enum {
+	MPTCP_SUBFLOW_ATTR_UNSPEC,
+	MPTCP_SUBFLOW_ATTR_TOKEN_REM,
+	MPTCP_SUBFLOW_ATTR_TOKEN_LOC,
+	MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
+	MPTCP_SUBFLOW_ATTR_MAP_SEQ,
+	MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
+	MPTCP_SUBFLOW_ATTR_SSN_OFFSET,
+	MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
+	MPTCP_SUBFLOW_ATTR_FLAGS,
+	MPTCP_SUBFLOW_ATTR_ID_REM,
+	MPTCP_SUBFLOW_ATTR_ID_LOC,
+	MPTCP_SUBFLOW_ATTR_PAD,
+
+	__MPTCP_SUBFLOW_ATTR_MAX
+};
+
+#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1)
+
+#define MPTCP_SUBFLOW_FLAG_MCAP_REM		_BITUL(0)
+#define MPTCP_SUBFLOW_FLAG_MCAP_LOC		_BITUL(1)
+#define MPTCP_SUBFLOW_FLAG_JOIN_REM		_BITUL(2)
+#define MPTCP_SUBFLOW_FLAG_JOIN_LOC		_BITUL(3)
+#define MPTCP_SUBFLOW_FLAG_BKUP_REM		_BITUL(4)
+#define MPTCP_SUBFLOW_FLAG_BKUP_LOC		_BITUL(5)
+#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED	_BITUL(6)
+#define MPTCP_SUBFLOW_FLAG_CONNECTED		_BITUL(7)
+#define MPTCP_SUBFLOW_FLAG_MAPVALID		_BITUL(8)
+
+#define rta_getattr(type, value)		(*(type *)RTA_DATA(value))
+
+static void __noreturn die_perror(const char *msg)
+{
+	perror(msg);
+	exit(1);
+}
+
+/* Print the command-line usage to stderr, then exit with status @r. */
+static void __noreturn die_usage(int r)
+{
+	fprintf(stderr, "Usage:\nmptcp_diag -t <token>\n"
+			"mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n");
+	exit(r);
+}
+
+static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto)
+{
+	struct sockaddr_nl nladdr = {
+		.nl_family = AF_NETLINK
+	};
+	struct {
+		struct nlmsghdr nlh;
+		struct inet_diag_req_v2 r;
+	} req = {
+		.nlh = {
+			.nlmsg_len = sizeof(req),
+			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
+			.nlmsg_flags = NLM_F_REQUEST
+		},
+		.r = *r
+	};
+	struct rtattr rta_proto;
+	struct iovec iov[6];
+	int iovlen = 0;
+
+	iov[iovlen++] = (struct iovec) {
+		.iov_base = &req,
+		.iov_len = sizeof(req)
+	};
+
+	if (proto == IPPROTO_MPTCP) {
+		rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
+		rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+
+		iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
+		iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)};
+		req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
+	}
+
+	struct msghdr msg = {
+		.msg_name = &nladdr,
+		.msg_namelen = sizeof(nladdr),
+		.msg_iov = iov,
+		.msg_iovlen = iovlen
+	};
+
+	for (;;) {
+		if (sendmsg(fd, &msg, 0) < 0) {
+			if (errno == EINTR)
+				continue;
+			die_perror("sendmsg");
+		}
+		break;
+	}
+}
+
+static void parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta,
+			       int len, unsigned short flags)
+{
+	unsigned short type;
+
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+	while (RTA_OK(rta, len)) {
+		type = rta->rta_type & ~flags;
+		if (type <= max && !tb[type])
+			tb[type] = rta;
+		rta = RTA_NEXT(rta, len);
+	}
+}
+
+static void print_info_msg(struct mptcp_info *info)
+{
+	printf("Token & Flags\n");
+	printf("token:        %x\n", info->mptcpi_token);
+	printf("flags:        %x\n", info->mptcpi_flags);
+	printf("csum_enabled: %u\n", info->mptcpi_csum_enabled);
+
+	printf("\nBasic Info\n");
+	printf("subflows:              %u\n", info->mptcpi_subflows);
+	printf("subflows_max:          %u\n", info->mptcpi_subflows_max);
+	printf("subflows_total:        %u\n", info->mptcpi_subflows_total);
+	printf("local_addr_used:       %u\n", info->mptcpi_local_addr_used);
+	printf("local_addr_max:        %u\n", info->mptcpi_local_addr_max);
+	printf("add_addr_signal:       %u\n", info->mptcpi_add_addr_signal);
+	printf("add_addr_accepted:     %u\n", info->mptcpi_add_addr_accepted);
+	printf("add_addr_signal_max:   %u\n", info->mptcpi_add_addr_signal_max);
+	printf("add_addr_accepted_max: %u\n", info->mptcpi_add_addr_accepted_max);
+
+	printf("\nTransmission Info\n");
+	printf("write_seq:        %llu\n", info->mptcpi_write_seq);
+	printf("snd_una:          %llu\n", info->mptcpi_snd_una);
+	printf("rcv_nxt:          %llu\n", info->mptcpi_rcv_nxt);
+	printf("last_data_sent:   %u\n", info->mptcpi_last_data_sent);
+	printf("last_data_recv:   %u\n", info->mptcpi_last_data_recv);
+	printf("last_ack_recv:    %u\n", info->mptcpi_last_ack_recv);
+	printf("retransmits:      %u\n", info->mptcpi_retransmits);
+	printf("retransmit bytes: %llu\n", info->mptcpi_bytes_retrans);
+	printf("bytes_sent:       %llu\n", info->mptcpi_bytes_sent);
+	printf("bytes_received:   %llu\n", info->mptcpi_bytes_received);
+	printf("bytes_acked:      %llu\n", info->mptcpi_bytes_acked);
+}
+
+/*
+ * 'print_subflow_info' is from 'mptcp_subflow_info'
+ * which is a function in 'misc/ss.c' of iproute2.
+ */
+static void print_subflow_info(struct rtattr *tb[])
+{
+	u_int32_t flags = 0;
+
+	printf("It's a mptcp subflow, the subflow info:\n");
+	if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) {
+		char caps[32 + 1] = { 0 }, *cap = &caps[0];
+
+		flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]);
+
+		if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM)
+			*cap++ = 'M';
+		if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC)
+			*cap++ = 'm';
+		if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM)
+			*cap++ = 'J';
+		if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC)
+			*cap++ = 'j';
+		if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM)
+			*cap++ = 'B';
+		if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC)
+			*cap++ = 'b';
+		if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED)
+			*cap++ = 'e';
+		if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED)
+			*cap++ = 'c';
+		if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID)
+			*cap++ = 'v';
+
+		if (flags)
+			printf(" flags:%s", caps);
+	}
+	if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] &&
+	    tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] &&
+	    tb[MPTCP_SUBFLOW_ATTR_ID_REM] &&
+	    tb[MPTCP_SUBFLOW_ATTR_ID_LOC])
+		printf(" token:%04x(id:%u)/%04x(id:%u)",
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]),
+		       rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]),
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]),
+		       rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC]));
+	if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])
+		printf(" seq:%llu",
+		       rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]));
+	if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])
+		printf(" sfseq:%u",
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]));
+	if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])
+		printf(" ssnoff:%u",
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]));
+	if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])
+		printf(" maplen:%u",
+		       rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]));
+	printf("\n");
+}
+
+static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto)
+{
+	struct inet_diag_msg *r = NLMSG_DATA(nlh);
+	struct rtattr *tb[INET_DIAG_MAX + 1];
+
+	parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r + 1),
+			   nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
+			   NLA_F_NESTED);
+
+	if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) {
+		int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
+		struct mptcp_info *info;
+
+		/* workaround for older kernels with fewer fields */
+		if (len < sizeof(*info)) {
+			info = alloca(sizeof(*info));
+			memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
+			memset((char *)info + len, 0, sizeof(*info) - len);
+		} else {
+			info = RTA_DATA(tb[INET_DIAG_INFO]);
+		}
+		print_info_msg(info);
+	}
+	if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) {
+		struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 };
+
+		parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX,
+				    tb[INET_DIAG_ULP_INFO]);
+
+		if (ulpinfo[INET_ULP_INFO_MPTCP]) {
+			struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 };
+
+			parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX,
+					    ulpinfo[INET_ULP_INFO_MPTCP]);
+			print_subflow_info(sfinfo);
+		} else {
+			printf("It's a normal TCP!\n");
+		}
+	}
+}
+
+/* Read one netlink reply from @fd and print every message it carries. */
+static void recv_nlmsg(int fd, __u32 proto)
+{
+	char rcv_buff[8192];
+	struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff;
+	struct sockaddr_nl rcv_nladdr = { .nl_family = AF_NETLINK };
+	struct iovec rcv_iov = {
+		.iov_base = rcv_buff,
+		.iov_len = sizeof(rcv_buff)
+	};
+	struct msghdr rcv_msg = {
+		.msg_name = &rcv_nladdr,
+		.msg_namelen = sizeof(rcv_nladdr),
+		.msg_iov = &rcv_iov,
+		.msg_iovlen = 1
+	};
+	int len;
+
+	len = recvmsg(fd, &rcv_msg, 0);
+	if (len < 0)	/* do not silently treat a failed read as "no reply" */
+		die_perror("recvmsg");
+
+	while (NLMSG_OK(nlh, len)) {
+		if (nlh->nlmsg_type == NLMSG_DONE) {
+			printf("NLMSG_DONE\n");
+			break;
+		} else if (nlh->nlmsg_type == NLMSG_ERROR) {
+			struct nlmsgerr *err = NLMSG_DATA(nlh);
+
+			printf("Error %d:%s\n",
+			       -(err->error), strerror(-(err->error)));
+			break;
+		}
+		parse_nlmsg(nlh, proto);
+		nlh = NLMSG_NEXT(nlh, len);
+	}
+}
+
+static void get_mptcpinfo(__u32 token)
+{
+	struct inet_diag_req_v2 r = {
+		.sdiag_family           = AF_INET,
+		/* Real proto is set via INET_DIAG_REQ_PROTOCOL */
+		.sdiag_protocol         = IPPROTO_TCP,
+		.idiag_ext              = 1 << (INET_DIAG_INFO - 1),
+		.id.idiag_cookie[0]     = token,
+	};
+	__u32 proto = IPPROTO_MPTCP;
+	int fd;
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+	if (fd < 0)
+		die_perror("Netlink socket");
+
+	send_query(fd, &r, proto);
+	recv_nlmsg(fd, proto);
+
+	close(fd);
+}
+
+/* Query sock_diag for the IPv4 TCP socket in @subflow_addrs, print the reply. */
+static void get_subflow_info(char *subflow_addrs)
+{
+	struct inet_diag_req_v2 r = {
+		.sdiag_family           = AF_INET,
+		.sdiag_protocol         = IPPROTO_TCP,
+		.idiag_ext              = 1 << (INET_DIAG_INFO - 1),
+		.id.idiag_cookie[0]     = INET_DIAG_NOCOOKIE,
+		.id.idiag_cookie[1]     = INET_DIAG_NOCOOKIE,
+	};
+	char saddr[64], daddr[64];	/* sscanf() widths below must stay < 64 */
+	int sport, dport, ret, fd;
+
+	ret = sscanf(subflow_addrs, "%63[^:]:%d %63[^:]:%d", saddr, &sport, daddr, &dport);
+	if (ret != 4)
+		die_perror("IP PORT Pairs has style problems!");
+
+	printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport);
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+	if (fd < 0)
+		die_perror("Netlink socket");
+
+	r.id.idiag_sport = htons(sport);
+	r.id.idiag_dport = htons(dport);
+
+	inet_pton(AF_INET, saddr, &r.id.idiag_src);
+	inet_pton(AF_INET, daddr, &r.id.idiag_dst);
+	send_query(fd, &r, IPPROTO_TCP);
+	recv_nlmsg(fd, IPPROTO_TCP);
+	close(fd);
+}
+
+static void parse_opts(int argc, char **argv, struct params *p)
+{
+	int c;
+
+	if (argc < 2)
+		die_usage(1);
+
+	while ((c = getopt(argc, argv, "ht:s:")) != -1) {
+		switch (c) {
+		case 'h':
+			die_usage(0);
+			break;
+		case 't':
+			sscanf(optarg, "%x", &p->target_token);
+			break;
+		case 's':
+			strncpy(p->subflow_addrs, optarg,
+				sizeof(p->subflow_addrs) - 1);
+			break;
+		default:
+			die_usage(1);
+			break;
+		}
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct params p = { 0 };
+
+	parse_opts(argc, argv, &p);
+
+	if (p.target_token)
+		get_mptcpinfo(p.target_token);
+
+	if (p.subflow_addrs[0] != '\0')
+		get_subflow_info(p.subflow_addrs);
+
+	return 0;
+}
+
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 218aac467321..5716998da192 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -28,6 +28,7 @@
 
 #include <linux/tcp.h>
 #include <linux/sockios.h>
+#include <linux/compiler.h>
 
 #ifndef IPPROTO_MPTCP
 #define IPPROTO_MPTCP 262
@@ -40,7 +41,7 @@ static int pf = AF_INET;
 static int proto_tx = IPPROTO_MPTCP;
 static int proto_rx = IPPROTO_MPTCP;
 
-static void die_perror(const char *msg)
+static void __noreturn die_perror(const char *msg)
 {
 	perror(msg);
 	exit(1);
@@ -52,7 +53,7 @@ static void die_usage(int r)
 	exit(r);
 }
 
-static void xerror(const char *fmt, ...)
+static void __noreturn xerror(const char *fmt, ...)
 {
 	va_list ap;
 
@@ -72,13 +73,22 @@ static const char *getxinfo_strerr(int err)
 }
 
 static void xgetaddrinfo(const char *node, const char *service,
-			 const struct addrinfo *hints,
+			 struct addrinfo *hints,
 			 struct addrinfo **res)
 {
-	int err = getaddrinfo(node, service, hints, res);
+	int err;
 
+again:
+	err = getaddrinfo(node, service, hints, res);
 	if (err) {
-		const char *errstr = getxinfo_strerr(err);
+		const char *errstr;
+
+		if (err == EAI_SOCKTYPE) {
+			hints->ai_protocol = IPPROTO_TCP;
+			goto again;
+		}
+
+		errstr = getxinfo_strerr(err);
 
 		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
 			node ? node : "", service ? service : "", errstr);
@@ -91,7 +101,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
 {
 	int sock = -1;
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
 	};
@@ -136,7 +146,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 			      const char * const port, int proto)
 {
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 	};
 	struct addrinfo *a, *addr;
@@ -493,6 +503,7 @@ static int server(int unixfd)
 
 	process_one_client(r, unixfd);
 
+	close(fd);
 	return 0;
 }
 
@@ -571,8 +582,12 @@ int main(int argc, char *argv[])
 		die_perror("pipe");
 
 	s = xfork();
-	if (s == 0)
-		return server(unixfds[1]);
+	if (s == 0) {
+		close(unixfds[0]);
+		ret = server(unixfds[1]);
+		close(unixfds[1]);
+		return ret;
+	}
 
 	close(unixfds[1]);
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 13a3b68181ee..beec41f6662a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,7 +8,7 @@
 
 # ShellCheck incorrectly believes that most of the code here is unreachable
 # because it's invoked by variable name, see how the "tests" array is used
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
 
 . "$(dirname "${0}")/mptcp_lib.sh"
 
@@ -62,6 +62,8 @@ unset sflags
 unset fastclose
 unset fullmesh
 unset speed
+unset bind_addr
+unset join_syn_rej
 unset join_csum_ns1
 unset join_csum_ns2
 unset join_fail_nr
@@ -73,6 +75,17 @@ unset join_create_err
 unset join_bind_err
 unset join_connect_err
 
+unset fb_ns1
+unset fb_ns2
+unset fb_infinite_map_tx
+unset fb_dss_corruption
+unset fb_simult_conn
+unset fb_mpc_passive
+unset fb_mpc_active
+unset fb_mpc_data
+unset fb_md5_sig
+unset fb_dss
+
 # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
 #				  (ip6 && (ip6[74] & 0xf0) == 0x30)'"
 CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
@@ -91,6 +104,24 @@ CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
 			       6 0 0 65535,
 			       6 0 0 0"
 
+# IPv4: TCP hdr of 48B, a first suboption of 12B (DACK8), the RM_ADDR suboption
+# generated using "nfbpf_compile '(ip[32] & 0xf0) == 0xc0 && ip[53] == 0x0c &&
+#				  (ip[66] & 0xf0) == 0x40'"
+CBPF_MPTCP_SUBOPTION_RM_ADDR="13,
+			      48 0 0 0,
+			      84 0 0 240,
+			      21 0 9 64,
+			      48 0 0 32,
+			      84 0 0 240,
+			      21 0 6 192,
+			      48 0 0 53,
+			      21 0 4 12,
+			      48 0 0 66,
+			      84 0 0 240,
+			      21 0 1 64,
+			      6 0 0 65535,
+			      6 0 0 0"
+
 init_partial()
 {
 	capout=$(mktemp)
@@ -346,6 +377,7 @@ reset_with_add_addr_timeout()
 		tables="${ip6tables}"
 	fi
 
+	# set a maximum, to avoid too long timeout with exponential backoff
 	ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
 
 	if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \
@@ -589,8 +621,7 @@ wait_rm_addr()
 	local old_cnt="${2}"
 	local cnt
 
-	local i
-	for i in $(seq 10); do
+	for _ in $(seq 10); do
 		cnt=$(rm_addr_count ${ns})
 		[ "$cnt" = "${old_cnt}" ] || break
 		sleep 0.1
@@ -609,29 +640,46 @@ wait_rm_sf()
 	local old_cnt="${2}"
 	local cnt
 
-	local i
-	for i in $(seq 10); do
+	for _ in $(seq 10); do
 		cnt=$(rm_sf_count ${ns})
 		[ "$cnt" = "${old_cnt}" ] || break
 		sleep 0.1
 	done
 }
 
+# $1: expected MPJ ACK Rx counter in $ns1
 wait_mpj()
 {
-	local ns="${1}"
-	local cnt old_cnt
+	local exp_cnt="${1}"
+	local cnt
 
-	old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
+	for _ in $(seq 10); do
+		cnt=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+		[ "${cnt}" = "${exp_cnt}" ] && break
+		sleep 0.1
+	done
+}
 
-	local i
-	for i in $(seq 10); do
-		cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
-		[ "$cnt" = "${old_cnt}" ] || break
+wait_ll_ready()
+{
+	local ns="${1}"
+
+	for _ in $(seq 50); do
+		ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" |
+			grep -qw "tentative" || break
 		sleep 0.1
 	done
 }
 
+get_ll_addr()
+{
+	local ns="${1}"
+	local iface="${2}"
+
+	ip -n "${ns}" -6 addr show dev "${iface}" scope link |
+		grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#'
+}
+
 kill_events_pids()
 {
 	mptcp_lib_kill_wait $evts_ns1_pid
@@ -938,6 +986,9 @@ do_transfer()
 	local FAILING_LINKS=${FAILING_LINKS:-""}
 	local fastclose=${fastclose:-""}
 	local speed=${speed:-"fast"}
+	local bind_addr=${bind_addr:-"::"}
+	local listener_in="${sin}"
+	local connector_in="${cin}"
 	port=$(get_port)
 
 	:> "$cout"
@@ -945,10 +996,8 @@ do_transfer()
 
 	cond_start_capture ${listener_ns}
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat -n
-	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-		nstat -n
+	mptcp_lib_nstat_init "${listener_ns}"
+	mptcp_lib_nstat_init "${connector_ns}"
 
 	local extra_args
 	if [ $speed = "fast" ]; then
@@ -986,42 +1035,40 @@ do_transfer()
 
 	extra_srv_args="$extra_args $extra_srv_args"
 	if [ "$test_linkfail" -gt 1 ];then
-		timeout ${timeout_test} \
-			ip netns exec ${listener_ns} \
-				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					$extra_srv_args "::" < "$sinfail" > "$sout" &
-	else
-		timeout ${timeout_test} \
-			ip netns exec ${listener_ns} \
-				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					$extra_srv_args "::" < "$sin" > "$sout" &
+		listener_in="${sinfail}"
 	fi
+	ip netns exec ${listener_ns} \
+		./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \
+			${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" &
 	local spid=$!
 
 	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
 
 	extra_cl_args="$extra_args $extra_cl_args"
 	if [ "$test_linkfail" -eq 0 ];then
-		timeout ${timeout_test} \
-			ip netns exec ${connector_ns} \
-				./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-					$extra_cl_args $connect_addr < "$cin" > "$cout" &
+		ip netns exec ${connector_ns} \
+			./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+				$extra_cl_args $connect_addr < "$cin" > "$cout" &
 	elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then
+		connector_in="${cinsent}"
 		( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
 			tee "$cinsent" | \
-			timeout ${timeout_test} \
 				ip netns exec ${connector_ns} \
 					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
 						$extra_cl_args $connect_addr > "$cout" &
 	else
+		connector_in="${cinsent}"
 		tee "$cinsent" < "$cinfail" | \
-			timeout ${timeout_test} \
-				ip netns exec ${connector_ns} \
-					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-						$extra_cl_args $connect_addr > "$cout" &
+			ip netns exec ${connector_ns} \
+				./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+					$extra_cl_args $connect_addr > "$cout" &
 	fi
 	local cpid=$!
 
+	mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+		"${connector_ns}" "${port}" "${cpid}" "${spid}" &
+	local timeout_pid=$!
+
 	pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
 	check_cestab $listener_ns $connector_ns
 
@@ -1030,31 +1077,26 @@ do_transfer()
 	wait $spid
 	local rets=$?
 
+	if kill -0 $timeout_pid; then
+		# Finished before the timeout: kill the background job
+		mptcp_lib_kill_group_wait $timeout_pid
+		timeout_pid=0
+	fi
+
 	cond_stop_capture
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat | grep Tcp > /tmp/${listener_ns}.out
-	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-		nstat | grep Tcp > /tmp/${connector_ns}.out
+	mptcp_lib_nstat_get "${listener_ns}"
+	mptcp_lib_nstat_get "${connector_ns}"
 
-	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
 		fail_test "client exit code $retc, server $rets"
-		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
-			"/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
+		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
 		return 1
 	fi
 
-	if [ "$test_linkfail" -gt 1 ];then
-		check_transfer $sinfail $cout "file received by client" $trunc_size
-	else
-		check_transfer $sin $cout "file received by client" $trunc_size
-	fi
+	check_transfer $listener_in $cout "file received by client" $trunc_size
 	retc=$?
-	if [ "$test_linkfail" -eq 0 ];then
-		check_transfer $cin $sout "file received by server" $trunc_size
-	else
-		check_transfer $cinsent $sout "file received by server" $trunc_size
-	fi
+	check_transfer $connector_in $sout "file received by server" $trunc_size
 	rets=$?
 
 	[ $retc -eq 0 ] && [ $rets -eq 0 ]
@@ -1123,12 +1165,20 @@ run_tests()
 	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
 }
 
+_dump_stats()
+{
+	local ns="${1}"		# $1: netns whose counters are printed
+	local side="${2}"	# $2: label used in the header (dump_stats passes "Server" / "Client")
+
+	mptcp_lib_print_err "${side} ns stats (${ns})"	# name the netns actually being dumped
+	mptcp_lib_pr_nstat "${ns}"
+	echo
+}
+
 dump_stats()
 {
-	echo Server ns stats
-	ip netns exec $ns1 nstat -as | grep Tcp
-	echo Client ns stats
-	ip netns exec $ns2 nstat -as | grep Tcp
+	_dump_stats "${ns1}" "Server"
+	_dump_stats "${ns2}" "Client"
 }
 
 chk_csum_nr()
@@ -1370,7 +1420,7 @@ chk_join_tx_nr()
 
 	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr")
 	if [ -z "$count" ]; then
-		rc=${KSFT_SKIP}
+		: # ignore skip
 	elif [ "$count" != "$create" ]; then
 		rc=${KSFT_FAIL}
 		print_check "syn tx create socket error"
@@ -1379,7 +1429,7 @@ chk_join_tx_nr()
 
 	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr")
 	if [ -z "$count" ]; then
-		rc=${KSFT_SKIP}
+		: # ignore skip
 	elif [ "$count" != "$bind" ]; then
 		rc=${KSFT_FAIL}
 		print_check "syn tx bind error"
@@ -1388,7 +1438,7 @@ chk_join_tx_nr()
 
 	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr")
 	if [ -z "$count" ]; then
-		rc=${KSFT_SKIP}
+		: # ignore skip
 	elif [ "$count" != "$connect" ]; then
 		rc=${KSFT_FAIL}
 		print_check "syn tx connect error"
@@ -1398,11 +1448,121 @@ chk_join_tx_nr()
 	print_results "join Tx" ${rc}
 }
 
+chk_fallback_nr()
+{
+	local infinite_map_tx=${fb_infinite_map_tx:-0}	# expected values: taken from fb_* if set, else 0
+	local dss_corruption=${fb_dss_corruption:-0}
+	local simult_conn=${fb_simult_conn:-0}
+	local mpc_passive=${fb_mpc_passive:-0}
+	local mpc_active=${fb_mpc_active:-0}
+	local mpc_data=${fb_mpc_data:-0}
+	local md5_sig=${fb_md5_sig:-0}
+	local dss=${fb_dss:-0}
+	local rc=${KSFT_PASS}
+	local ns=$1	# $1: NAME of the variable holding the netns (expanded below with ${!ns})
+	local count
+
+	count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx")
+	if [ -z "$count" ]; then
+		: # ignore skip: no value returned for this counter, nothing to compare
+	elif [ "$count" != "$infinite_map_tx" ]; then
+		rc=${KSFT_FAIL}
+		print_check "$ns infinite map tx fallback"
+		fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx"
+	fi
+
+	count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "$dss_corruption" ]; then
+		rc=${KSFT_FAIL}
+		print_check "$ns dss corruption fallback"
+		fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption"
+	fi
+
+	count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "$simult_conn" ]; then
+		rc=${KSFT_FAIL}
+		print_check "$ns simult conn fallback"
+		fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn"
+	fi
+
+	count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "$mpc_passive" ]; then
+		rc=${KSFT_FAIL}
+		print_check "$ns mpc passive fallback"
+		fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive"
+	fi
+
+	count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "$mpc_active" ]; then
+		rc=${KSFT_FAIL}
+		print_check "$ns mpc active fallback"
+		fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active"
+	fi
+
+	count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "$mpc_data" ]; then
+		rc=${KSFT_FAIL}
+		print_check "$ns mpc data fallback"
+		fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data"
+	fi
+
+	count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "$md5_sig" ]; then
+		rc=${KSFT_FAIL}
+		print_check "$ns MD5 Sig fallback"
+		fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig"
+	fi
+
+	count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "$dss" ]; then
+		rc=${KSFT_FAIL}
+		print_check "$ns dss fallback"
+		fail_test "got $count dss fallback[s] in $ns expected $dss"
+	fi
+
+	return $rc	# KSFT_PASS unless at least one counter differed from its expected value
+}
+
+chk_fallback_nr_all()
+{
+	local netns=("ns1" "ns2")	# variable names, passed as-is to chk_fallback_nr
+	local fb_ns=("fb_ns1" "fb_ns2")	# names of the optional per-netns expectation strings
+	local i rc=${KSFT_PASS}		# 'i' is local so the loop cannot clobber a caller's variable
+
+	for i in 0 1; do
+		if [ -n "${!fb_ns[i]}" ]; then	# e.g. fb_ns1="fb_dss=1": used as prefix assignment[s]
+			eval "${!fb_ns[i]}" \
+				chk_fallback_nr ${netns[i]} || rc=${?}
+		else
+			chk_fallback_nr ${netns[i]} || rc=${?}	# no override given for this netns
+		fi
+	done
+
+	if [ "${rc}" != "${KSFT_PASS}" ]; then	# a result line is only printed when a check failed
+		print_results "fallback" ${rc}
+	fi
+}
+
 chk_join_nr()
 {
 	local syn_nr=$1
 	local syn_ack_nr=$2
 	local ack_nr=$3
+	local syn_rej=${join_syn_rej:-0}
 	local csum_ns1=${join_csum_ns1:-0}
 	local csum_ns2=${join_csum_ns2:-0}
 	local fail_nr=${join_fail_nr:-0}
@@ -1441,20 +1601,49 @@ chk_join_nr()
 		fi
 	fi
 
+	count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "0" ]; then
+		rc=${KSFT_FAIL}
+		print_check "synack HMAC"
+		fail_test "got $count JOIN[s] synack HMAC failure expected 0"
+	fi
+
 	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
 	if [ -z "$count" ]; then
-		rc=${KSFT_SKIP}
+		: # ignore skip
 	elif [ "$count" != "$ack_nr" ]; then
 		rc=${KSFT_FAIL}
 		print_check "ack rx"
 		fail_test "got $count JOIN[s] ack rx expected $ack_nr"
 	fi
 
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "0" ]; then
+		rc=${KSFT_FAIL}
+		print_check "ack HMAC"
+		fail_test "got $count JOIN[s] ack HMAC failure expected 0"
+	fi
+
+	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
+	if [ -z "$count" ]; then
+		: # ignore skip
+	elif [ "$count" != "$syn_rej" ]; then
+		rc=${KSFT_FAIL}
+		print_check "syn rejected"
+		fail_test "got $count JOIN[s] syn rejected expected $syn_rej"
+	fi
+
 	print_results "join Rx" ${rc}
 
 	join_syn_tx="${join_syn_tx:-${syn_nr}}" \
 		chk_join_tx_nr
 
+	chk_fallback_nr_all
+
 	if $validate_checksum; then
 		chk_csum_nr $csum_ns1 $csum_ns2
 		chk_fail_nr $fail_nr $fail_nr
@@ -1474,7 +1663,6 @@ chk_stale_nr()
 	local stale_min=$2
 	local stale_max=$3
 	local stale_delta=$4
-	local dump_stats
 	local stale_nr
 	local recover_nr
 
@@ -1490,16 +1678,11 @@ chk_stale_nr()
 		fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \
 		     " expected stale in range [$stale_min..$stale_max]," \
 		     " stale-recover delta $stale_delta"
-		dump_stats=1
+		echo $ns stats
+		ip -n $ns -s link show
 	else
 		print_ok
 	fi
-
-	if [ "${dump_stats}" = 1 ]; then
-		echo $ns stats
-		ip netns exec $ns ip -s link show
-		ip netns exec $ns nstat -as | grep MPTcp
-	fi
 }
 
 chk_add_nr()
@@ -1518,7 +1701,6 @@ chk_add_nr()
 	local tx=""
 	local rx=""
 	local count
-	local timeout
 
 	if [[ $ns_invert = "invert" ]]; then
 		ns_tx=$ns2
@@ -1527,15 +1709,13 @@ chk_add_nr()
 		rx=" server"
 	fi
 
-	timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout)
-
 	print_check "add addr rx${rx}"
 	count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr")
 	if [ -z "$count" ]; then
 		print_skip
-	# if the test configured a short timeout tolerate greater then expected
-	# add addrs options, due to retransmissions
-	elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
+	# Tolerate more ADD_ADDR than expected (if any), due to retransmissions
+	elif [ "$count" != "$add_nr" ] &&
+	     { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then
 		fail_test "got $count ADD_ADDR[s] expected $add_nr"
 	else
 		print_ok
@@ -1623,18 +1803,15 @@ chk_add_tx_nr()
 {
 	local add_tx_nr=$1
 	local echo_tx_nr=$2
-	local timeout
 	local count
 
-	timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
-
 	print_check "add addr tx"
 	count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
 	if [ -z "$count" ]; then
 		print_skip
-	# if the test configured a short timeout tolerate greater then expected
-	# add addrs options, due to retransmissions
-	elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
+	# Tolerate more ADD_ADDR than expected (if any), due to retransmissions
+	elif [ "$count" != "$add_tx_nr" ] &&
+	     { [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
 		fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr"
 	else
 		print_ok
@@ -1945,7 +2122,8 @@ subflows_tests()
 		pm_nl_set_limits $ns2 0 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 0
+		join_syn_rej=1 \
+			chk_join_nr 1 1 0
 	fi
 
 	# subflow
@@ -1974,7 +2152,8 @@ subflows_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 2 2 1
+		join_syn_rej=1 \
+			chk_join_nr 2 2 1
 	fi
 
 	# single subflow, dev
@@ -2120,7 +2299,8 @@ signal_address_tests()
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
 		pm_nl_set_limits $ns2 3 3
-		run_tests $ns1 $ns2 10.0.1.1
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 3 3 3
 		chk_add_nr 3 3
 	fi
@@ -2132,7 +2312,8 @@ signal_address_tests()
 		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
 		pm_nl_set_limits $ns2 3 3
-		run_tests $ns1 $ns2 10.0.1.1
+		speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
 		join_syn_tx=3 \
 			chk_join_nr 1 1 1
 		chk_add_nr 3 3
@@ -2155,21 +2336,88 @@ signal_address_tests()
 		ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
 		speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 3 3 3
 
 		# It is not directly linked to the commit introducing this
 		# symbol but for the parent one which is linked anyway.
-		if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
-			chk_join_nr 3 3 2
-			chk_add_nr 4 4
-		else
-			chk_join_nr 3 3 3
+		if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
 			# the server will not signal the address terminating
 			# the MPC subflow
 			chk_add_nr 3 3
+		else
+			chk_add_nr 4 4
 		fi
 	fi
 }
 
+laminar_endp_tests()	# each case below is gated on the mptcp_pm_get_endp_laminar_max kernel symbol
+{
+	# no laminar endpoints: routing rules are used
+	if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		join_syn_tx=1 \
+			chk_join_nr 0 0 0
+		chk_add_nr 1 1
+	fi
+
+	# laminar endpoints: this endpoint is used
+	if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+
+	# laminar endpoints: these endpoints are used
+	if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns2 dead:beef:3::2 flags laminar
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags laminar
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+		chk_add_nr 2 2
+	fi
+
+	# laminar endpoints: only one endpoint is used
+	if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 2 2
+	fi
+
+	# laminar endpoints: subflow and laminar flags
+	if reset_with_tcp_filter "subflow + laminar endpoints" ns1 10.0.2.2 REJECT &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+		pm_nl_set_limits $ns1 0 4
+		pm_nl_set_limits $ns2 2 4
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,laminar
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,laminar
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+	fi
+}
+
 link_failure_tests()
 {
 	# accept and use add_addr with additional subflows and link loss
@@ -2314,7 +2562,7 @@ remove_tests()
 	if reset "remove single subflow"; then
 		pm_nl_set_limits $ns1 0 1
 		pm_nl_set_limits $ns2 0 1
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
 		addr_nr_ns2=-1 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 1 1 1
@@ -2327,8 +2575,8 @@ remove_tests()
 	if reset "remove multiple subflows"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 0 2
-		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
 		addr_nr_ns2=-2 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 2 2 2
@@ -2339,7 +2587,7 @@ remove_tests()
 	# single address, remove
 	if reset "remove single address"; then
 		pm_nl_set_limits $ns1 0 1
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
 		pm_nl_set_limits $ns2 1 1
 		addr_nr_ns1=-1 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2352,9 +2600,9 @@ remove_tests()
 	# subflow and signal, remove
 	if reset "remove subflow and signal"; then
 		pm_nl_set_limits $ns1 0 2
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
 		pm_nl_set_limits $ns2 1 2
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
 		addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 2 2 2
@@ -2366,10 +2614,10 @@ remove_tests()
 	# subflows and signal, remove
 	if reset "remove subflows and signal"; then
 		pm_nl_set_limits $ns1 0 3
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
 		pm_nl_set_limits $ns2 1 3
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
 		addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 3 3 3
@@ -2378,12 +2626,25 @@ remove_tests()
 		chk_rst_nr 0 0
 	fi
 
+	# signal+subflow with limits, remove
+	if reset "remove signal+subflow with limits"; then
+		pm_nl_set_limits $ns1 0 0
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,subflow
+		pm_nl_set_limits $ns2 0 0
+		addr_nr_ns1=-1 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_add_nr 1 1
+		chk_rm_nr 1 0 invert
+		chk_rst_nr 0 0
+	fi
+
 	# addresses remove
 	if reset "remove addresses"; then
 		pm_nl_set_limits $ns1 3 3
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
-		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
 		pm_nl_set_limits $ns2 3 3
 		addr_nr_ns1=-3 speed=10 \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2396,10 +2657,10 @@ remove_tests()
 	# invalid addresses remove
 	if reset "remove invalid addresses"; then
 		pm_nl_set_limits $ns1 3 3
-		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
 		# broadcast IP: no packet for this address will be received on ns1
-		pm_nl_add_endpoint $ns1 224.0.0.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+		pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
 		pm_nl_set_limits $ns2 2 2
 		addr_nr_ns1=-3 speed=10 \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2413,10 +2674,10 @@ remove_tests()
 	# subflows and signal, flush
 	if reset "flush subflows and signal"; then
 		pm_nl_set_limits $ns1 0 3
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
 		pm_nl_set_limits $ns2 1 3
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
 		addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 3 3 3
@@ -2429,9 +2690,9 @@ remove_tests()
 	if reset "flush subflows"; then
 		pm_nl_set_limits $ns1 3 3
 		pm_nl_set_limits $ns2 3 3
-		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
-		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
-		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
 		addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 3 3 3
@@ -2448,9 +2709,9 @@ remove_tests()
 	# addresses flush
 	if reset "flush addresses"; then
 		pm_nl_set_limits $ns1 3 3
-		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
-		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
 		pm_nl_set_limits $ns2 3 3
 		addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2463,9 +2724,9 @@ remove_tests()
 	# invalid addresses flush
 	if reset "flush invalid addresses"; then
 		pm_nl_set_limits $ns1 3 3
-		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
-		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+		pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+		pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup
 		pm_nl_set_limits $ns2 3 3
 		addr_nr_ns1=-8 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
@@ -2734,7 +2995,11 @@ mixed_tests()
 		pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
 		speed=slow \
 			run_tests $ns1 $ns2 dead:beef:2::1
-		chk_join_nr 1 1 1
+		if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then
+			chk_join_nr 0 0 0
+		else
+			chk_join_nr 1 1 1
+		fi
 	fi
 
 	# fullmesh still tries to create all the possibly subflows with
@@ -3015,6 +3280,133 @@ add_addr_ports_tests()
 	fi
 }
 
+bind_tests()
+{
+	# IPv4: bind to one address should not allow extra subflows to other addresses
+	if reset "bind main address v4, no join v4"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		bind_addr="10.0.1.1" \
+			run_tests $ns1 $ns2 10.0.1.1
+		join_syn_tx=1 \
+			chk_join_nr 0 0 0
+		chk_add_nr 1 1
+	fi
+
+	# IPv6: bind to one address should not allow extra subflows to other addresses
+	if reset "bind main address v6, no join v6"; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+		bind_addr="dead:beef:1::1" \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		join_syn_tx=1 \
+			chk_join_nr 0 0 0
+		chk_add_nr 1 1
+	fi
+
+	# IPv4: multiple binds to allow extra subflows to other addresses
+	if reset "multiple bind to allow joins v4"; then
+		local extra_bind	# PID of the extra background listener
+
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+
+		# Launching another app listening on a different address
+		# Note: it could be a totally different app, e.g. nc, socat, ...
+		ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+			-s MPTCP 10.0.2.1 &
+		extra_bind=$!
+
+		bind_addr="10.0.1.1" \
+			run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+
+		kill ${extra_bind}	# stop the extra listener started above
+	fi
+
+	# IPv6: multiple binds to allow extra subflows to other addresses
+	if reset "multiple bind to allow joins v6"; then
+		local extra_bind	# PID of the extra background listener
+
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+
+		# Launching another app listening on a different address
+		# Note: it could be a totally different app, e.g. nc, socat, ...
+		ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+			-s MPTCP dead:beef:2::1 &
+		extra_bind=$!
+
+		bind_addr="dead:beef:1::1" \
+			run_tests $ns1 $ns2 dead:beef:1::1
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+
+		kill ${extra_bind}	# stop the extra listener started above
+	fi
+
+	# multiple binds to allow extra subflows to other addresses: v6 LL case
+	if reset "multiple bind to allow joins v6 link-local routing"; then
+		local extra_bind ns1ll1 ns1ll2
+
+		ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+		ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+
+		wait_ll_ready $ns1 # to be able to bind
+		wait_ll_ready $ns2 # also needed to bind on the client side
+		ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+			-s MPTCP "${ns1ll2}%ns1eth2" &
+		extra_bind=$!
+
+		bind_addr="${ns1ll1}%ns1eth1" \
+			run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+		# it is not possible to connect to the announced LL addr without
+		# specifying the outgoing interface.
+		join_connect_err=1 \
+			chk_join_nr 0 0 0
+		chk_add_nr 1 1
+
+		kill ${extra_bind}	# stop the extra listener started above
+	fi
+
+	# multiple binds to allow extra subflows to v6 LL addresses: laminar
+	if reset "multiple bind to allow joins v6 link-local laminar" &&
+	   continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+		local extra_bind ns1ll1 ns1ll2 ns2ll2
+
+		ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+		ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+		ns2ll2="$(get_ll_addr $ns2 ns2eth2)"
+
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 2 2
+		pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+		pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2
+
+		wait_ll_ready $ns1 # to be able to bind
+		wait_ll_ready $ns2 # also needed to bind on the client side
+		ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+			-s MPTCP "${ns1ll2}%ns1eth2" &
+		extra_bind=$!
+
+		bind_addr="${ns1ll1}%ns1eth1" \
+			run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+		chk_join_nr 1 1 1
+		chk_add_nr 1 1
+
+		kill ${extra_bind}	# stop the extra listener started above
+	fi
+}
+
 syncookies_tests()
 {
 	# single subflow, syncookies
@@ -3043,7 +3435,8 @@ syncookies_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 2 1 1
+		join_syn_rej=1 \
+			chk_join_nr 2 1 1
 	fi
 
 	# test signal address with cookies
@@ -3155,6 +3548,17 @@ deny_join_id0_tests()
 		run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 1 1 1
 	fi
+
+	# default limits, server deny join id 0 + signal
+	if reset_with_allow_join_id0 "default limits, server deny join id 0" 0 1; then
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 2 2 2
+	fi
 }
 
 fullmesh_tests()
@@ -3270,7 +3674,6 @@ fullmesh_tests()
 fastclose_tests()
 {
 	if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
-		MPTCP_LIB_SUBTEST_FLAKY=1
 		test_linkfail=1024 fastclose=client \
 			run_tests $ns1 $ns2 10.0.1.1
 		chk_join_nr 0 0 0
@@ -3279,7 +3682,6 @@ fastclose_tests()
 	fi
 
 	if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
-		MPTCP_LIB_SUBTEST_FLAKY=1
 		test_linkfail=1024 fastclose=server \
 			run_tests $ns1 $ns2 10.0.1.1
 		join_rst_nr=1 \
@@ -3305,6 +3707,7 @@ fail_tests()
 		join_csum_ns1=+1 join_csum_ns2=+0 \
 			join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \
 			join_corrupted_pkts="$(pedit_action_pkts)" \
+			fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \
 			chk_join_nr 0 0 0
 		chk_fail_nr 1 -1 invert
 	fi
@@ -3335,7 +3738,6 @@ userspace_pm_add_addr()
 	tk=$(mptcp_lib_evts_get_info token "$evts")
 
 	ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3
-	sleep 1
 }
 
 # $1: ns ; $2: id
@@ -3366,7 +3768,6 @@ userspace_pm_add_sf()
 
 	ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \
 				rip $da rport $dp token $tk
-	sleep 1
 }
 
 # $1: ns ; $2: addr $3: event type
@@ -3463,21 +3864,28 @@ userspace_pm_chk_get_addr()
 	fi
 }
 
-# $1: ns ; $2: event type ; $3: count
+# $1: ns ; $2: event type ; $3: count ; [ $4: attr ; $5: attr count ]
 chk_evt_nr()
 {
 	local ns=${1}
 	local evt_name="${2}"
 	local exp="${3}"
+	local attr="${4}"
+	local attr_exp="${5}"
 
 	local evts="${evts_ns1}"
 	local evt="${!evt_name}"
+	local attr_name
 	local count
 
+	if [ -n "${attr}" ]; then
+		attr_name=", ${attr}: ${attr_exp}"
+	fi
+
 	evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_
 	[ "${ns}" == "ns2" ] && evts="${evts_ns2}"
 
-	print_check "event ${ns} ${evt_name} (${exp})"
+	print_check "event ${ns} ${evt_name} (${exp}${attr_name})"
 
 	if [[ "${evt_name}" = "LISTENER_"* ]] &&
 	   ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
@@ -3488,11 +3896,42 @@ chk_evt_nr()
 	count=$(grep -cw "type:${evt}" "${evts}")
 	if [ "${count}" != "${exp}" ]; then
 		fail_test "got ${count} events, expected ${exp}"
+		cat "${evts}"
+		return
+	elif [ -z "${attr}" ]; then
+		print_ok
+		return
+	fi
+
+	count=$(grep -w "type:${evt}" "${evts}" | grep -c ",${attr}:")
+	if [ "${count}" != "${attr_exp}" ]; then
+		fail_test "got ${count} event attributes, expected ${attr_exp}"
+		grep -w "type:${evt}" "${evts}"
 	else
 		print_ok
 	fi
 }
 
+# $1: ns ("ns2" selects evts_ns2, anything else evts_ns1) ; $2: event type (variable name) ; $3: expected count
+wait_event()
+{
+	local ns="${1}"
+	local evt_name="${2}"
+	local exp="${3}"
+
+	local evt="${!evt_name}"	# value of the variable named by $2
+	local evts="${evts_ns1}"
+	local count
+
+	[ "${ns}" == "ns2" ] && evts="${evts_ns2}"
+
+	for _ in $(seq 100); do	# at most 100 polls, 0.1s apart
+		count=$(grep -cw "type:${evt}" "${evts}")
+		[ "${count}" -ge "${exp}" ] && break	# enough matching events recorded
+		sleep 0.1
+	done	# no error is reported here if the expected count was never reached
+}
+
 userspace_tests()
 {
 	# userspace pm type prevents add_addr
@@ -3527,7 +3966,8 @@ userspace_tests()
 		pm_nl_set_limits $ns2 1 1
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 0
+		join_syn_rej=1 \
+			chk_join_nr 1 1 0
 	fi
 
 	# userspace pm type does not send join
@@ -3550,7 +3990,8 @@ userspace_tests()
 		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
 		sflags=backup speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1
-		chk_join_nr 1 1 0
+		join_syn_rej=1 \
+			chk_join_nr 1 1 0
 		chk_prio_nr 0 0 0 0
 	fi
 
@@ -3573,12 +4014,14 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns1
 		pm_nl_set_limits $ns2 2 2
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
-		wait_mpj $ns1
+		wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
 		userspace_pm_add_addr $ns1 10.0.2.1 10
+		wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1
 		userspace_pm_add_addr $ns1 10.0.3.1 20
+		wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 2
 		chk_join_nr 2 2 2
 		chk_add_nr 2 2
 		chk_mptcp_info subflows 2 subflows 2
@@ -3598,7 +4041,7 @@ userspace_tests()
 		chk_mptcp_info subflows 0 subflows 0
 		chk_subflows_total 1 1
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 
 	# userspace pm create destroy subflow
@@ -3606,11 +4049,12 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
-		wait_mpj $ns2
+		wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
 		userspace_pm_add_sf $ns2 10.0.3.2 20
+		wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
 		chk_join_nr 1 1 1
 		chk_mptcp_info subflows 1 subflows 1
 		chk_subflows_total 2 2
@@ -3626,7 +4070,7 @@ userspace_tests()
 		chk_mptcp_info subflows 0 subflows 0
 		chk_subflows_total 1 1
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 
 	# userspace pm create id 0 subflow
@@ -3634,20 +4078,21 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
-		wait_mpj $ns2
+		wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
 		chk_mptcp_info subflows 0 subflows 0
 		chk_subflows_total 1 1
 		userspace_pm_add_sf $ns2 10.0.3.2 0
+		wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
 		userspace_pm_chk_dump_addr "${ns2}" \
 			"id 0 flags subflow 10.0.3.2" "id 0 subflow"
 		chk_join_nr 1 1 1
 		chk_mptcp_info subflows 1 subflows 1
 		chk_subflows_total 2 2
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 
 	# userspace pm remove initial subflow
@@ -3655,11 +4100,12 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns2
 		pm_nl_set_limits $ns1 0 1
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
-		wait_mpj $ns2
+		wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
 		userspace_pm_add_sf $ns2 10.0.3.2 20
+		wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
 		chk_join_nr 1 1 1
 		chk_mptcp_info subflows 1 subflows 1
 		chk_subflows_total 2 2
@@ -3671,7 +4117,7 @@ userspace_tests()
 		chk_mptcp_info subflows 1 subflows 1
 		chk_subflows_total 1 1
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 
 	# userspace pm send RM_ADDR for ID 0
@@ -3679,11 +4125,12 @@ userspace_tests()
 	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
 		set_userspace_pm $ns1
 		pm_nl_set_limits $ns2 1 1
-		{ speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
-		wait_mpj $ns1
+		wait_event ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
 		userspace_pm_add_addr $ns1 10.0.2.1 10
+		wait_event ns2 MPTCP_LIB_EVENT_ANNOUNCED 1
 		chk_join_nr 1 1 1
 		chk_add_nr 1 1
 		chk_mptcp_info subflows 1 subflows 1
@@ -3697,7 +4144,38 @@ userspace_tests()
 		chk_mptcp_info subflows 1 subflows 1
 		chk_subflows_total 1 1
 		kill_events_pids
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
+	fi
+
+	# userspace pm no duplicated spurious close events after an error
+	if reset_with_events "userspace pm no dup close events after error" &&
+	   continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 2
+		{ timeout_test=120 test_linkfail=128 speed=slow \
+			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
+		local tests_pid=$!
+		wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+		userspace_pm_add_sf $ns2 10.0.3.2 20
+		wait_event ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 1
+		chk_mptcp_info subflows 1 subflows 1
+		chk_subflows_total 2 2
+
+		# force quick loss
+		ip netns exec $ns2 sysctl -q net.ipv4.tcp_syn_retries=1
+		if ip netns exec "${ns1}" ${iptables} -A INPUT -s "10.0.1.2" \
+		      -p tcp --tcp-option 30 -j REJECT --reject-with tcp-reset &&
+		   ip netns exec "${ns2}" ${iptables} -A INPUT -d "10.0.1.2" \
+		      -p tcp --tcp-option 30 -j REJECT --reject-with tcp-reset; then
+			wait_event ns2 MPTCP_LIB_EVENT_SUB_CLOSED 1
+			wait_event ns1 MPTCP_LIB_EVENT_SUB_CLOSED 1
+			chk_subflows_total 1 1
+			userspace_pm_add_sf $ns2 10.0.1.2 0
+			wait_event ns2 MPTCP_LIB_EVENT_SUB_CLOSED 2
+			chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 2 error 2
+		fi
+		kill_events_pids
+		mptcp_lib_kill_group_wait $tests_pid
 	fi
 }
 
@@ -3705,16 +4183,16 @@ endpoint_tests()
 {
 	# subflow_rebuild_header is needed to support the implicit flag
 	# userspace pm type prevents add_addr
-	if reset "implicit EP" &&
-	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+	if reset_with_events "implicit EP" &&
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
 		pm_nl_set_limits $ns1 2 2
 		pm_nl_set_limits $ns2 2 2
 		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-		{ speed=slow \
+		{ timeout_test=120 test_linkfail=128 speed=slow \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
-		wait_mpj $ns1
+		wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
 		pm_nl_check_endpoint "creation" \
 			$ns2 10.0.2.2 id 1 flags implicit
 		chk_mptcp_info subflows 1 subflows 1
@@ -3727,21 +4205,22 @@ endpoint_tests()
 		pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
 		pm_nl_check_endpoint "modif is allowed" \
 			$ns2 10.0.2.2 id 1 flags signal
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
+		kill_events_pids
 	fi
 
 	if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
-	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
 		start_events
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_set_limits $ns2 0 3
 		pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
 		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
-		{ test_linkfail=4 speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
-		wait_mpj $ns2
+		wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
 		pm_nl_check_endpoint "creation" \
 			$ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
 		chk_subflow_nr "before delete id 2" 2
@@ -3753,7 +4232,7 @@ endpoint_tests()
 		chk_mptcp_info subflows 0 subflows 0
 
 		pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
-		wait_mpj $ns2
+		wait_mpj 2
 		chk_subflow_nr "after re-add id 2" 2
 		chk_mptcp_info subflows 1 subflows 1
 
@@ -3765,10 +4244,18 @@ endpoint_tests()
 		ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
 		pm_nl_del_endpoint $ns2 3 10.0.3.2
 		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
-		wait_mpj $ns2
+		wait_mpj 3
 		chk_subflow_nr "after no reject" 3
 		chk_mptcp_info subflows 2 subflows 2
 
+		# To make sure RM_ADDR are sent over a different subflow, but
+		# allow the rest to quickly and cleanly close the subflow
+		local ipt=1
+		ip netns exec "${ns2}" ${iptables} -I OUTPUT -s "10.0.1.2" \
+			-p tcp -m tcp --tcp-option 30 \
+			-m bpf --bytecode \
+			"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
+			-j DROP || ipt=0
 		local i
 		for i in $(seq 3); do
 			pm_nl_del_endpoint $ns2 1 10.0.1.2
@@ -3777,12 +4264,13 @@ endpoint_tests()
 			chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf
 
 			pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
-			wait_mpj $ns2
+			wait_mpj $((3 + i))
 			chk_subflow_nr "after re-add id 0 ($i)" 3
 			chk_mptcp_info subflows 3 subflows 3
 		done
+		[ ${ipt} = 1 ] && ip netns exec "${ns2}" ${iptables} -D OUTPUT 1
 
-		mptcp_lib_kill_wait $tests_pid
+		mptcp_lib_kill_group_wait $tests_pid
 
 		kill_events_pids
 		chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
@@ -3807,55 +4295,72 @@ endpoint_tests()
 
 	# remove and re-add
 	if reset_with_events "delete re-add signal" &&
-	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+		ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0
 		pm_nl_set_limits $ns1 0 3
 		pm_nl_set_limits $ns2 3 3
 		pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
 		# broadcast IP: no packet for this address will be received on ns1
 		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
 		pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
-		{ test_linkfail=4 speed=5 \
+		{ timeout_test=120 test_linkfail=128 speed=5 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
-		wait_mpj $ns2
+		wait_event ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
 		pm_nl_check_endpoint "creation" \
 			$ns1 10.0.2.1 id 1 flags signal
 		chk_subflow_nr "before delete" 2
 		chk_mptcp_info subflows 1 subflows 1
+		chk_mptcp_info add_addr_signal 2 add_addr_accepted 1
 
 		pm_nl_del_endpoint $ns1 1 10.0.2.1
 		pm_nl_del_endpoint $ns1 2 224.0.0.1
 		sleep 0.5
 		chk_subflow_nr "after delete" 1
 		chk_mptcp_info subflows 0 subflows 0
+		chk_mptcp_info add_addr_signal 0 add_addr_accepted 0
 
 		pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
 		pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
-		wait_mpj $ns2
+		wait_mpj 3
 		chk_subflow_nr "after re-add" 3
 		chk_mptcp_info subflows 2 subflows 2
+		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
 
+		# To make sure RM_ADDR are sent over a different subflow, but
+		# allow the rest to quickly and cleanly close the subflow
+		local ipt=1
+		ip netns exec "${ns1}" ${iptables} -I OUTPUT -s "10.0.1.1" \
+			-p tcp -m tcp --tcp-option 30 \
+			-m bpf --bytecode \
+			"$CBPF_MPTCP_SUBOPTION_RM_ADDR" \
+			-j DROP || ipt=0
 		pm_nl_del_endpoint $ns1 42 10.0.1.1
 		sleep 0.5
 		chk_subflow_nr "after delete ID 0" 2
 		chk_mptcp_info subflows 2 subflows 2
+		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+		[ ${ipt} = 1 ] && ip netns exec "${ns1}" ${iptables} -D OUTPUT 1
 
-		pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
-		wait_mpj $ns2
+		pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
+		wait_mpj 4
 		chk_subflow_nr "after re-add ID 0" 3
 		chk_mptcp_info subflows 3 subflows 3
+		chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
 
-		pm_nl_del_endpoint $ns1 99 10.0.1.1
+		pm_nl_del_endpoint $ns1 42 10.0.1.1
 		sleep 0.5
 		chk_subflow_nr "after re-delete ID 0" 2
 		chk_mptcp_info subflows 2 subflows 2
+		chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
 
 		pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
-		wait_mpj $ns2
+		wait_mpj 5
 		chk_subflow_nr "after re-re-add ID 0" 3
 		chk_mptcp_info subflows 3 subflows 3
-		mptcp_lib_kill_wait $tests_pid
+		chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
+		mptcp_lib_kill_group_wait $tests_pid
 
 		kill_events_pids
 		chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
@@ -3881,13 +4386,13 @@ endpoint_tests()
 
 	# flush and re-add
 	if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT &&
-	   mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+	   continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
 		pm_nl_set_limits $ns1 0 2
 		pm_nl_set_limits $ns2 1 2
 		# broadcast IP: no packet for this address will be received on ns1
 		pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
 		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
-		{ test_linkfail=4 speed=20 \
+		{ timeout_test=120 test_linkfail=128 speed=20 \
 			run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
 		local tests_pid=$!
 
@@ -3900,10 +4405,10 @@ endpoint_tests()
 		wait_rm_addr $ns2 0
 		ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
 		pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
-		wait_mpj $ns2
+		wait_mpj 1
 		pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
-		wait_mpj $ns2
-		mptcp_lib_kill_wait $tests_pid
+		wait_mpj 2
+		mptcp_lib_kill_group_wait $tests_pid
 
 		join_syn_tx=3 join_connect_err=1 \
 			chk_join_nr 2 2 2
@@ -3943,6 +4448,7 @@ all_tests_sorted=(
 	f@subflows_tests
 	e@subflows_error_tests
 	s@signal_address_tests
+	L@laminar_endp_tests
 	l@link_failure_tests
 	t@add_addr_timeout_tests
 	r@remove_tests
@@ -3952,6 +4458,7 @@ all_tests_sorted=(
 	M@mixed_tests
 	b@backup_tests
 	p@add_addr_ports_tests
+	B@bind_tests
 	k@syncookies_tests
 	S@checksum_tests
 	d@deny_join_id0_tests
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 051e289d7967..5fea7e7df628 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
 . "$(dirname "${0}")/../lib.sh"
-. "$(dirname "${0}")/../net_helper.sh"
 
 readonly KSFT_PASS=0
 readonly KSFT_FAIL=1
@@ -107,23 +106,32 @@ mptcp_lib_pr_info() {
 	mptcp_lib_print_info "INFO: ${*}"
 }
 
-# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file
+mptcp_lib_pr_nstat() {
+	local ns="${1}"
+	local hist="/tmp/${ns}.out"
+
+	if [ -f "${hist}" ]; then
+		awk '$2 != 0 { print "  "$0 }' "${hist}"
+	else
+		ip netns exec "${ns}" nstat -as | grep Tcp
+	fi
+}
+
+# $1-2: listener/connector ns ; $3 port
 mptcp_lib_pr_err_stats() {
 	local lns="${1}"
 	local cns="${2}"
 	local port="${3}"
-	local lstat="${4}"
-	local cstat="${5}"
 
 	echo -en "${MPTCP_LIB_COLOR_RED}"
 	{
 		printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}"
 		ip netns exec "${lns}" ss -Menitam -o "sport = :${port}"
-		cat "${lstat}"
+		mptcp_lib_pr_nstat "${lns}"
 
 		printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}"
 		ip netns exec "${cns}" ss -Menitam -o "dport = :${port}"
-		[ "${lstat}" != "${cstat}" ] && cat "${cstat}"
+		[ "${lns}" != "${cns}" ] && mptcp_lib_pr_nstat "${cns}"
 	} 1>&2
 	echo -en "${MPTCP_LIB_COLOR_RESET}"
 }
@@ -331,12 +339,28 @@ mptcp_lib_result_print_all_tap() {
 
 # get the value of keyword $1 in the line marked by keyword $2
 mptcp_lib_get_info_value() {
-	grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+	grep "${2}" 2>/dev/null |
+		sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+		# the ';q' at the end limits to the first matched entry.
 }
 
 # $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
 mptcp_lib_evts_get_info() {
-	grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+	grep "${4:-}" "${2}" 2>/dev/null |
+		mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+}
+
+mptcp_lib_wait_timeout() {
+	local timeout_test="${1}"
+	local listener_ns="${2}"
+	local connector_ns="${3}"
+	local port="${4}"
+	shift 4 # rest are the PIDs to kill once the timeout has expired
+
+	sleep "${timeout_test}"
+	mptcp_lib_print_err "timeout"
+	mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
+	kill "${@}" 2>/dev/null
 }
 
 # $1: PID
@@ -348,19 +372,62 @@ mptcp_lib_kill_wait() {
 	wait "${1}" 2>/dev/null
 }
 
+# $1: PID ; [$2: 'ps o pid,ppid' output] -- print $1 and all its descendants
+mptcp_lib_pid_list_children() {
+	local curr="${1}"
+	# invoke 'ps' only once: recursive calls reuse the output passed as $2
+	local pids="${2:-"$(ps o pid,ppid)"}"
+
+	echo "${curr}"
+
+	local pid
+	for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do
+		mptcp_lib_pid_list_children "${pid}" "${pids}"
+	done
+}
+
+# $1: PID of the process to kill (with all its descendants) and to wait for
+mptcp_lib_kill_group_wait() {
+	# Some users might not have procps-ng: cannot use "kill -- -PID"
+	mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null
+	wait "${1}" 2>/dev/null
+}
+
 # $1: IP address
 mptcp_lib_is_v6() {
 	[ -z "${1##*:*}" ]
 }
 
+mptcp_lib_nstat_init() {
+	local ns="${1}"
+
+	rm -f "/tmp/${ns}."{nstat,out}
+	NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -n
+}
+
+mptcp_lib_nstat_get() {
+	local ns="${1}"
+
+	# keep *Tcp* counters only, no rate (last column): mptcp_lib_get_counter input
+	NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -sz |
+		grep -o ".*Tcp\S\+\s\+[0-9]\+" > "/tmp/${ns}.out"
+}
+
 # $1: ns, $2: MIB counter
+# Get the counter from the history (mptcp_lib_nstat_{init,get}()) if available.
+# If not, get the counter from nstat ignoring any history.
 mptcp_lib_get_counter() {
 	local ns="${1}"
 	local counter="${2}"
+	local hist="/tmp/${ns}.out"
 	local count
 
-	count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
-		awk 'NR==1 {next} {print $2}')
+	if [[ -s "${hist}" && "${counter}" == *"Tcp"* ]]; then
+		count=$(awk "/^${counter} / {print \$2; exit}" "${hist}")
+	else
+		count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+			awk 'NR==1 {next} {print $2}')
+	fi
 	if [ -z "${count}" ]; then
 		mptcp_lib_fail_if_expected_feature "${counter} counter"
 		return 1
@@ -382,7 +449,7 @@ mptcp_lib_make_file() {
 mptcp_lib_print_file_err() {
 	ls -l "${1}" 1>&2
 	echo "Trailing bytes are: "
-	tail -c 27 "${1}"
+	tail -c 32 "${1}" | od -x | head -n2
 }
 
 # $1: input file ; $2: output file ; $3: what kind of file
@@ -476,8 +543,6 @@ mptcp_lib_ns_init() {
 	local netns
 	for netns in "${@}"; do
 		ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
-		ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
-		ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
 	done
 }
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 926b0be87c99..b6e58d936ebe 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -25,6 +25,7 @@
 #include <netinet/in.h>
 
 #include <linux/tcp.h>
+#include <linux/compiler.h>
 
 static int pf = AF_INET;
 
@@ -127,7 +128,7 @@ struct so_state {
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif
 
-static void die_perror(const char *msg)
+static void __noreturn die_perror(const char *msg)
 {
 	perror(msg);
 	exit(1);
@@ -139,7 +140,7 @@ static void die_usage(int r)
 	exit(r);
 }
 
-static void xerror(const char *fmt, ...)
+static void __noreturn xerror(const char *fmt, ...)
 {
 	va_list ap;
 
@@ -159,13 +160,22 @@ static const char *getxinfo_strerr(int err)
 }
 
 static void xgetaddrinfo(const char *node, const char *service,
-			 const struct addrinfo *hints,
+			 struct addrinfo *hints,
 			 struct addrinfo **res)
 {
-	int err = getaddrinfo(node, service, hints, res);
+	int err;
 
+again:
+	err = getaddrinfo(node, service, hints, res);
 	if (err) {
-		const char *errstr = getxinfo_strerr(err);
+		const char *errstr;
+
+		if (err == EAI_SOCKTYPE) {
+			hints->ai_protocol = IPPROTO_TCP;
+			goto again;
+		}
+
+		errstr = getxinfo_strerr(err);
 
 		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
 			node ? node : "", service ? service : "", errstr);
@@ -178,7 +188,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
 {
 	int sock = -1;
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
 	};
@@ -223,7 +233,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
 			      const char * const port, int proto)
 {
 	struct addrinfo hints = {
-		.ai_protocol = IPPROTO_TCP,
+		.ai_protocol = IPPROTO_MPTCP,
 		.ai_socktype = SOCK_STREAM,
 	};
 	struct addrinfo *a, *addr;
@@ -658,22 +668,26 @@ static void process_one_client(int fd, int pipefd)
 
 	do_getsockopts(&s, fd, ret, ret2);
 	if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
-		xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+		xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64 ", diff %" PRId64,
+		       s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - (ret + 1));
 
 	/* be nice when running on top of older kernel */
 	if (s.pkt_stats_avail) {
 		if (s.last_sample.mptcpi_bytes_sent != ret2)
-			xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
+			xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64
+			       ", diff %" PRId64,
 			       s.last_sample.mptcpi_bytes_sent, ret2,
 			       s.last_sample.mptcpi_bytes_sent - ret2);
 		if (s.last_sample.mptcpi_bytes_received != ret)
-			xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
+			xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64
+			       ", diff %" PRId64,
 			       s.last_sample.mptcpi_bytes_received, ret,
 			       s.last_sample.mptcpi_bytes_received - ret);
 		if (s.last_sample.mptcpi_bytes_acked != ret)
-			xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
-			       s.last_sample.mptcpi_bytes_acked, ret2,
-			       s.last_sample.mptcpi_bytes_acked - ret2);
+			xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64
+			       ", diff %" PRId64,
+			       s.last_sample.mptcpi_bytes_acked, ret,
+			       s.last_sample.mptcpi_bytes_acked - ret);
 	}
 
 	close(fd);
@@ -713,6 +727,7 @@ static int server(int pipefd)
 
 	process_one_client(r, pipefd);
 
+	close(fd);
 	return 0;
 }
 
@@ -838,8 +853,12 @@ int main(int argc, char *argv[])
 		die_perror("pipe");
 
 	s = xfork();
-	if (s == 0)
-		return server(pipefds[1]);
+	if (s == 0) {
+		close(pipefds[0]);
+		ret = server(pipefds[1]);
+		close(pipefds[1]);
+		return ret;
+	}
 
 	close(pipefds[1]);
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 418a903c3a4d..ab8bce06b262 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -95,7 +95,7 @@ init()
 }
 
 # This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
 cleanup()
 {
 	mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
@@ -169,41 +169,44 @@ do_transfer()
 		cmsg+=",TCPINQ"
 	fi
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat -n
-	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-		nstat -n
+	mptcp_lib_nstat_init "${listener_ns}"
+	mptcp_lib_nstat_init "${connector_ns}"
 
-	timeout ${timeout_test} \
-		ip netns exec ${listener_ns} \
-			$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
-				${local_addr} < "$sin" > "$sout" &
+	ip netns exec ${listener_ns} \
+		$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
+			${local_addr} < "$sin" > "$sout" &
 	local spid=$!
 
-	sleep 1
+	mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
 
-	timeout ${timeout_test} \
-		ip netns exec ${connector_ns} \
-			$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
-				$connect_addr < "$cin" > "$cout" &
+	ip netns exec ${connector_ns} \
+		$mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
+			$connect_addr < "$cin" > "$cout" &
 
 	local cpid=$!
 
+	mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+		"${connector_ns}" "${port}" "${cpid}" "${spid}" &
+	local timeout_pid=$!
+
 	wait $cpid
 	local retc=$?
 	wait $spid
 	local rets=$?
 
-	NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
-		nstat | grep Tcp > /tmp/${listener_ns}.out
-	NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
-		nstat | grep Tcp > /tmp/${connector_ns}.out
+	if kill -0 $timeout_pid; then
+		# Finished before the timeout: kill the background job
+		mptcp_lib_kill_group_wait $timeout_pid
+		timeout_pid=0
+	fi
+
+	mptcp_lib_nstat_get "${listener_ns}"
+	mptcp_lib_nstat_get "${connector_ns}"
 
 	print_title "Transfer ${ip:2}"
-	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
 		mptcp_lib_pr_fail "client exit code $retc, server $rets"
-		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
-			"/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
+		mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
 
 		mptcp_lib_result_fail "transfer ${ip}"
 
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 2e6648a2b2c0..123d9d7a0278 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -32,7 +32,7 @@ ns1=""
 err=$(mktemp)
 
 # This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
 cleanup()
 {
 	rm -f "${err}"
@@ -70,8 +70,9 @@ format_endpoints() {
 	mptcp_lib_pm_nl_format_endpoints "${@}"
 }
 
+# This function is invoked indirectly
+#shellcheck disable=SC2317,SC2329
 get_endpoint() {
-	# shellcheck disable=SC2317 # invoked indirectly
 	mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}"
 }
 
@@ -191,6 +192,10 @@ check "show_endpoints" \
 flush_endpoint
 check "show_endpoints" "" "flush addrs"
 
+add_endpoint 10.0.1.1 flags unknown
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1")" "ignore unknown flags"
+flush_endpoint
+
 set_limits 9 1 2>/dev/null
 check "get_limits" "${default_limits}" "rcv addrs above hard limit"
 
@@ -198,6 +203,7 @@ set_limits 1 9 2>/dev/null
 check "get_limits" "${default_limits}" "subflows above hard limit"
 
 set_limits 8 8
+flush_endpoint  ## to make sure it doesn't affect the limits
 check "get_limits" "$(format_limits 8 8)" "set limits"
 
 flush_endpoint
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 994a556f46c1..99eecccbf0c8 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -2,6 +2,7 @@
 
 #include <errno.h>
 #include <error.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -23,6 +24,8 @@
 #define IPPROTO_MPTCP 262
 #endif
 
+#define MPTCP_PM_ADDR_FLAG_UNKNOWN _BITUL(7)
+
 static void syntax(char *argv[])
 {
 	fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]);
@@ -113,6 +116,8 @@ static int capture_events(int fd, int event_group)
 		error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group");
 
 	do {
+		bool server_side = false;
+
 		FD_ZERO(&rfds);
 		FD_SET(fd, &rfds);
 		res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -187,11 +192,22 @@ static int capture_events(int fd, int event_group)
 				else if (attrs->rta_type == MPTCP_ATTR_ERROR)
 					fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
 				else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
-					fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+					server_side = !!*(__u8 *)RTA_DATA(attrs);
+				else if (attrs->rta_type == MPTCP_ATTR_FLAGS) {
+					__u16 flags = *(__u16 *)RTA_DATA(attrs);
+
+					/* only print when present, easier */
+					if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0)
+						fprintf(stderr, ",deny_join_id0:1");
+					if (flags & MPTCP_PM_EV_FLAG_SERVER_SIDE)
+						server_side = true;
+				}
 
 				attrs = RTA_NEXT(attrs, msg_len);
 			}
 		}
+		if (server_side)
+			fprintf(stderr, ",server_side:1");
 		fprintf(stderr, "\n");
 	} while (1);
 
@@ -816,10 +832,14 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
 					flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
 				else if (!strcmp(tok, "signal"))
 					flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
+				else if (!strcmp(tok, "laminar"))
+					flags |= MPTCP_PM_ADDR_FLAG_LAMINAR;
 				else if (!strcmp(tok, "backup"))
 					flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
 				else if (!strcmp(tok, "fullmesh"))
 					flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
+				else if (!strcmp(tok, "unknown"))
+					flags |= MPTCP_PM_ADDR_FLAG_UNKNOWN;
 				else
 					error(1, errno,
 					      "unknown flag %s", argv[arg]);
@@ -1004,6 +1024,13 @@ static void print_addr(struct rtattr *attrs, int len)
 					printf(",");
 			}
 
+			if (flags & MPTCP_PM_ADDR_FLAG_LAMINAR) {
+				printf("laminar");
+				flags &= ~MPTCP_PM_ADDR_FLAG_LAMINAR;
+				if (flags)
+					printf(",");
+			}
+
 			if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) {
 				printf("backup");
 				flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
@@ -1025,6 +1052,13 @@ static void print_addr(struct rtattr *attrs, int len)
 					printf(",");
 			}
 
+			if (flags & MPTCP_PM_ADDR_FLAG_UNKNOWN) {
+				printf("unknown");
+				flags &= ~MPTCP_PM_ADDR_FLAG_UNKNOWN;
+				if (flags)
+					printf(",");
+			}
+
 			/* bump unknown flags, if any */
 			if (flags)
 				printf("0x%x", flags);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 9c2a415976cb..d11a8b949aab 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -28,14 +28,14 @@ size=0
 
 usage() {
 	echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
-	echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+	echo -e "\t-b: bail out after first error, otherwise runs all testcases"
 	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
 	echo -e "\t-d: debug this script"
 	echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
 }
 
 # This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
 cleanup()
 {
 	rm -f "$cout" "$sout"
@@ -155,48 +155,53 @@ do_transfer()
 		sleep 1
 	fi
 
-	NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
-		nstat -n
-	NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
-		nstat -n
+	mptcp_lib_nstat_init "${ns3}"
+	mptcp_lib_nstat_init "${ns1}"
 
-	timeout ${timeout_test} \
-		ip netns exec ${ns3} \
-			./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
-				0.0.0.0 < "$sin" > "$sout" &
+	ip netns exec ${ns3} \
+		./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
+			0.0.0.0 < "$sin" > "$sout" &
 	local spid=$!
 
 	mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
 
-	timeout ${timeout_test} \
-		ip netns exec ${ns1} \
-			./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
-				10.0.3.3 < "$cin" > "$cout" &
+	ip netns exec ${ns1} \
+		./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
+			10.0.3.3 < "$cin" > "$cout" &
 	local cpid=$!
 
+	mptcp_lib_wait_timeout "${timeout_test}" "${ns3}" "${ns1}" "${port}" \
+		"${cpid}" "${spid}" &
+	local timeout_pid=$!
+
 	wait $cpid
 	local retc=$?
 	wait $spid
 	local rets=$?
 
+	if kill -0 $timeout_pid; then
+		# Finished before the timeout: kill the background job
+		mptcp_lib_kill_group_wait $timeout_pid
+		timeout_pid=0
+	fi
+
 	if $capture; then
 		sleep 1
 		kill ${cappid_listener}
 		kill ${cappid_connector}
 	fi
 
-	NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
-		nstat | grep Tcp > /tmp/${ns3}.out
-	NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
-		nstat | grep Tcp > /tmp/${ns1}.out
+	mptcp_lib_nstat_get "${ns3}"
+	mptcp_lib_nstat_get "${ns1}"
 
 	cmp $sin $cout > /dev/null 2>&1
 	local cmps=$?
 	cmp $cin $sout > /dev/null 2>&1
 	local cmpc=$?
 
-	if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
-	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+	if [ $retc -eq 0 ] && [ $rets -eq 0 ] &&
+	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] &&
+	   [ $timeout_pid -eq 0 ]; then
 		printf "%-16s" " max $max_time "
 		mptcp_lib_pr_ok
 		cat "$capout"
@@ -204,8 +209,7 @@ do_transfer()
 	fi
 
 	mptcp_lib_pr_fail "client exit code $retc, server $rets"
-	mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \
-		"/tmp/${ns3}.out" "/tmp/${ns1}.out"
+	mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}"
 	ls -l $sin $cout
 	ls -l $cin $sout
 
@@ -233,10 +237,13 @@ run_test()
 	for dev in ns2eth1 ns2eth2; do
 		tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
 	done
-	tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
-	tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
-	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
-	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+	# keep the queued pkts number low, or the RTT estimator will see
+	# increasing latency over time.
+	tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50
+	tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50
+	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50
+	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50
 
 	# time is measured in ms, account for transfer size, aggregated link speed
 	# and header overhead (10%)
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 3651f73451cf..e9ae1806ab07 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -94,7 +94,7 @@ test_fail()
 }
 
 # This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
 cleanup()
 {
 	print_title "Cleanup"
@@ -117,7 +117,36 @@ cleanup()
 trap cleanup EXIT
 
 # Create and configure network namespaces for testing
+print_title "Init"
 mptcp_lib_ns_init ns1 ns2
+
+# check path_manager and pm_type sysctl mapping
+if [ -f /proc/sys/net/mptcp/path_manager ]; then
+	ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace
+	pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)"
+	if [ "${pm_type}" != "1" ]; then
+		test_fail "unexpected pm_type: ${pm_type}"
+		mptcp_lib_result_print_all_tap
+		exit ${KSFT_FAIL}
+	fi
+
+	ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null
+	pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)"
+	if [ "${pm_type}" != "1" ]; then
+		test_fail "unexpected pm_type after error: ${pm_type}"
+		mptcp_lib_result_print_all_tap
+		exit ${KSFT_FAIL}
+	fi
+
+	ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0
+	pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)"
+	if [ "${pm_name}" != "kernel" ]; then
+		test_fail "unexpected path-manager: ${pm_name}"
+		mptcp_lib_result_print_all_tap
+		exit ${KSFT_FAIL}
+	fi
+fi
+
 for i in "$ns1" "$ns2" ;do
 	ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
 done
@@ -152,7 +181,6 @@ mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
 sleep 0.5
 mptcp_lib_subtests_last_ts_reset
 
-print_title "Init"
 print_test "Created network namespaces ns1, ns2"
 test_pass
 
@@ -173,6 +201,9 @@ make_connection()
 		is_v6="v4"
 	fi
 
+	# set this on the client side only: will not affect the rest
+	ip netns exec "$ns2" sysctl -q net.mptcp.allow_join_initial_addr_port=0
+
 	:>"$client_evts"
 	:>"$server_evts"
 
@@ -180,7 +211,8 @@ make_connection()
 	ip netns exec "$ns1" \
 	   ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
 	local server_pid=$!
-	sleep 0.5
+
+	mptcp_lib_wait_local_port_listen "${ns1}" "${app_port}"
 
 	# Run the client, transfer $file and stay connected to the server
 	# to conduct tests
@@ -195,23 +227,28 @@ make_connection()
 	local client_token
 	local client_port
 	local client_serverside
+	local client_nojoin
 	local server_token
 	local server_serverside
+	local server_nojoin
 
 	client_token=$(mptcp_lib_evts_get_info token "$client_evts")
 	client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
 	client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+	client_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$client_evts")
 	server_token=$(mptcp_lib_evts_get_info token "$server_evts")
 	server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
+	server_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$server_evts")
 
 	print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
-	if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
-		   [ "$server_serverside" = 1 ]
+	if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] &&
+	   [ "${client_serverside:-0}" = 0 ] && [ "${server_serverside:-0}" = 1 ] &&
+	   [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ]
 	then
 		test_pass
 		print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}"
 	else
-		test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
+		test_fail "Expected tokens (c:${client_token} - s:${server_token}), server (c:${client_serverside} - s:${server_serverside}), nojoin (c:${client_nojoin} - s:${server_nojoin})"
 		mptcp_lib_result_print_all_tap
 		exit ${KSFT_FAIL}
 	fi
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 7ea5fb28c93d..1d5d3c4e7e87 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -77,6 +77,7 @@
 static int  cfg_cork;
 static bool cfg_cork_mixed;
 static int  cfg_cpu		= -1;		/* default: pin to last cpu */
+static int  cfg_expect_zerocopy	= -1;
 static int  cfg_family		= PF_UNSPEC;
 static int  cfg_ifindex		= 1;
 static int  cfg_payload_len;
@@ -92,9 +93,9 @@ static socklen_t cfg_alen;
 static struct sockaddr_storage cfg_dst_addr;
 static struct sockaddr_storage cfg_src_addr;
 
+static int exitcode;
 static char payload[IP_MAXPACKET];
 static long packets, bytes, completions, expected_completions;
-static int  zerocopied = -1;
 static uint32_t next_completion;
 static uint32_t sends_since_notify;
 
@@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain)
 	next_completion = hi + 1;
 
 	zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
-	if (zerocopied == -1)
-		zerocopied = zerocopy;
-	else if (zerocopied != zerocopy) {
-		fprintf(stderr, "serr: inconsistent\n");
-		zerocopied = zerocopy;
+	if (cfg_expect_zerocopy != -1 &&
+	    cfg_expect_zerocopy != zerocopy) {
+		fprintf(stderr, "serr: ee_code: %u != expected %u\n",
+			zerocopy, cfg_expect_zerocopy);
+		exitcode = 1;
+		/* suppress repeated messages */
+		cfg_expect_zerocopy = zerocopy;
 	}
 
 	if (cfg_verbose >= 2)
@@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol)
 
 	fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
 		packets, bytes >> 20, completions,
-		zerocopied == 1 ? 'y' : 'n');
+		cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n');
 }
 
 static int do_setup_rx(int domain, int type, int protocol)
@@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv)
 
 	cfg_payload_len = max_payload_len;
 
-	while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
+	while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) {
 		switch (c) {
 		case '4':
 			if (cfg_family != PF_UNSPEC)
@@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv)
 		case 'z':
 			cfg_zerocopy = true;
 			break;
+		case 'Z':
+			cfg_expect_zerocopy = !!atoi(optarg);
+			break;
 		}
 	}
 
@@ -817,5 +823,5 @@ int main(int argc, char **argv)
 	else
 		error(1, 0, "unknown cfg_test %s", cfg_test);
 
-	return 0;
+	return exitcode;
 }
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 89c22f5320e0..28178a38a4e7 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -6,6 +6,7 @@
 set -e
 
 readonly DEV="veth0"
+readonly DUMMY_DEV="dummy0"
 readonly DEV_MTU=65535
 readonly BIN="./msg_zerocopy"
 
@@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}"
 readonly NS1="${NSPREFIX}1"
 readonly NS2="${NSPREFIX}2"
 
-readonly SADDR4='192.168.1.1'
-readonly DADDR4='192.168.1.2'
-readonly SADDR6='fd::1'
-readonly DADDR6='fd::2'
+readonly LPREFIX4='192.168.1'
+readonly RPREFIX4='192.168.2'
+readonly LPREFIX6='fd'
+readonly RPREFIX6='fc'
+
 
 readonly path_sysctl_mem="net.core.optmem_max"
 
 # No arguments: automated test
 if [[ "$#" -eq "0" ]]; then
-	$0 4 tcp -t 1
-	$0 6 tcp -t 1
-	$0 4 udp -t 1
-	$0 6 udp -t 1
-	echo "OK. All tests passed"
-	exit 0
+	ret=0
+
+	$0 4 tcp -t 1 || ret=1
+	$0 6 tcp -t 1 || ret=1
+	$0 4 udp -t 1 || ret=1
+	$0 6 udp -t 1 || ret=1
+
+	[[ "$ret" == "0" ]] && echo "OK. All tests passed"
+	exit $ret
 fi
 
 # Argument parsing
@@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@"
 
 # Argument parsing: configure addresses
 if [[ "${IP}" == "4" ]]; then
-	readonly SADDR="${SADDR4}"
-	readonly DADDR="${DADDR4}"
+	readonly SADDR="${LPREFIX4}.1"
+	readonly DADDR="${LPREFIX4}.2"
+	readonly DUMMY_ADDR="${RPREFIX4}.1"
+	readonly DADDR_TXONLY="${RPREFIX4}.2"
+	readonly MASK="24"
 elif [[ "${IP}" == "6" ]]; then
-	readonly SADDR="${SADDR6}"
-	readonly DADDR="${DADDR6}"
+	readonly SADDR="${LPREFIX6}::1"
+	readonly DADDR="${LPREFIX6}::2"
+	readonly DUMMY_ADDR="${RPREFIX6}::1"
+	readonly DADDR_TXONLY="${RPREFIX6}::2"
+	readonly MASK="64"
+	readonly NODAD="nodad"
 else
 	echo "Invalid IP version ${IP}"
 	exit 1
@@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
 ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
   peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
 
+ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy
+
 # Bring the devices up
 ip -netns "${NS1}" link set "${DEV}" up
 ip -netns "${NS2}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DUMMY_DEV}" up
 
 # Set fixed MAC addresses on the devices
 ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
 ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
 
 # Add fixed IP addresses to the devices
-ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
-ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
-ip -netns "${NS1}" addr add       fd::1/64 dev "${DEV}" nodad
-ip -netns "${NS2}" addr add       fd::2/64 dev "${DEV}" nodad
+ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD}
+
+ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}"
+ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}"
+
+ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1
+ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1
 
 # Optionally disable sg or csum offload to test edge cases
 # ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
 
+ret=0
+
 do_test() {
 	local readonly ARGS="$1"
 
-	echo "ipv${IP} ${TXMODE} ${ARGS}"
-	ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+	# tx-rx test
+	# packets queued to a local socket are copied,
+	# sender notification has SO_EE_CODE_ZEROCOPY_COPIED.
+
+	echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n"
+	ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \
+		-S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
 	sleep 0.2
-	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+		-S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1
 	wait
+
+	# next test is unconnected tx to dummy0, cannot exercise with tcp
+	[[ "${TXMODE}" == "tcp" ]] && return
+
+	# tx-only test: send out dummy0
+	# packets leaving the host are not copied,
+	# sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED.
+
+	echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n"
+	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+		-S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1
 }
 
 do_test "${EXTRA_ARGS}"
 do_test "-z ${EXTRA_ARGS}"
-echo ok
+
+[[ "$ret" == "0" ]] && echo "OK"
diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh
new file mode 100755
index 000000000000..0ee859b622a4
--- /dev/null
+++ b/tools/testing/selftests/net/nat6to4.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Attach the schedcls/egress4/snat4 section of nat6to4.bpf.o to the ingress
+# hook of lo in a fresh netns, then send a fixed UDP payload to a looped-back
+# multicast address with socat. The exit status is that of the last command.
+
+NS="ns-peer-$(mktemp -u XXXXXX)"
+
+# Remove the namespace on every exit path so repeated runs do not leak it.
+cleanup() {
+	ip netns del "${NS}" 2>/dev/null
+}
+trap cleanup EXIT
+
+ip netns add "${NS}"
+ip -netns "${NS}" link set lo up
+ip -netns "${NS}" route add default via 127.0.0.2 dev lo
+
+tc -n "${NS}" qdisc add dev lo ingress
+tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \
+   bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action
+
+ip netns exec "${NS}" \
+   bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1'
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
deleted file mode 100644
index 6596fe03c77f..000000000000
--- a/tools/testing/selftests/net/net_helper.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Helper functions
-
-wait_local_port_listen()
-{
-	local listener_ns="${1}"
-	local port="${2}"
-	local protocol="${3}"
-	local pattern
-	local i
-
-	pattern=":$(printf "%04X" "${port}") "
-
-	# for tcp protocol additionally check the socket state
-	[ ${protocol} = "tcp" ] && pattern="${pattern}0A"
-	for i in $(seq 10); do
-		if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
-		   /proc/net/"${protocol}"* | grep -q "${pattern}"; then
-			break
-		fi
-		sleep 0.1
-	done
-}
diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh
new file mode 100755
index 000000000000..18509da293e5
--- /dev/null
+++ b/tools/testing/selftests/net/netdev-l2addr.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+set -o pipefail
+
+NSIM_ADDR=2025
+TEST_ADDR="d0:be:d0:be:d0:00"
+
+RET_CODE=0
+
+cleanup() {
+    cleanup_netdevsim "$NSIM_ADDR"
+    cleanup_ns "$NS"
+}
+
+trap cleanup EXIT
+
+fail() {  # $1: optional message; reports on stderr and marks the run as failed
+    echo "ERROR: ${1:-unexpected return code} (ret: $?)" >&2  # $? still holds the status of the command run just before fail
+    RET_CODE=1
+}
+
+get_addr()
+{
+    local type="$1"
+    local dev="$2"
+    local ns="$3"
+
+    ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type"
+}
+
+setup_ns NS
+
+nsim=$(create_netdevsim $NSIM_ADDR "$NS")
+
+get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr"
+get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it"
+
+ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR"
+ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR"
+
+[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr"
+[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr"
+
+if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then
+    fail "Created netdevsim with broadcast permaddr"
+fi
+
+nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR")
+
+get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr"
+[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr"
+
+cleanup_netdevsim "$NSIM_ADDR" "$NS"
+
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
index 64c4f8d9aa6c..5d2be9a00627 100644
--- a/tools/testing/selftests/net/netfilter/.gitignore
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -5,3 +5,4 @@ conntrack_dump_flush
 conntrack_reverse_clash
 sctp_collision
 nf_queue
+udpclash
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index ffe161fac8b5..ee2d1a5254f8 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -6,42 +6,52 @@ HOSTPKG_CONFIG := pkg-config
 MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
 MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
 
-TEST_PROGS := br_netfilter.sh bridge_brouter.sh
-TEST_PROGS += br_netfilter_queue.sh
-TEST_PROGS += conntrack_dump_flush.sh
-TEST_PROGS += conntrack_icmp_related.sh
-TEST_PROGS += conntrack_ipip_mtu.sh
-TEST_PROGS += conntrack_tcp_unreplied.sh
-TEST_PROGS += conntrack_sctp_collision.sh
-TEST_PROGS += conntrack_vrf.sh
-TEST_PROGS += conntrack_reverse_clash.sh
-TEST_PROGS += ipvs.sh
-TEST_PROGS += nf_conntrack_packetdrill.sh
-TEST_PROGS += nf_nat_edemux.sh
-TEST_PROGS += nft_audit.sh
-TEST_PROGS += nft_concat_range.sh
-TEST_PROGS += nft_conntrack_helper.sh
-TEST_PROGS += nft_fib.sh
-TEST_PROGS += nft_flowtable.sh
-TEST_PROGS += nft_meta.sh
-TEST_PROGS += nft_nat.sh
-TEST_PROGS += nft_nat_zones.sh
-TEST_PROGS += nft_queue.sh
-TEST_PROGS += nft_synproxy.sh
-TEST_PROGS += nft_tproxy_tcp.sh
-TEST_PROGS += nft_tproxy_udp.sh
-TEST_PROGS += nft_zones_many.sh
-TEST_PROGS += rpath.sh
-TEST_PROGS += vxlan_mtu_frag.sh
-TEST_PROGS += xt_string.sh
+TEST_PROGS := \
+	br_netfilter.sh \
+	br_netfilter_queue.sh \
+	bridge_brouter.sh \
+	conntrack_clash.sh \
+	conntrack_dump_flush.sh \
+	conntrack_icmp_related.sh \
+	conntrack_ipip_mtu.sh \
+	conntrack_resize.sh \
+	conntrack_reverse_clash.sh \
+	conntrack_sctp_collision.sh \
+	conntrack_tcp_unreplied.sh \
+	conntrack_vrf.sh \
+	ipvs.sh \
+	nf_conntrack_packetdrill.sh \
+	nf_nat_edemux.sh \
+	nft_audit.sh \
+	nft_concat_range.sh \
+	nft_conntrack_helper.sh \
+	nft_fib.sh \
+	nft_flowtable.sh \
+	nft_interface_stress.sh \
+	nft_meta.sh \
+	nft_nat.sh \
+	nft_nat_zones.sh \
+	nft_queue.sh \
+	nft_synproxy.sh \
+	nft_tproxy_tcp.sh \
+	nft_tproxy_udp.sh \
+	nft_zones_many.sh \
+	rpath.sh \
+	vxlan_mtu_frag.sh \
+	xt_string.sh \
+# end of TEST_PROGS
 
 TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
 
-TEST_GEN_FILES = audit_logread
-TEST_GEN_FILES += connect_close nf_queue
-TEST_GEN_FILES += conntrack_dump_flush
-TEST_GEN_FILES += conntrack_reverse_clash
-TEST_GEN_FILES += sctp_collision
+TEST_GEN_FILES = \
+	audit_logread \
+	connect_close \
+	conntrack_dump_flush \
+	conntrack_reverse_clash \
+	nf_queue \
+	sctp_collision \
+	udpclash \
+# end of TEST_GEN_FILES
 
 include ../../lib.mk
 
@@ -50,10 +60,14 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
 
 $(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
 $(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+$(OUTPUT)/udpclash: LDLIBS += -lpthread
 
-TEST_FILES := lib.sh
-TEST_FILES += packetdrill
+TEST_FILES := \
+	lib.sh \
+	packetdrill \
+# end of TEST_FILES
 
 TEST_INCLUDES := \
+	$(wildcard ../lib/sh/*.sh) \
 	../lib.sh \
-	$(wildcard ../lib/sh/*.sh)
+# end of TEST_INCLUDES
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index c28379a965d8..011de8763094 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -13,6 +13,12 @@ source lib.sh
 
 checktool "nft --version" "run test without nft tool"
 
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+	echo SKIP: kernel is tainted
+	exit $ksft_skip
+fi
+
 cleanup() {
 	cleanup_all_ns
 }
@@ -54,9 +60,6 @@ bcast_ping()
 	done
 }
 
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
-
 if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
 	echo "SKIP: Can't create veth device"
 	exit $ksft_skip
@@ -165,6 +168,7 @@ if [ "$t" -eq 0 ];then
 	echo PASS: kernel not tainted
 else
 	echo ERROR: kernel is tainted
+	dmesg
 	ret=1
 fi
 
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
index 6a764d70ab06..4788641717d9 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
@@ -4,6 +4,12 @@ source lib.sh
 
 checktool "nft --version" "run test without nft tool"
 
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+	echo SKIP: kernel is tainted
+	exit $ksft_skip
+fi
+
 cleanup() {
 	cleanup_all_ns
 }
@@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then
 	echo PASS: kernel not tainted
 else
 	echo ERROR: kernel is tainted
+	dmesg
 	exit 1
 fi
 
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
index 2549b6590693..ea76f2bc2f59 100755
--- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -22,8 +22,6 @@ trap cleanup EXIT
 
 setup_ns nsbr ns1 ns2
 
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
 if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
 	echo "SKIP: Can't create veth device"
 	exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 43d8b500d391..979cff56e1f5 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -7,64 +7,75 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
 CONFIG_BRIDGE_EBT_T_FILTER=m
 CONFIG_BRIDGE_NETFILTER=m
 CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
 CONFIG_BRIDGE_VLAN_FILTERING=y
 CONFIG_CGROUP_BPF=y
+CONFIG_CRYPTO_SHA1=m
 CONFIG_DUMMY=m
+CONFIG_INET_DIAG=m
 CONFIG_INET_ESP=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
-CONFIG_IP6_NF_MATCH_RPFILTER=m
-CONFIG_IP_NF_IPTABLES=m
+CONFIG_INET_SCTP_DIAG=m
+CONFIG_IP6_NF_FILTER=m
 CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_RAW=m
 CONFIG_IP_NF_FILTER=m
-CONFIG_IP6_NF_FILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_NAT=m
 CONFIG_IP_NF_RAW=m
-CONFIG_IP6_NF_RAW=m
 CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_TUNNEL=m
 CONFIG_IP_VS=m
 CONFIG_IP_VS_PROTO_TCP=y
 CONFIG_IP_VS_RR=m
-CONFIG_IPV6=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_MACVLAN=m
 CONFIG_NAMESPACES=y
 CONFIG_NET_CLS_U32=m
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_NET_NS=y
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_IPIP=m
-CONFIG_NET_VRF=y
 CONFIG_NETFILTER=y
 CONFIG_NETFILTER_ADVANCED=y
 CONFIG_NETFILTER_NETLINK=m
 CONFIG_NETFILTER_NETLINK_QUEUE=m
 CONFIG_NETFILTER_SYNPROXY=m
 CONFIG_NETFILTER_XTABLES=m
-CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
 CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
 CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_NAT=m
 CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_VRF=y
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_EVENTS=y
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
 CONFIG_NF_CONNTRACK_ZONES=y
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_CT_PROTO_SCTP=y
 CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_INET=m
 CONFIG_NF_LOG_IPV4=m
 CONFIG_NF_LOG_IPV6=m
 CONFIG_NF_NAT=m
-CONFIG_NF_NAT_REDIRECT=y
 CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_NAT_REDIRECT=y
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NF_TABLES_IPV6=y
 CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NF_FLOW_TABLE_INET=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_CT=m
@@ -83,12 +94,9 @@ CONFIG_NFT_QUOTA=m
 CONFIG_NFT_REDIR=m
 CONFIG_NFT_SYNPROXY=m
 CONFIG_NFT_TPROXY=m
+CONFIG_TUN=m
 CONFIG_VETH=m
 CONFIG_VLAN_8021Q=m
 CONFIG_VXLAN=m
-CONFIG_XFRM_USER=m
 CONFIG_XFRM_STATISTICS=y
-CONFIG_NET_PKTGEN=m
-CONFIG_TUN=m
-CONFIG_INET_DIAG=m
-CONFIG_SCTP_DIAG=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
new file mode 100755
index 000000000000..84b8eb12143a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -0,0 +1,173 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+clash_resolution_active=0
+dport=22111
+ret=0
+
+cleanup()
+{
+	# netns cleanup also zaps any remaining socat echo server.
+	cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+
+setup_ns nsclient1 nsclient2 nsrouter
+
+ip netns exec "$nsrouter" nft -f -<<EOF
+table ip t {
+	chain lb {
+		meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 9002 }
+	}
+
+	chain prerouting {
+		type nat hook prerouting priority dstnat
+
+		udp dport $dport counter jump lb
+	}
+
+	chain output {
+		type nat hook output priority dstnat
+
+		udp dport $dport counter jump lb
+	}
+}
+EOF
+
+load_simple_ruleset()
+{
+ip netns exec "$1" nft -f -<<EOF
+table ip t {
+	chain forward {
+		type filter hook forward priority 0
+
+		ct state new counter
+	}
+}
+EOF
+}
+
+spawn_servers()
+{
+	local ns="$1"
+	local ports="9000 9001 9002"
+
+	for port in $ports; do
+		ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null &
+	done
+
+	for port in $ports; do
+		wait_local_port_listen "$ns" $port udp
+	done
+}
+
+add_addr()
+{
+	local ns="$1"
+	local dev="$2"
+	local i="$3"
+	local j="$4"
+
+	ip -net "$ns" link set "$dev" up
+	ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev"
+}
+
+ping_test()
+{
+	local ns="$1"
+	local daddr="$2"
+
+	if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then
+		echo "FAIL: ping from $ns to $daddr"
+		exit 1
+	fi
+}
+
+run_one_clash_test()
+{
+	local ns="$1"
+	local ctns="$2"
+	local daddr="$3"
+	local dport="$4"
+	local entries
+	local cre
+
+	if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then
+		echo "INFO: did not receive expected number of replies for $daddr:$dport"
+		ip netns exec "$ctns" conntrack -S
+		# don't fail: check if clash resolution triggered after all.
+	fi
+
+	entries=$(ip netns exec "$ctns" conntrack -S | wc -l)
+	cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l)
+
+	if [ "$cre" -ne "$entries" ];then
+		clash_resolution_active=1
+		return 0
+	fi
+
+	# not a failure: clash resolution logic did not trigger.
+	# With right timing, xmit completed sequentially and
+	# no parallel insertion occurs.
+	return $ksft_xfail
+}
+
+run_clash_test()
+{
+	local ns="$1"
+	local ctns="$2"
+	local daddr="$3"
+	local dport="$4"
+	local softerr=0
+
+	for i in $(seq 1 10);do
+		run_one_clash_test "$ns" "$ctns" "$daddr" "$dport"
+		local rv=$?
+		if [ $rv -eq 0 ];then
+			echo "PASS: clash resolution test for $daddr:$dport on attempt $i"
+			return 0
+		elif [ $rv -eq $ksft_xfail ]; then
+			softerr=1
+		fi
+	done
+
+	[ $softerr -eq 1 ] && echo "XFAIL: clash resolution for $daddr:$dport did not trigger"
+}
+
+ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter"
+ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter"
+add_addr "$nsclient1" veth0 1 1
+add_addr "$nsclient2" veth0 2 1
+add_addr "$nsrouter" veth0 1 99
+add_addr "$nsrouter" veth1 2 99
+
+ip -net "$nsclient1" route add default via 10.0.1.99
+ip -net "$nsclient2" route add default via 10.0.2.99
+ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1
+
+ping_test "$nsclient1" 10.0.1.99
+ping_test "$nsclient1" 10.0.2.1
+ping_test "$nsclient2" 10.0.1.1
+
+spawn_servers "$nsclient2"
+
+# exercise clash resolution with nat:
+# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2}.
+run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport"
+
+# exercise clash resolution without nat.
+load_simple_ruleset "$nsclient2"
+run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001
+
+if [ $clash_resolution_active -eq 0 ];then
+	[ "$ret" -eq 0 ] && ret=$ksft_xfail
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
index 5f827e10717d..5cecb8a1bc94 100644
--- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
@@ -10,7 +10,7 @@
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 #include <linux/netfilter/nf_conntrack_tcp.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 #define TEST_ZONE_ID 123
 #define NF_CT_DEFAULT_ZONE_ID 0
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
new file mode 100755
index 000000000000..615fe3c6f405
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -0,0 +1,515 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft tool"
+
+init_net_max=0
+ct_buckets=0
+tmpfile=""
+tmpfile_proc=""
+tmpfile_uniq=""
+ret=0
+have_socat=0
+
+socat -h > /dev/null && have_socat=1
+
+insert_count=2000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400
+
+modprobe -q nf_conntrack
+if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then	# sysctl missing: nothing to test
+	echo "SKIP: conntrack sysctls not available"
+	exit $ksft_skip	# lowercase constant, as in the other netfilter scripts that source lib.sh
+fi
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1
+ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1
+
+cleanup() {
+	cleanup_all_ns
+
+	rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq"
+
+	# restore original sysctl setting
+	sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+	sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets
+}
+trap cleanup EXIT
+
+check_max_alias()
+{
+	local expected="$1"
+	# old name, expected to alias to the first, i.e. changing one
+	# changes the other as well.
+	local lv=$(sysctl -n net.nf_conntrack_max)
+
+	if [ $expected -ne "$lv" ];then
+		echo "nf_conntrack_max sysctls should have identical values"
+		exit 1
+	fi
+}
+
+insert_ctnetlink() {
+	local ns="$1"
+	local count="$2"
+	local i=0
+	local bulk=16
+
+	while [ $i -lt $count ] ;do
+		ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \
+			if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+					  -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+					  --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \
+					  return;\
+			fi & \
+		done ; wait" 2>/dev/null
+
+		i=$((i+bulk))
+	done
+}
+
+check_ctcount() {
+	local ns="$1"
+	local count="$2"
+	local msg="$3"
+
+	local now=$(ip netns exec "$ns" conntrack -C)
+
+	if [ $now -ne "$count" ] ;then
+		echo "expected $count entries in $ns, not $now: $msg"
+		exit 1
+	fi
+
+	echo "PASS: got $count connections: $msg"
+}
+
+ctresize() {
+	local duration="$1"
+	local now=$(date +%s)
+	local end=$((now + duration))
+
+	while [ $now -lt $end ]; do
+		sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM
+		now=$(date +%s)
+	done
+}
+
+do_rsleep() {
+	local limit="$1"
+	local r=$RANDOM
+
+	r=$((r%limit))
+	sleep "$r"
+}
+
+ct_flush_once() {
+	local ns="$1"
+
+	ip netns exec "$ns" conntrack -F 2>/dev/null
+}
+
+ctflush() {
+	local ns="$1"
+	local duration="$2"
+	local now=$(date +%s)
+	local end=$((now + duration))
+
+	do_rsleep "$duration"
+
+        while [ $now -lt $end ]; do
+		ct_flush_once "$ns"
+		do_rsleep "$duration"
+		now=$(date +%s)
+        done
+}
+
+ct_pingflood()
+{
+	local ns="$1"
+	local duration="$2"
+	local msg="$3"
+	local now=$(date +%s)
+	local end=$((now + duration))
+	local j=0
+	local k=0
+
+        while [ $now -lt $end ]; do
+		j=$((j%256))
+		k=$((k%256))
+
+		ip netns exec "$ns" bash -c \
+			"j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1
+
+		j=$((j+1))
+
+		if [ $j -eq 256 ];then
+			k=$((k+1))
+		fi
+
+		now=$(date +%s)
+	done
+
+	wait
+}
+
+ct_udpflood()
+{
+	local ns="$1"
+	local duration="$2"
+	local now=$(date +%s)
+	local end=$((now + duration))
+
+	[ $have_socat -ne "1" ] && return
+
+        while [ $now -lt $end ]; do
+ip netns exec "$ns" bash<<"EOF"
+	for i in $(seq 1 100);do
+		dport=$(((RANDOM%65536)+1))
+
+		echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" &
+	done > /dev/null 2>&1
+	wait
+EOF
+		now=$(date +%s)
+	done
+}
+
+ct_udpclash()
+{
+	local ns="$1"
+	local duration="$2"
+	local now=$(date +%s)
+	local end=$((now + duration))
+
+	[ -x udpclash ] || return
+
+        while [ $now -lt $end ]; do
+		ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1
+
+		now=$(date +%s)
+	done
+}
+
+# dump to /dev/null.  We don't want dumps to cause infinite loops
+# or use-after-free even when conntrack table is altered while dumps
+# are in progress.
+ct_nulldump()
+{
+	local ns="$1"
+
+	ip netns exec "$ns" conntrack -L > /dev/null 2>&1 &
+
+	# Don't require /proc support in conntrack
+	if [ -r /proc/self/net/nf_conntrack ] ; then
+		ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null &
+	fi
+
+	wait
+}
+
+ct_nulldump_loop()
+{
+	local ns="$1"
+	local duration="$2"
+	local now=$(date +%s)
+	local end=$((now + duration))
+
+        while [ $now -lt $end ]; do
+		ct_nulldump "$ns"
+		sleep $((RANDOM%2))
+		now=$(date +%s)
+	done
+}
+
+change_timeouts()
+{
+	local ns="$1"
+	local r1=$((RANDOM%2))
+	local r2=$((RANDOM%2))
+
+	[ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5))
+	[ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5))
+}
+
+ct_change_timeouts_loop()
+{
+	local ns="$1"
+	local duration="$2"
+	local now=$(date +%s)
+	local end=$((now + duration))
+
+        while [ $now -lt $end ]; do
+		change_timeouts "$ns"
+		sleep $((RANDOM%2))
+		now=$(date +%s)
+	done
+
+	# restore defaults
+	ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30
+	ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30
+}
+
+check_taint()
+{
+	local tainted_then="$1"
+	local msg="$2"
+
+	local tainted_now=0
+
+	if [ "$tainted_then" -ne 0 ];then
+		return
+	fi
+
+	read tainted_now < /proc/sys/kernel/tainted
+
+	if [ "$tainted_now" -eq 0 ];then
+		echo "PASS: $msg"
+	else
+		echo "TAINT: $msg"
+		dmesg
+		exit 1
+	fi
+}
+
+insert_flood()
+{
+	local n="$1"
+	local timeout="$2"
+	local r=0
+
+	r=$((RANDOM%$insert_count))
+
+	ct_pingflood "$n" "$timeout" "floodresize" &
+	ct_udpflood "$n" "$timeout" &
+	ct_udpclash "$n" "$timeout" &
+
+	insert_ctnetlink "$n" "$r" &
+	ctflush "$n" "$timeout" &
+	ct_nulldump_loop "$n" "$timeout" &
+	ct_change_timeouts_loop "$n" "$timeout" &
+
+	wait
+}
+
+test_floodresize_all()
+{
+	local timeout=20
+	local n=""
+	local tainted_then=""
+
+	read tainted_then < /proc/sys/kernel/tainted
+
+	for n in "$nsclient1" "$nsclient2";do
+		insert_flood "$n" "$timeout" &
+	done
+
+	# resize table constantly while flood/insert/dump/flushes
+	# are happening in parallel.
+	ctresize "$timeout"
+
+	# wait for subshells to complete, everything is limited
+	# by $timeout.
+	wait
+
+	check_taint "$tainted_then" "resize+flood"
+}
+
+check_dump()
+{
+	local ns="$1"
+	local protoname="$2"
+	local c=0 l=0
+	local proto=0
+	local proc=0
+	local unique=""
+	local lret=0
+	# Cross-check the entry counts of $ns as seen by conntrack -C, -L, -L --proto and /proc.
+	# NOTE: assumes timeouts are large enough to not have
+	# expirations in all following tests.
+	l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l)
+	c=$(ip netns exec "$ns" conntrack -C)
+
+	if [ "$c" -eq 0 ]; then
+		echo "FAIL: conntrack count for $ns is 0"
+		lret=1
+	fi
+
+	if [ "$c" -ne "$l" ]; then
+		echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l"
+		lret=1
+	fi
+
+	# check the dump we retrieved is free of duplicated entries.
+	unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l)
+	if [ "$l" -ne "$unique" ]; then
+		echo "FAIL: listing contained redundant entries for $ns: $l != $unique"
+		diff -u "$tmpfile" "$tmpfile_uniq"
+		lret=1
+	fi
+
+	# we either inserted icmp or only udp, hence, --proto should return same entry count as without filter.
+	proto=$(ip netns exec "$ns" conntrack -L --proto "$protoname" 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l)
+	if [ "$l" -ne "$proto" ]; then
+		echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto"
+		diff -u "$tmpfile" "$tmpfile_uniq"
+		lret=1
+	fi
+
+	if [ -r /proc/self/net/nf_conntrack ] ; then
+		proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l")
+
+		if [ "$l" -ne "$proc" ]; then
+			echo "FAIL: proc inconsistency for $ns: $l != $proc"
+			lret=1
+		fi
+
+		proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l)
+		if [ "$l" -ne "$proc" ]; then
+			echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc"
+			diff -u "$tmpfile_proc" "$tmpfile_uniq"
+			lret=1
+		fi
+	fi
+
+	if [ $lret -eq 0 ];then
+		echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+	else
+		echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+		ret=1
+	fi
+}
+
+test_dump_all()
+{
+	local timeout=3
+	local tainted_then=""
+	# Populate both client namespaces, then compare the dump outputs via check_dump.
+	read tainted_then < /proc/sys/kernel/tainted
+
+	ct_flush_once "$nsclient1"
+	ct_flush_once "$nsclient2"
+	# large icmp timeout: check_dump assumes entries do not expire while being counted
+	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600
+
+	ct_pingflood "$nsclient1" $timeout "dumpall" &
+	insert_ctnetlink "$nsclient2" $insert_count
+
+	wait
+
+	check_dump "$nsclient1" "icmp"
+	check_dump "$nsclient2" "udp"
+
+	check_taint "$tainted_then" "test parallel conntrack dumps"
+}
+
+check_sysctl_immutable()
+{
+	local ns="$1"
+	local name="$2"
+	local failhard="$3"
+	local o=0
+	local n=0
+	# Try to bump sysctl $name by one inside $ns; returns 0 if the value changed, 1 otherwise.
+	o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+	n=$((o+1))
+
+	# return value isn't reliable, need to read it back
+	ip netns exec "$ns" sysctl -q "$name"="$n" 2>/dev/null >/dev/null
+
+	n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+	# nothing could be read back (e.g. sysctl not present in this netns): not changeable
+	[ -z "$n" ] && return 1
+
+	if [ "$o" -ne "$n" ]; then
+		if [ "$failhard" -gt 0 ] ;then
+			echo "FAIL: $name should not be changeable from namespace (now $n)"
+			ret=1
+		fi
+		return 0
+	fi
+	# value read back is unchanged: write had no effect
+	return 1
+}
+
+test_conntrack_max_limit()
+{
+	sysctl -q net.netfilter.nf_conntrack_max=100
+	insert_ctnetlink "$nsclient1" 101
+
+	# check netns is clamped by init_net, i.e., either netns follows
+	# init_net value, or a higher pernet limit (compared to init_net) is ignored.
+	check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound"
+	# put back the limit that was saved in init_net_max
+	sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+}
+
+test_conntrack_disable()
+{
+	local timeout=2
+	# With table test_ct flushed in nsclient1, only nsclient2 is expected to create entries.
+	# disable conntrack pickups
+	ip netns exec "$nsclient1" nft flush table ip test_ct
+
+	ct_flush_once "$nsclient1"
+	ct_flush_once "$nsclient2"
+
+	ct_pingflood "$nsclient1" "$timeout" "conntrack disable"
+	ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
+
+	# Disabled, should not have picked up any connection.
+	check_ctcount "$nsclient1" 0 "conntrack disabled"
+
+	# This one is still active, expect 1 connection.
+	check_ctcount "$nsclient2" 1 "conntrack enabled"
+}
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max)
+
+check_max_alias $init_net_max
+
+sysctl -q net.netfilter.nf_conntrack_max="262000"
+check_max_alias 262000
+
+setup_ns nsclient1 nsclient2
+
+# these must only be changeable from init_net; names get the "net." prefix below
+for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max nf_conntrack_max;do
+	check_sysctl_immutable "$nsclient1" "net.$n" 1
+done
+
+# won't work on older kernels. If it works, check that the netns obeys the limit
+if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then
+	# subtest: if pernet is changeable, check that reducing it in pernet
+	# limits the pernet entries.  Inverse, pernet clamped by a lower init_net
+	# setting, is already checked by "test_conntrack_max_limit" test.
+
+	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1
+	insert_ctnetlink "$nsclient1" 2
+	check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound"
+	ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+fi
+
+for n in "$nsclient1" "$nsclient2";do
+# enable conntrack in both namespaces
+ip netns exec "$n" nft -f - <<EOF
+table ip test_ct {
+	chain input {
+		type filter hook input priority 0
+		ct state new counter
+	}
+}
+EOF
+done
+# scratch files used by check_dump
+tmpfile=$(mktemp)
+tmpfile_proc=$(mktemp)
+tmpfile_uniq=$(mktemp)
+test_conntrack_max_limit
+test_dump_all
+test_floodresize_all
+test_conntrack_disable
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
index 507930cee8cb..462d628cc3bd 100644
--- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
+++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
@@ -33,9 +33,14 @@ static void die(const char *e)
 	exit(111);
 }
 
-static void die_port(uint16_t got, uint16_t want)
+static void die_port(const struct sockaddr_in *sin, uint16_t want)
 {
-	fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got));
+	uint16_t got = ntohs(sin->sin_port);	/* host byte order, for printing */
+	char str[INET_ADDRSTRLEN];
+
+	inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str));
+	/* report the unexpected port together with the address it came from */
+	fprintf(stderr, "Port number changed, wanted %d got %d from %s\n", want, got, str);
 	exit(1);
 }
 
@@ -100,7 +105,7 @@ int main(int argc, char *argv[])
 				die("child recvfrom");
 
 			if (peer.sin_port != htons(PORT))
-				die_port(peer.sin_port, PORT);
+				die_port(&peer, PORT);
 		} else {
 			if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN)
 				continue;
@@ -109,7 +114,7 @@ int main(int argc, char *argv[])
 				die("parent recvfrom");
 
 			if (peer.sin_port != htons((PORT + 1)))
-				die_port(peer.sin_port, PORT + 1);
+				die_port(&peer, PORT + 1);
 		}
 	}
 
diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
index a24c896347a8..dc7e9d6da062 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
@@ -45,6 +45,8 @@ if ip netns exec "$ns0" ./conntrack_reverse_clash; then
 	echo "PASS: No SNAT performed for null bindings"
 else
 	echo "ERROR: SNAT performed without any matching snat rule"
+	ip netns exec "$ns0" conntrack -L
+	ip netns exec "$ns0" conntrack -S
 	exit 1
 fi
 
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
index e95ecb37c2b1..207b79932d91 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -32,7 +32,6 @@ source lib.sh
 
 IP0=172.30.30.1
 IP1=172.30.30.2
-DUMMYNET=10.9.9
 PFXL=30
 ret=0
 
@@ -52,11 +51,6 @@ trap cleanup EXIT
 
 setup_ns ns0 ns1
 
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1
-
 if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
 	echo "SKIP: Could not add veth device"
 	exit $ksft_skip
@@ -67,18 +61,13 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
 	exit $ksft_skip
 fi
 
-ip -net "$ns0" link add dummy0 type dummy
-
 ip -net "$ns0" li set veth0 master tvrf
-ip -net "$ns0" li set dummy0 master tvrf
 ip -net "$ns0" li set tvrf up
 ip -net "$ns0" li set veth0 up
-ip -net "$ns0" li set dummy0 up
 ip -net "$ns1" li set veth0 up
 
 ip -net "$ns0" addr add $IP0/$PFXL dev veth0
 ip -net "$ns1" addr add $IP1/$PFXL dev veth0
-ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0
 
 listener_ready()
 {
@@ -219,35 +208,9 @@ EOF
 	fi
 }
 
-test_fib()
-{
-ip netns exec "$ns0" nft -f - <<EOF
-flush ruleset
-table ip t {
-	counter fibcount { }
-
-	chain prerouting {
-		type filter hook prerouting priority 0;
-		meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack
-	}
-}
-EOF
-	ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0
-	ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null
-
-	if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then
-		echo "PASS: fib lookup returned exepected output interface"
-	else
-		echo "FAIL: fib lookup did not return exepected output interface"
-		ret=1
-		return
-	fi
-}
-
 test_ct_zone_in
 test_masquerade_vrf "default"
 test_masquerade_vrf "pfifo"
 test_masquerade_veth
-test_fib
 
 exit $ret
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index d3edb16cd4b3..9c9d5b38ab71 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -129,9 +129,6 @@ test_dr() {
 	# avoid incorrect arp response
 	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
 	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
-	# avoid reverse route lookup
-	ip netns exec "${ns2}" sysctl -qw  net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${ns2}" sysctl -qw  net.ipv4.conf.veth21.rp_filter=0
 	ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
 
 	test_service
@@ -154,7 +151,7 @@ test_nat() {
 test_tun() {
 	ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
 
-	ip netns exec "${ns1}" modprobe -q ipip
+	modprobe -q ipip
 	ip netns exec "${ns1}" ip link set tunl0 up
 	ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
 	ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
@@ -163,13 +160,10 @@ test_tun() {
 	ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
 	ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
 
-	ip netns exec "${ns2}" modprobe -q ipip
 	ip netns exec "${ns2}" ip link set tunl0 up
 	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
 	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
-	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
 	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
-	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
 	ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
 
 	test_service
diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
index 1014551dd769..6731fe1eaf2e 100755
--- a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
+++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
@@ -17,9 +17,31 @@ cleanup()
 
 checktool "socat -h" "run test without socat"
 checktool "iptables --version" "run test without iptables"
+checktool "conntrack --version" "run test without conntrack"
 
 trap cleanup EXIT
 
+connect_done()
+{
+	local ns="$1"
+	local port="$2"
+	# succeeds once ss in $ns lists an established TCP socket with destination port $port
+	ip netns exec "$ns" ss -nt -o state established "dport = :$port" | grep -q "$port"
+}
+
+check_ctstate()
+{
+	local ns="$1"
+	local dp="$2"
+	# the entry for this fixed tuple must exist in ESTABLISHED state, else flag a failure
+	ip netns exec "$ns" conntrack --get -s 192.168.1.2 -d 192.168.1.1 -p tcp \
+	     --sport 10000 --dport "$dp" --state ESTABLISHED > /dev/null 2>&1 && return
+
+	echo "FAIL: Did not find expected state for dport $2"
+	ip netns exec "$ns" bash -c 'conntrack -L; conntrack -S; ss -nt'
+	ret=1
+}
+
 setup_ns ns1 ns2
 
 # Connect the namespaces using a veth pair
@@ -44,15 +66,18 @@ socatpid=$!
 ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000"
 
 # add a virtual IP using DNAT
-ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 || exit 1
 
 # ... and route it to the other namespace
 ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1
 
-# add a persistent connection from the other namespace
-ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+# listener should be up by now, wait if it isn't yet.
+wait_local_port_listen "$ns1" 5201 tcp
 
-sleep 1
+# add a persistent connection from the other namespace
+sleep 10 | ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+cpid0=$!
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" "5201"
 
 # ip daddr:dport will be rewritten to 192.168.1.1 5201
 # NAT must reallocate source port 10000 because
@@ -71,26 +96,25 @@ fi
 ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
 ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
 
-sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+cpid1=$!
 
-# if connect succeeds, client closes instantly due to EOF on stdin.
-# if connect hangs, it will time out after 5s.
-echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
 cpid2=$!
 
-time_then=$(date +%s)
-wait $cpid2
-rv=$?
-time_now=$(date +%s)
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5202
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5203
 
-# Check how much time has elapsed, expectation is for
-# 'cpid2' to connect and then exit (and no connect delay).
-delta=$((time_now - time_then))
+check_ctstate "$ns1" 5202
+check_ctstate "$ns1" 5203
 
-if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then
+kill $socatpid $cpid0 $cpid1 $cpid2
+socatpid=0
+
+if [ $ret -eq 0 ]; then
 	echo "PASS: could connect to service via redirected ports"
 else
-	echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+	echo "FAIL: socat cannot connect to service via redirect"
 	ret=1
 fi
 
diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c
index 9e56b9d47037..8bbec37f5356 100644
--- a/tools/testing/selftests/net/netfilter/nf_queue.c
+++ b/tools/testing/selftests/net/netfilter/nf_queue.c
@@ -18,6 +18,9 @@
 struct options {
 	bool count_packets;
 	bool gso_enabled;
+	bool failopen;
+	bool out_of_order;
+	bool bogus_verdict;
 	int verbose;
 	unsigned int queue_num;
 	unsigned int timeout;
@@ -30,7 +33,7 @@ static struct options opts;
 
 static void help(const char *p)
 {
-	printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
+	printf("Usage: %s [-c|-v [-vv] ] [-o] [-O] [-b] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
 }
 
 static int parse_attr_cb(const struct nlattr *attr, void *data)
@@ -236,6 +239,8 @@ struct mnl_socket *open_queue(void)
 
 	flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
 	flags |= NFQA_CFG_F_UID_GID;
+	if (opts.failopen)
+		flags |= NFQA_CFG_F_FAIL_OPEN;
 	mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
 	mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
 
@@ -272,7 +277,9 @@ static int mainloop(void)
 	unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
 	struct mnl_socket *nl;
 	struct nlmsghdr *nlh;
+	uint32_t ooo_ids[16];
 	unsigned int portid;
+	int ooo_count = 0;
 	char *buf;
 	int ret;
 
@@ -305,6 +312,9 @@ static int mainloop(void)
 
 		ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL);
 		if (ret < 0) {
+			/* bogus verdict mode will generate ENOENT error messages */
+			if (opts.bogus_verdict && errno == ENOENT)
+				continue;
 			perror("mnl_cb_run");
 			exit(EXIT_FAILURE);
 		}
@@ -313,10 +323,35 @@ static int mainloop(void)
 		if (opts.delay_ms)
 			sleep_ms(opts.delay_ms);
 
-		nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
-		if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
-			perror("mnl_socket_sendto");
-			exit(EXIT_FAILURE);
+		if (opts.bogus_verdict) {
+			for (int i = 0; i < 50; i++) {
+				nlh = nfq_build_verdict(buf, id + 0x7FFFFFFF + i,
+							opts.queue_num, opts.verdict);
+				mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+			}
+		}
+
+		if (opts.out_of_order) {
+			ooo_ids[ooo_count] = id;	/* hold the verdict back for now */
+			if (ooo_count >= 15) {	/* 16 ids held: send verdicts newest-first */
+				for (; ooo_count >= 0; ooo_count--) {
+					nlh = nfq_build_verdict(buf, ooo_ids[ooo_count],
+								opts.queue_num, opts.verdict);
+					if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+						perror("mnl_socket_sendto");
+						exit(EXIT_FAILURE);
+					}
+				}
+				ooo_count = 0;
+			} else {
+				ooo_count++;
+			}
+		} else {
+			nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
+			if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+				perror("mnl_socket_sendto");
+				exit(EXIT_FAILURE);
+			}
 		}
 	}
 
@@ -329,7 +364,7 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
+	while ((c = getopt(argc, argv, "chvoObt:q:Q:d:G")) != -1) {
 		switch (c) {
 		case 'c':
 			opts.count_packets = true;
@@ -366,9 +401,18 @@ static void parse_opts(int argc, char **argv)
 		case 'G':
 			opts.gso_enabled = false;
 			break;
+		case 'o':
+			opts.failopen = true;
+			break;
 		case 'v':
 			opts.verbose++;
 			break;
+		case 'O':
+			opts.out_of_order = true;
+			break;
+		case 'b':
+			opts.bogus_verdict = true;
+			break;
 		}
 	}
 
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 47088b005390..ffdc6ccc6511 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -15,10 +15,12 @@ source lib.sh
 # Available test groups:
 # - reported_issues: check for issues that were reported in the past
 # - correctness: check that packets match given entries, and only those
+# - correctness_large: same but with additional non-matching entries
 # - concurrency: attempt races between insertion, deletion and lookup
 # - timeout: check that packets match entries until they expire
 # - performance: estimate matching rate, compare with rbtree and hash baselines
-TESTS="reported_issues correctness concurrency timeout"
+TESTS="reported_issues correctness correctness_large concurrency timeout"
+
 [ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
 
 # Set types, defined by TYPE_ variables below
@@ -27,7 +29,8 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
        net6_port_net6_port net_port_mac_proto_net"
 
 # Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate
+      insert_overlap load_flush_load4 load_flush_load8"
 
 # List of possible paths to pktgen script from kernel tree for performance tests
 PKTGEN_SCRIPT_PATHS="
@@ -376,7 +379,7 @@ display		net,port,proto
 type_spec	ipv4_addr . inet_service . inet_proto
 chain_spec	ip daddr . udp dport . meta l4proto
 dst		addr4 port proto
-src
+src		 
 start		1
 count		9
 src_delta	9
@@ -387,6 +390,73 @@ race_repeat	0
 
 perf_duration	0
 "
+
+TYPE_avx2_mismatch="
+display		avx2 false match
+type_spec	inet_proto . ipv6_addr
+chain_spec	meta l4proto . ip6 daddr
+dst		proto addr6
+src
+start		1
+count		1
+src_delta	1
+tools		ping
+proto		icmp6
+
+race_repeat	0
+
+perf_duration	0
+"
+
+
+TYPE_doublecreate="
+display		cannot create same element twice
+type_spec	ipv4_addr . ipv4_addr
+chain_spec	ip saddr . ip daddr
+dst		addr4
+proto		icmp
+
+race_repeat	0
+
+perf_duration	0
+"
+
+TYPE_insert_overlap="
+display		reject overlapping range on add
+type_spec	ipv4_addr . ipv4_addr
+chain_spec	ip saddr . ip daddr
+dst		addr4
+proto		icmp
+
+race_repeat	0
+
+perf_duration	0
+"
+
+TYPE_load_flush_load4="
+display		reload with flush, 4bit groups
+type_spec	ipv4_addr . ipv4_addr
+chain_spec	ip saddr . ip daddr
+dst		addr4
+proto		icmp
+
+race_repeat	0
+
+perf_duration	0
+"
+
+TYPE_load_flush_load8="
+display		reload with flush, 8bit groups
+type_spec	ipv4_addr . ipv4_addr
+chain_spec	ip saddr . ip daddr
+dst		addr4
+proto		icmp
+
+race_repeat	0
+
+perf_duration	0
+"
+
 # Set template for all tests, types and rules are filled in depending on test
 set_template='
 flush ruleset
@@ -398,6 +468,7 @@ table inet filter {
 
 	set test {
 		type ${type_spec}
+		counter
 		flags interval,timeout
 	}
 
@@ -1137,9 +1208,18 @@ del() {
 	fi
 }
 
-# Return packet count from 'test' counter in 'inet filter' table
+# Print the packet count that 'nft reset element' reports for elem $1 of set 'test' in 'inet filter'
 count_packets() {
 	found=0
+	for token in $(nft reset element inet filter test "${1}" ); do
+		[ ${found} -eq 1 ] && echo "${token}" && return
+		[ "${token}" = "packets" ] && found=1
+	done
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets_nomatch() {
+	found=0
 	for token in $(nft list counter inet filter test); do
 		[ ${found} -eq 1 ] && echo "${token}" && return
 		[ "${token}" = "packets" ] && found=1
@@ -1185,6 +1265,10 @@ perf() {
 
 # Set MAC addresses, send single packet, check that it matches, reset counter
 send_match() {
+	local elem="$1"
+
+	shift
+
 	ip link set veth_a address "$(format_mac "${1}")"
 	ip -n B link set veth_b address "$(format_mac "${2}")"
 
@@ -1195,7 +1279,7 @@ send_match() {
 		eval src_"$f"=\$\(format_\$f "${2}"\)
 	done
 	eval send_\$proto
-	if [ "$(count_packets)" != "1" ]; then
+	if [ "$(count_packets "$elem")" != "1" ]; then
 		err "${proto} packet to:"
 		err "  $(for f in ${dst}; do
 			 eval format_\$f "${1}"; printf ' '; done)"
@@ -1221,7 +1305,7 @@ send_nomatch() {
 		eval src_"$f"=\$\(format_\$f "${2}"\)
 	done
 	eval send_\$proto
-	if [ "$(count_packets)" != "0" ]; then
+	if [ "$(count_packets_nomatch)" != "0" ]; then
 		err "${proto} packet to:"
 		err "  $(for f in ${dst}; do
 			 eval format_\$f "${1}"; printf ' '; done)"
@@ -1234,15 +1318,54 @@ send_nomatch() {
 	fi
 }
 
+maybe_send_nomatch() {
+	local elem="$1"
+	local what="$4"
+
+	# only probe on roughly one call out of 20, succeed silently otherwise
+	[ $((RANDOM%20)) -eq 0 ] || return 0
+	dst_addr4="$2"
+	dst_port="$3"
+	send_udp
+
+	[ "$(count_packets_nomatch)" = "0" ] && return 0
+
+	err "Packet to $dst_addr4:$dst_port did match $what"
+	err "$(nft -a list ruleset)"
+	return 1
+}
+
+maybe_send_match() {
+	local elem="$1"
+	local what="$4"
+
+	# only probe on roughly one call out of 20, succeed silently otherwise
+	[ $((RANDOM%20)) -eq 0 ] || return 0
+	dst_addr4="$2"
+	dst_port="$3"
+	send_udp
+
+	if [ "$(count_packets "{ $elem }")" != "1" ]; then
+		err "Packet to $dst_addr4:$dst_port did not match $what"
+		err "$(nft -a list ruleset)"
+		return 1
+	fi
+	nft reset counter inet filter test >/dev/null
+	nft reset element inet filter test "{ $elem }" >/dev/null
+}
+
 # Correctness test template:
 # - add ranged element, check that packets match it
 # - check that packets outside range don't match it
 # - remove some elements, check that packets don't match anymore
-test_correctness() {
-	setup veth send_"${proto}" set || return ${ksft_skip}
-
+test_correctness_main() {
 	range_size=1
+
+	send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
+
 	for i in $(seq "${start}" $((start + count))); do
+		local elem=""
+
 		end=$((start + range_size))
 
 		# Avoid negative or zero-sized port ranges
@@ -1253,15 +1376,16 @@ test_correctness() {
 		srcstart=$((start + src_delta))
 		srcend=$((end + src_delta))
 
-		add "$(format)" || return 1
+		elem="$(format)"
+		add "$elem" || return 1
 		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
-			send_match "${j}" $((j + src_delta)) || return 1
+			send_match "$elem" "${j}" $((j + src_delta)) || return 1
 		done
 		send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
 
 		# Delete elements now and then
 		if [ $((i % 3)) -eq 0 ]; then
-			del "$(format)" || return 1
+			del "$elem" || return 1
 			for j in $(seq "$start" \
 				   $((range_size / 2 + 1)) ${end}); do
 				send_nomatch "${j}" $((j + src_delta)) \
@@ -1274,6 +1398,163 @@ test_correctness() {
 	done
 }
 
+test_correctness() {
+	setup veth send_"${proto}" set || return ${ksft_skip}
+	# setup succeeded: run the shared correctness body on the set as created by setup
+	test_correctness_main
+}
+
+# Repeat the correctness tests, but add extra non-matching entries.
+# This exercises the more compact '4 bit group' representation that
+# gets picked when the default 8-bit representation exceeds
+# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory.
+# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust().
+#
+# The format() helper is way too slow when generating lots of
+# entries so it's not used here.
+test_correctness_large() {
+	setup veth send_"${proto}" set || return ${ksft_skip}
+	# number of dummy (filler) entries to add.
+	local dcount=16385
+
+	(
+	echo -n "add element inet filter test { "
+
+	case "$type_spec" in
+	"ether_addr . ipv4_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_mac $((1000000 + i))
+			printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+		done
+		;;
+	"inet_proto . ipv6_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "%i . " $((RANDOM%256))
+			format_addr6 $((1000000 + i))
+		done
+		;;
+	"inet_service . inet_proto")
+		# smaller key sizes, need more entries to hit the
+		# 4-bit threshold.
+		dcount=65536
+		for i in $(seq 1 $dcount); do
+			local proto=$((RANDOM%256))
+
+			# Test uses UDP to match, as it also fails when matching
+			# an entry that doesn't exist, so skip 'udp' entries
+			# to not trigger a wrong failure.
+			[ $proto -eq 17 ] && proto=18
+			[ $i -gt 1 ] && echo ", "
+			printf "%i . %i " $(((i%65534) + 1)) $((proto))
+		done
+		;;
+	"inet_service . ipv4_addr")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+		done
+		;;
+	"ipv4_addr . ether_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+			format_mac $((1000000 + i))
+		done
+		;;
+	"ipv4_addr . inet_service")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+		done
+		;;
+	"ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr")
+		dcount=65536
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+			format_mac $((1000000 + i))
+			printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+		done
+		;;
+	"ipv4_addr . inet_service . inet_proto")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256))
+		done
+		;;
+	"ipv4_addr . inet_service . inet_proto . ipv4_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256))
+		done
+		;;
+	"ipv4_addr . inet_service . ipv4_addr")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256))
+		done
+		;;
+	"ipv6_addr . ether_addr")
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " . "
+			format_mac $((1000000 + i))
+		done
+		;;
+	"ipv6_addr . inet_service")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " .  $(((RANDOM%65534) + 1))"
+		done
+		;;
+	"ipv6_addr . inet_service . ether_addr")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " .  $(((RANDOM%65534) + 1)) . "
+			format_mac $((i + 1000000))
+		done
+		;;
+	"ipv6_addr . inet_service . ether_addr . inet_proto")
+		dcount=65536
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " .  $(((RANDOM%65534) + 1)) . "
+			format_mac $((i + 1000000))
+			echo -n " .  $((RANDOM%256))"
+		done
+		;;
+	"ipv6_addr . inet_service . ipv6_addr . inet_service")
+		dcount=32768
+		for i in $(seq 1 $dcount); do
+			[ $i -gt 1 ] && echo ", "
+			format_addr6 $((i + 1000000))
+			echo -n " .  $(((RANDOM%65534) + 1)) . "
+			format_addr6 $((i + 2123456))
+			echo -n " .  $((RANDOM%256))"
+		done
+		;;
+	*)
+		echo "Unhandled $type_spec" >&2
+		return 1
+	esac
+	echo -n "}"
+
+	) | nft -f - || return 1
+
+	test_correctness_main
+}
+
 # Concurrency test template:
 # - add all the elements
 # - start a thread for each physical thread that:
@@ -1396,14 +1677,17 @@ test_timeout() {
 
 	range_size=1
 	for i in $(seq "$start" $((start + count))); do
+		local elem=""
+
 		end=$((start + range_size))
 		srcstart=$((start + src_delta))
 		srcend=$((end + src_delta))
 
-		add "$(format)" || return 1
+		elem="$(format)"
+		add "$elem" || return 1
 
 		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
-			send_match "${j}" $((j + src_delta)) || return 1
+			send_match "$elem" "${j}" $((j + src_delta)) || return 1
 		done
 
 		range_size=$((range_size + 1))
@@ -1561,7 +1845,7 @@ test_bug_reload() {
 		srcend=$((end + src_delta))
 
 		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
-			send_match "${j}" $((j + src_delta)) || return 1
+			send_match "$(format)" "${j}" $((j + src_delta)) || return 1
 		done
 
 		range_size=$((range_size + 1))
@@ -1580,22 +1864,34 @@ test_bug_net_port_proto_match() {
 	range_size=1
 	for i in $(seq 1 10); do
 		for j in $(seq 1 20) ; do
-			elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+			local dport=$j
+
+			elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
+
+			# too slow, do not test all addresses
+			maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "before add" || return 1
 
 			nft "add element inet filter test { $elem }" || return 1
+
+			maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "after add" || return 1
+
 			nft "get element inet filter test { $elem }" | grep -q "$elem"
 			if [ $? -ne 0 ];then
 				local got=$(nft "get element inet filter test { $elem }")
 				err "post-add: should have returned $elem but got $got"
 				return 1
 			fi
+
+			maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "out-of-range" || return 1
 		done
 	done
 
 	# recheck after set was filled
 	for i in $(seq 1 10); do
 		for j in $(seq 1 20) ; do
-			elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+			local dport=$j
+
+			elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
 
 			nft "get element inet filter test { $elem }" | grep -q "$elem"
 			if [ $? -ne 0 ];then
@@ -1603,6 +1899,9 @@ test_bug_net_port_proto_match() {
 				err "post-fill: should have returned $elem but got $got"
 				return 1
 			fi
+
+			maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "recheck" || return 1
+			maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "recheck out-of-range" || return 1
 		done
 	done
 
@@ -1610,9 +1909,10 @@ test_bug_net_port_proto_match() {
 	for i in $(seq 1 10); do
 		for j in $(seq 1 20) ; do
 			local rnd=$((RANDOM%10))
+			local dport=$j
 			local got=""
 
-			elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+			elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
 			if [ $rnd -gt 0 ];then
 				continue
 			fi
@@ -1623,12 +1923,148 @@ test_bug_net_port_proto_match() {
 				err "post-delete: query for $elem returned $got instead of error."
 				return 1
 			fi
+
+			maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "match after deletion" || return 1
 		done
 	done
 
 	nft flush ruleset
 }
 
+test_bug_avx2_mismatch()
+{	# Inserts only $a1, switches dst_addr6 to the near-identical $a2, sends ICMPv6 and fails if $a1's element counted anything.
+	setup veth send_"${proto}" set || return ${ksft_skip}
+
+	local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001"
+	local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001"	# differs from a1 only in the third group (01fe vs 01ff)
+
+	nft "add element inet filter test { icmpv6 . $a1 }"	# exit status is not checked here
+
+	dst_addr6="$a2"	# presumably the destination send_icmp6 uses -- confirm in send_icmp6
+	send_icmp6
+
+	if [ "$(count_packets "{ icmpv6 . $a1 }")" -gt "0" ]; then	# a non-zero count for a1 means a false match
+		err "False match for $a2"
+		return 1
+	fi
+}
+
+test_bug_doublecreate()
+{	# Checks 'add' vs 'create' semantics on already-present elements, both as single commands and inside one nft -f batch.
+	local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4"
+	local ret=1	# local: shadows any outer 'ret' for the duration of this function
+	local i		# declared but not used in this function
+
+	setup veth send_"${proto}" set || return ${ksft_skip}
+
+	add "{ $elements }" || return 1
+	# expected to work: 'add' on existing should be no-op.
+	add "{ $elements }" || return 1
+
+	# 'create' should return an error.
+	if nft create element inet filter test "{ $elements }" 2>/dev/null; then
+		err "Could create an existing element"
+		return 1
+	fi
+nft -f - <<EOF 2>/dev/null
+flush set inet filter test
+create element inet filter test { $elements }
+create element inet filter test { $elements }
+EOF
+	ret=$?	# exit status of the nft -f batch above; success here is the error case
+	if [ $ret -eq 0 ]; then
+		err "Could create element twice in one transaction"
+		err "$(nft -a list ruleset)"
+		return 1
+	fi
+	# flush followed by a single create in the same batch must succeed.
+nft -f - <<EOF 2>/dev/null
+flush set inet filter test
+create element inet filter test { $elements }
+EOF
+	ret=$?	# exit status of the second nft -f batch
+	if [ $ret -ne 0 ]; then
+		err "Could not flush and re-create element in one transaction"
+		return 1
+	fi
+
+	return 0
+}
+
+add_fail()
+{	# Negative helper: returns 0 only if nft refuses to add element $1 to set 'test'.
+	if nft add element inet filter test "$1" 2>/dev/null ; then	# success of the add is the failure case
+		err "Returned success for add ${1} given set:"
+		err "$(nft -a list set inet filter test )"	# dump the set contents for diagnosis
+		return 1
+	fi
+
+	return 0
+}
+
+test_bug_insert_overlap()
+{	# Adds one exact element, then checks which range elements touching it are rejected (add_fail) or accepted (add).
+	local elements="1.2.3.4 . 1.2.4.1"
+
+	setup veth send_"${proto}" set || return ${ksft_skip}
+
+	add "{ $elements }" || return 1
+
+	elements="1.2.3.0-1.2.3.4 . 1.2.4.1"	# first field is a range containing 1.2.3.4, second field identical
+	add_fail "{ $elements }" || return 1
+
+	elements="1.2.3.0-1.2.3.4 . 1.2.4.2"	# same first-field range, but a different second field
+	add "{ $elements }" || return 1
+
+	elements="1.2.3.4 . 1.2.4.1-1.2.4.2"	# second field is a range spanning both second-field values added above
+	add_fail "{ $elements }" || return 1
+
+	return 0
+}
+
+test_bug_load_flush_load4()
+{	# Writes a large batch of 'add element' commands to $tmp, loads it, then flushes and reloads it in a single nft -f run.
+	local i
+
+	setup veth send_"${proto}" set || return ${ksft_skip}
+
+	for i in $(seq 0 255); do	# 256 values of i ...
+		local addelem="add element inet filter test"
+		local j
+
+		for j in $(seq 0 20); do	# ... times 21 values of j, two elements per pair
+			echo "$addelem { 10.$j.0.$i . 10.$j.1.$i }"
+			echo "$addelem { 10.$j.0.$i . 10.$j.2.$i }"
+		done
+	done > "$tmp"	# output of the whole nested loop goes to $tmp
+
+	nft -f "$tmp" || return 1
+
+	( echo "flush set inet filter test";cat "$tmp") | nft -f -	# flush + identical reload fed as one input stream
+	[ $? -eq 0 ] || return 1
+
+	return 0
+}
+
+test_bug_load_flush_load8()
+{	# Same load / flush+reload sequence as test_bug_load_flush_load4, with a smaller batch (100 x 2 elements).
+	local i
+
+	setup veth send_"${proto}" set || return ${ksft_skip}
+
+	for i in $(seq 1 100); do	# two elements per i, sharing the first field and differing in the second
+		echo "add element inet filter test { 10.0.0.$i . 10.0.1.$i }"
+		echo "add element inet filter test { 10.0.0.$i . 10.0.2.$i }"
+	done > "$tmp"
+
+	nft -f "$tmp" || return 1
+
+	( echo "flush set inet filter test";cat "$tmp") | nft -f -	# flush + identical reload fed as one input stream
+	[ $? -eq 0 ] || return 1
+
+	return 0
+}
+
 test_reported_issues() {
 	eval test_bug_"${subtest}"
 }
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
index ce1451c275fd..04544905c216 100755
--- a/tools/testing/selftests/net/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -3,6 +3,10 @@
 # This tests the fib expression.
 #
 # Kselftest framework requirement - SKIP code is 4.
+#
+#  10.0.1.99     10.0.1.1           10.0.2.1         10.0.2.99
+# dead:1::99    dead:1::1          dead:2::1        dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
 
 source lib.sh
 
@@ -45,6 +49,19 @@ table inet filter {
 EOF
 }
 
+load_input_ruleset() {	# Loads an input-hook chain into netns $1 that counts, logs and drops packets whose 'fib saddr . iif' lookup has no oif.
+	local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+	chain input {
+		type filter hook input priority 0; policy accept;
+	        fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+	}
+}
+EOF
+}
+
 load_pbr_ruleset() {
 	local netns=$1
 
@@ -59,6 +76,89 @@ table inet filter {
 EOF
 }
 
+load_type_ruleset() {	# Loads the 'fib ... type' counter ruleset into netns $1, once as 'table ip filter' and once as 'table ip6 filter'.
+	local netns=$1
+
+	for family in ip ip6;do	# 'family' is not declared local here; the loop's status is that of the last nft run
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table $family filter {
+	chain type_match_in {
+		fib daddr type local counter comment "daddr configured on other iface"
+		fib daddr . iif type local counter comment "daddr configured on iif"
+		fib daddr type unicast counter comment "daddr not local"
+		fib daddr . iif type unicast counter comment "daddr not configured on iif"
+	}
+
+	chain type_match_out {
+		fib daddr type unicast counter
+		fib daddr . oif type unicast counter
+		fib daddr type local counter
+		fib daddr . oif type local counter
+	}
+
+	chain prerouting {
+		type filter hook prerouting priority 0;
+		icmp type echo-request counter jump type_match_in
+		icmpv6 type echo-request counter jump type_match_in
+	}
+
+	chain input {
+		type filter hook input priority 0;
+		icmp type echo-request counter jump type_match_in
+		icmpv6 type echo-request counter jump type_match_in
+	}
+
+	chain forward {
+		type filter hook forward priority 0;
+		icmp type echo-request counter jump type_match_in
+		icmpv6 type echo-request counter jump type_match_in
+	}
+
+	chain output {
+		type filter hook output priority 0;
+		icmp type echo-request counter jump type_match_out
+		icmpv6 type echo-request counter jump type_match_out
+	}
+
+	chain postrouting {
+		type filter hook postrouting priority 0;
+		icmp type echo-request counter jump type_match_out
+		icmpv6 type echo-request counter jump type_match_out
+	}
+}
+EOF
+done
+}
+
+reload_type_ruleset() {	# Flushes 'ip filter' and 'ip6 filter' in netns $1, then loads the type ruleset again via load_type_ruleset.
+	ip netns exec "$1" nft flush table ip filter
+	ip netns exec "$1" nft flush table ip6 filter
+	load_type_ruleset "$1"	# return status of this function is that of the reload
+}
+
+check_fib_type_counter_family() {	# Verifies that rule $5 in chain $4 of table '$1 filter' (netns $3) counted exactly $2 packets.
+	local family="$1"	# nft family of the 'filter' table to inspect
+	local want="$2"		# expected packet count
+	local ns="$3"		# network namespace to run nft in
+	local chain="$4"	# chain to list
+	local what="$5"		# literal rule text used to select the rule line
+	local errmsg="$6"	# suffix appended to the failure message
+
+	if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep -F "$what" | grep -q "packets $want ";then	# -F: rule text is literal; trailing space: 2 must not match 20
+		echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2
+		ip netns exec "$ns" nft list chain "$family" filter "$chain"
+		ret=1	# records the failure in the script-wide result
+		return 1
+	fi
+
+	return 0
+}
+
+check_fib_type_counter() {	# Runs check_fib_type_counter_family for "ip" and then "ip6", forwarding all arguments; stops at the first failure.
+	check_fib_type_counter_family "ip" "$@" || return 1
+	check_fib_type_counter_family "ip6" "$@" || return 1
+}
+
 load_ruleset_count() {
 	local netns=$1
 
@@ -77,6 +177,7 @@ check_drops() {
 	if dmesg | grep -q ' nft_rpfilter: ';then
 		dmesg | grep ' nft_rpfilter: '
 		echo "FAIL: rpfilter did drop packets"
+		ret=1
 		return 1
 	fi
 
@@ -151,19 +252,509 @@ test_ping() {
   return 0
 }
 
+test_ping_unreachable() {	# Negative ping test from $ns1: returns 1 if either $1 (IPv4) or $2 (IPv6) answers, 0 otherwise.
+  local daddr4=$1
+  local daddr6=$2
+
+  if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr4" > /dev/null; then	# one probe, 0.1s reply timeout
+	echo "FAIL: ${ns1} could reach $daddr4" 1>&2
+	return 1
+  fi
+
+  if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr6" > /dev/null; then	# same check for the IPv6 address
+	echo "FAIL: ${ns1} could reach $daddr6" 1>&2
+	return 1
+  fi
+
+  return 0
+}
+
+test_fib_type() {	# Pings three kinds of destination through $nsrouter and checks the fib type counters; $1 labels the PASS/FAIL line.
+	local notice="$1"
+	local errmsg="addr-on-if"
+	local lret=0
+
+	if ! load_type_ruleset "$nsrouter";then
+		echo "SKIP: Could not load fib type ruleset"
+		[ $ret -eq 0 ] && ret=$ksft_skip
+		return
+	fi
+
+	# makes router receive packet for addresses configured on incoming
+	# interface.
+	test_ping 10.0.1.1 dead:1::1 || { ret=1; return 1; }	# callers ignore the return value, so record the failure in ret
+
+	# expectation: triggers all 'local' in prerouting/input.
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1
+
+	reload_type_ruleset "$nsrouter"
+	# makes router receive packet for address configured on a different (but local)
+	# interface.
+	test_ping 10.0.2.1 dead:2::1 || { ret=1; return 1; }	# same: a failed ping must reach the exit code
+
+	# expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'.
+	errmsg="addr-on-host"
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+	reload_type_ruleset "$nsrouter"
+	test_ping 10.0.2.99 dead:2::99 || { ret=1; return 1; }	# same: a failed ping must reach the exit code
+	errmsg="addr-on-otherhost"
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1
+	check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+	if [ $lret -eq 0 ];then
+		echo "PASS: fib expression address types match ($notice)"
+	else
+		echo "FAIL: fib expression address types match ($notice)"
+		ret=1
+	fi
+}
+
+test_fib_vrf_dev_add_dummy()
+{	# Creates dummy0 and VRF device tvrf (table 9876) in $nsrouter, enslaves dummy0 to tvrf and brings both up; returns 1 if either device cannot be created.
+	if ! ip -net "$nsrouter" link add dummy0 type dummy ;then
+		echo "SKIP: VRF tests: dummy device type not supported"
+		return 1
+	fi
+
+	if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then
+		echo "SKIP: VRF tests: vrf device type not supported"
+		return 1
+	fi
+
+	ip -net "$nsrouter" link set dummy0 master tvrf
+	ip -net "$nsrouter" link set dummy0 up
+	ip -net "$nsrouter" link set tvrf up	# the function's return status is that of this last command
+}
+
+load_ruleset_vrf()
+{	# Replaces the whole ruleset in $nsrouter with table 'inet t' (dynamic fib result sets); returns 1 and marks SKIP if loading fails.
+# Due to the many different possible combinations using named counters
+# or one-rule-per-expected-result is complex.
+#
+# Instead, add dynamic sets for the fib modes
+# (fib address type, fib output interface lookup .. ),
+# and then add the obtained fib results to them.
+#
+# The test is successful if the sets contain the expected results
+# and no unexpected extra entries existed.
+ip netns exec "$nsrouter" nft -f - <<EOF
+flush ruleset
+table inet t {
+	set fibif4 {
+		typeof meta iif . ip daddr . fib daddr oif
+		flags dynamic
+		counter
+	}
+
+	set fibif4iif {
+		typeof meta iif . ip daddr . fib daddr . iif oif
+		flags dynamic
+		counter
+	}
+
+	set fibif6 {
+		typeof meta iif . ip6 daddr . fib daddr oif
+		flags dynamic
+		counter
+	}
+
+	set fibif6iif {
+		typeof meta iif . ip6 daddr . fib daddr . iif oif
+		flags dynamic
+		counter
+	}
+
+	set fibtype4 {
+		typeof meta iif . ip daddr . fib daddr type
+		flags dynamic
+		counter
+	}
+
+	set fibtype4iif {
+		typeof meta iif . ip daddr . fib daddr . iif type
+		flags dynamic
+		counter
+	}
+
+	set fibtype6 {
+		typeof meta iif . ip6 daddr . fib daddr type
+		flags dynamic
+		counter
+	}
+
+	set fibtype6iif {
+		typeof meta iif . ip6 daddr . fib daddr . iif type
+		flags dynamic
+		counter
+	}
+
+	chain fib_test {
+		meta nfproto ipv4 jump {
+			add @fibif4 { meta iif . ip daddr . fib daddr oif }
+			add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif }
+			add @fibtype4 { meta iif . ip daddr . fib daddr type }
+			add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type }
+
+			add @fibif4 { meta iif . ip saddr . fib saddr oif }
+			add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif }
+		}
+
+		meta nfproto ipv6 jump {
+			add @fibif6    { meta iif . ip6 daddr . fib daddr oif }
+			add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif }
+			add @fibtype6    { meta iif . ip6 daddr . fib daddr type }
+			add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type }
+
+			add @fibif6 { meta iif . ip6 saddr . fib saddr oif }
+			add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif }
+		}
+	}
+
+	chain prerouting {
+		type filter hook prerouting priority 0;
+		icmp type echo-request counter jump fib_test
+
+		# neighbour discovery to be ignored.
+		icmpv6 type echo-request counter jump fib_test
+	}
+}
+EOF
+
+if [ $? -ne 0 ] ;then	# $? is the exit status of the nft -f heredoc above
+	echo "SKIP: Could not load ruleset for fib vrf test"
+	[ $ret -eq 0 ] && ret=$ksft_skip	# only downgrade to SKIP when nothing has failed so far
+	return 1
+fi
+}
+
+check_type()
+{	# Checks that set $1 in $nsrouter holds element { $2 . $3 . $4 } with exactly $5 (default 1) packets, then deletes that element.
+	local setname="$1"	# dynamic set in table 'inet t'
+	local iifname="$2"	# first key component: incoming interface name
+	local addr="$3"		# second key component: address
+	local type="$4"		# third key component; callers pass an address type, an interface name or 0
+	local count="$5"	# expected packet counter value
+	local lret=0
+
+	[ -z "$count" ] && count=1
+
+	if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count ";then	# trailing space: 1 must not match 10
+		echo "FAIL: did not find $iifname . $addr . $type in $setname with $count packets"
+		ip netns exec "$nsrouter" nft list set inet t "$setname"
+		ret=1
+		# do not fail right away, delete entry if it exists so later test that
+		# checks for unwanted keys don't get confused by this *expected* key.
+		lret=1
+	fi
+
+	# delete the entry, this allows to check if anything unexpected appeared
+	# at the end of the test run: all dynamic sets should be empty by then.
+	if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then
+		echo "FAIL: can't delete $iifname . $addr . $type in $setname"
+		ip netns exec "$nsrouter" nft list set inet t "$setname"
+		ret=1
+		return 1
+	fi
+
+	return $lret
+}
+
+check_local()
+{	# check_type wrapper: expects fib type "local" with a packet count of 1 for the given set / iif / address.
+	check_type "$@" "local" 1	# "$@" forwards each argument unchanged (no re-splitting or globbing)
+}
+
+check_unicast()
+{	# check_type wrapper: expects fib type "unicast" with a packet count of 1 for the given set / iif / address.
+	check_type "$@" "unicast" 1	# "$@" forwards each argument unchanged (no re-splitting or globbing)
+}
+
+check_rpf()
+{	# check_type under another name; callers pass set, iif, address, expected oif and packet count themselves.
+	check_type "$@"	# "$@" forwards each argument unchanged (no re-splitting or globbing)
+}
+
+check_fib_vrf_sets_empty()
+{	# Returns 1 (and sets ret=1) if any of the eight dynamic sets in table 'inet t' still lists elements.
+	local setname=""
+	local lret=0
+
+	# A non-empty set means that we have seen unexpected packets OR
+	# that a fib lookup provided unexpected results.
+	for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \
+		       "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do
+		if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then	# "elements" in the listing means the set is not empty
+			echo "FAIL: $setname not empty"
+	                ip netns exec "$nsrouter" nft list set inet t "$setname"
+			ret=1
+			lret=1	# keep going so every non-empty set is reported
+		fi
+	done
+
+	return $lret
+}
+
+check_fib_vrf_type()
+{	# Round-1 verification (veth0 not in the VRF): checks and removes every expected set entry, then requires all sets to be empty; $1 is the PASS/FAIL label.
+	local msg="$1"
+
+	local addr
+	# the incoming interface is always veth0.  As it is not linked to a VRF,
+	# the 'tvrf' device should NOT show up anywhere.
+	local ifname="veth0"
+	local lret=0
+
+	# local_veth0, local_veth1
+	for addr in "10.0.1.1" "10.0.2.1"; do
+		check_local fibtype4  "$ifname" "$addr" || lret=1
+		check_type  fibif4    "$ifname" "$addr" "0" || lret=1
+	done
+	for addr in "dead:1::1" "dead:2::1";do
+		check_local fibtype6  "$ifname" "$addr" || lret=1
+		check_type  fibif6    "$ifname" "$addr" "0" || lret=1
+	done
+
+	# when restricted to the incoming interface, 10.0.1.1 should
+	# be 'local', but 10.0.2.1 unicast.
+	check_local fibtype4iif   "$ifname" "10.0.1.1" || lret=1
+	check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1
+
+	# same for the ipv6 addresses.
+	check_local fibtype6iif   "$ifname" "dead:1::1" || lret=1
+	check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1
+
+	# None of these addresses should find a valid route when restricting
+	# to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are
+	# reachable via 'lo').
+	for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do
+		check_type fibif4iif "$ifname" "$addr" "0" || lret=1
+	done
+
+	# expect default route (veth1), dummy0 is part of VRF but iif isn't.
+	for addr in "10.9.9.1" "10.9.9.2";do
+		check_unicast fibtype4    "$ifname" "$addr" || lret=1
+		check_unicast fibtype4iif "$ifname" "$addr" || lret=1
+		check_type fibif4 "$ifname" "$addr" "veth1" || lret=1
+	done
+	for addr in "dead:9::1" "dead:9::2";do
+		check_unicast fibtype6    "$ifname" "$addr" || lret=1
+		check_unicast fibtype6iif "$ifname" "$addr" || lret=1
+		check_type fibif6 "$ifname" "$addr" "veth1" || lret=1
+	done
+
+	# same for the IPv6 equivalent addresses.
+	for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do
+		check_type  fibif6iif "$ifname" "$addr" "0" || lret=1
+	done
+
+	check_unicast fibtype4    "$ifname" "10.0.2.99" || lret=1
+	check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+	check_unicast fibtype6    "$ifname" "dead:2::99" || lret=1
+	check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+	check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+	check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+	check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+	check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+	check_rpf  fibif4    "$ifname" "10.0.1.99" "veth0" 5 || lret=1	# saddr entries: expected packet count is 5
+	check_rpf  fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+	check_rpf  fibif6    "$ifname" "dead:1::99" "veth0" 5 || lret=1
+	check_rpf  fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1
+
+	check_fib_vrf_sets_empty || lret=1	# anything left over is an unexpected entry
+
+	if [ $lret -eq 0 ];then
+		echo "PASS: $msg"
+	else
+		echo "FAIL: $msg"
+		ret=1
+	fi
+}
+
+check_fib_veth_vrf_type()
+{	# Round-2 verification (veth0 enslaved to tvrf): checks and removes every expected set entry for iif veth0 and tvrf, then requires all sets to be empty.
+	local msg="$1"
+
+	local addr
+	local ifname
+	local setname	# declared but not used in this function
+	local lret=0
+
+	# as veth0 is now part of tvrf interface, packets will be seen
+	# twice, once with iif veth0, then with iif tvrf.
+
+	for ifname in "veth0" "tvrf"; do
+		for addr in "10.0.1.1" "10.9.9.1"; do
+			check_local fibtype4  "$ifname" "$addr" || lret=1
+			# addr local, but nft_fib doesn't return routes with RTN_LOCAL.
+			check_type  fibif4    "$ifname" "$addr" 0 || lret=1
+			check_type  fibif4iif "$ifname" "$addr" 0 || lret=1
+		done
+
+		for addr in "dead:1::1" "dead:9::1"; do
+			check_local fibtype6 "$ifname" "$addr" || lret=1
+			# same, address is local but no route is returned for lo.
+			check_type  fibif6    "$ifname" "$addr" 0 || lret=1
+			check_type  fibif6iif "$ifname" "$addr" 0 || lret=1
+		done
+
+		for t in fibtype4 fibtype4iif; do	# 't' is not declared local in this function
+			check_unicast "$t" "$ifname" 10.9.9.2 || lret=1
+		done
+		for t in fibtype6 fibtype6iif; do
+			check_unicast "$t" "$ifname" dead:9::2 || lret=1
+		done
+
+		check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1
+		check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1
+
+		check_unicast fibtype4    "$ifname" "10.0.2.99" || lret=1
+		check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+
+		check_unicast fibtype6    "$ifname" "dead:2::99" || lret=1
+		check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+		check_type fibif4    "$ifname"  "10.0.2.99" "veth1" || lret=1
+		check_type fibif6    "$ifname" "dead:2::99" "veth1" || lret=1
+		check_type fibif4    "$ifname"   "10.9.9.2" "dummy0" || lret=1
+		check_type fibif6    "$ifname"  "dead:9::2" "dummy0" || lret=1
+
+		# restricted to iif -- MUST NOT provide result, it is != $ifname.
+		check_type fibif4iif "$ifname"  "10.0.2.99" 0 || lret=1
+		check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+		check_rpf  fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1	# saddr entries: expected packet count is 4
+		check_rpf  fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1
+		check_rpf  fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1
+		check_rpf  fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1
+	done
+
+	check_local fibtype4iif "veth0" "10.0.1.1" || lret=1
+	check_local fibtype6iif "veth0" "dead:1::1" || lret=1
+
+	check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1
+	check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1
+
+	# 10.9.9.2 should not provide a result for iif veth, but
+	# should when iif is tvrf.
+	# This is because it is reachable via dummy0 which is part of
+	# tvrf.  iif veth0 MUST conceal the dummy0 result (i.e. return oif 0).
+	check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1
+	check_type fibif6iif "veth0"  "dead:9::2" 0 || lret=1
+
+	check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1
+	check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1
+
+	check_fib_vrf_sets_empty || lret=1	# anything left over is an unexpected entry
+
+	if [ $lret -eq 0 ];then
+		echo "PASS: $msg"
+	else
+		echo "FAIL: $msg"
+		ret=1
+	fi
+}
+
+# Extends nsrouter config by adding dummy0+vrf.
+#
+#  10.0.1.99     10.0.1.1           10.0.2.1         10.0.2.99
+# dead:1::99    dead:1::1          dead:2::1        dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
+#                         [dummy0]
+#                         10.9.9.1
+#                        dead:9::1
+#                          [tvrf]
+test_fib_vrf()
+{	# Two-round VRF test: first with veth0 outside tvrf, then with veth0 enslaved to it; results are judged by the check_fib_*_type helpers.
+	local cntname=""	# not referenced again in this function
+
+	if ! test_fib_vrf_dev_add_dummy; then
+		[ $ret -eq 0 ] && ret=$ksft_skip
+		return
+	fi
+
+	ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0
+	ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad
+
+	ip -net "$nsrouter" route add default via 10.0.2.99
+	ip -net "$nsrouter" route add default via dead:2::99
+
+	load_ruleset_vrf || return
+
+	# no echo reply for these addresses: The dummy interface is part of tvrf,
+	# but veth0 (incoming interface) isn't linked to it.
+	test_ping_unreachable "10.9.9.1" "dead:9::1" &	# backgrounded: its return status is not collected
+	test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+	# expect replies from these.
+	test_ping "10.0.1.1" "dead:1::1"	# NOTE(review): return status of these pings is not checked here
+	test_ping "10.0.2.1" "dead:2::1"
+	test_ping "10.0.2.99" "dead:2::99"
+
+	wait	# wait for the backgrounded unreachable-pings before inspecting the sets
+
+	check_fib_vrf_type "fib expression address types match (iif not in vrf)"
+
+	# second round: this time, make veth0 (rx interface) part of the vrf.
+	# 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2
+	# becomes unreachable.
+	ip -net "$nsrouter" link set veth0 master tvrf
+	ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+	# this reload should not be needed, but in case
+	# there is some error (missing or unexpected entry) this will prevent them
+	# from leaking into round 2.
+	load_ruleset_vrf || return
+
+	test_ping "10.0.1.1" "dead:1::1"	# NOTE(review): return status not checked here either
+	test_ping "10.9.9.1" "dead:9::1"
+
+	# ns2 should no longer be reachable (veth1 not in vrf)
+	test_ping_unreachable "10.0.2.99" "dead:2::99" &
+
+	# vrf via dummy0, but host doesn't exist
+	test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+	wait	# wait for the backgrounded unreachable-pings before inspecting the sets
+
+	check_fib_veth_vrf_type "fib expression address types match (iif in vrf)"
+}
+
 ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
 
 test_ping 10.0.2.1 dead:2::1 || exit 1
-check_drops || exit 1
+check_drops
 
 test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops	# no longer fatal: check_drops records a failure in ret itself
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops"
+
+load_input_ruleset "$ns1"	# install the saddr . iif drop rule in ns1's input hook
+
+test_ping 127.0.0.1 ::1	# loopback traffic inside ns1; return status not checked
+check_drops
+
+test_ping 10.0.1.99 dead:1::99	# ns1's own veth addresses; return status not checked
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets"
+
+load_input_ruleset "$ns1"	# NOTE(review): from here the loopback checks above are repeated, this time exiting on the first failure
+
+test_ping 127.0.0.1 ::1 || exit 1
 check_drops || exit 1
 
-echo "PASS: fib expression did not cause unwanted packet drops"
+test_ping 10.0.1.99 dead:1::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not discard loopback packets"
 
 ip netns exec "$nsrouter" nft flush table inet filter
 
@@ -213,7 +804,7 @@ ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
 # ... pbr ruleset for the router, check iif+oif.
 if ! load_pbr_ruleset "$nsrouter";then
 	echo "SKIP: Could not load fib forward ruleset"
-	exit $ksft_skip
+	[ "$ret" -eq 0 ] && ret=$ksft_skip
 fi
 
 ip -net "$nsrouter" rule add from all table 128
@@ -224,11 +815,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
 # drop main ipv4 table
 ip -net "$nsrouter" -4 rule delete table main
 
-if ! test_ping 10.0.2.99 dead:2::99;then
-	ip -net "$nsrouter" nft list ruleset
-	echo "FAIL: fib mismatch in pbr setup"
-	exit 1
+if test_ping 10.0.2.99 dead:2::99;then
+	echo "PASS: fib expression forward check with policy based routing"
+else
+	echo "FAIL: fib expression forward check with policy based routing"
+	ret=1
 fi
 
-echo "PASS: fib expression forward check with policy based routing"
-exit 0
+test_fib_type "policy routing"	# first run: policy routing rules (tables 128/129) still in place
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+# Un-do policy routing changes
+ip -net "$nsrouter" rule del from all table 128
+ip -net "$nsrouter" rule del from all iif veth0 table 129
+
+ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1
+
+ip -net "$ns1" -4 route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" -4 route add default via 10.0.1.1
+ip -net "$ns1" -6 route add default via dead:1::1
+
+ip -net "$nsrouter" -4 rule add from all table main priority 32766	# re-add the IPv4 'main' rule that was deleted for the pbr test
+
+test_fib_type "default table"	# second run: same checks using the main routing table
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+test_fib_vrf
+
+exit $ret	# 0, 1, or $ksft_skip depending on what the tests above recorded
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a4ee5496f2a1..7a34ef468975 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -20,6 +20,7 @@ ret=0
 SOCAT_TIMEOUT=60
 
 nsin=""
+nsin_small=""
 ns1out=""
 ns2out=""
 
@@ -36,7 +37,7 @@ cleanup() {
 
 	cleanup_all_ns
 
-	rm -f "$nsin" "$ns1out" "$ns2out"
+	rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"
 
 	[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
 }
@@ -72,6 +73,7 @@ lmtu=1500
 rmtu=2000
 
 filesize=$((2 * 1024 * 1024))
+filesize_small=$((filesize / 16))
 
 usage(){
 	echo "nft_flowtable.sh [OPTIONS]"
@@ -89,7 +91,10 @@ do
 		o) omtu=$OPTARG;;
 		l) lmtu=$OPTARG;;
 		r) rmtu=$OPTARG;;
-		s) filesize=$OPTARG;;
+		s)
+			filesize=$OPTARG
+			filesize_small=$((OPTARG / 16))
+		;;
 		*) usage;;
 	esac
 done
@@ -122,6 +127,8 @@ ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
 ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
 ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
 
+ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 for i in 0 1; do
   ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
   ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
@@ -148,7 +155,9 @@ ip -net "$ns1" route add default via dead:1::1
 ip -net "$ns2" route add default via dead:2::1
 
 ip -net "$nsr1" route add default via 192.168.10.2
+ip -6 -net "$nsr1" route add default via fee1:2::2
 ip -net "$nsr2" route add default via 192.168.10.1
+ip -6 -net "$nsr2" route add default via fee1:2::1
 
 ip netns exec "$nsr1" nft -f - <<EOF
 table inet filter {
@@ -215,6 +224,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
 fi
 
 nsin=$(mktemp)
+nsin_small=$(mktemp)
 ns1out=$(mktemp)
 ns2out=$(mktemp)
 
@@ -265,6 +275,7 @@ check_counters()
 check_dscp()
 {
 	local what=$1
+	local pmtud="$2"
 	local ok=1
 
 	local counter
@@ -277,37 +288,39 @@ check_dscp()
 	local pc4z=${counter%*bytes*}
 	local pc4z=${pc4z#*packets}
 
+	local failmsg="FAIL: pmtu $pmtud: $what counters do not match, expected"	# use this function's own $2 (pmtud) instead of relying on a caller's 'pmtu' variable
+
 	case "$what" in
 	"dscp_none")
 		if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
-			echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+			echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
 			ret=1
 			ok=0
 		fi
 		;;
 	"dscp_fwd")
 		if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
-			echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+			echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
 			ret=1
 			ok=0
 		fi
 		;;
 	"dscp_ingress")
 		if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
-			echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+			echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
 			ret=1
 			ok=0
 		fi
 		;;
 	"dscp_egress")
 		if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
-			echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+			echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
 			ret=1
 			ok=0
 		fi
 		;;
 	*)
-		echo "FAIL: Unknown DSCP check" 1>&2
+		echo "$failmsg: Unknown DSCP check" 1>&2
 		ret=1
 		ok=0
 	esac
@@ -319,9 +332,9 @@ check_dscp()
 
 check_transfer()
 {
-	in=$1
-	out=$2
-	what=$3
+	local in=$1
+	local out=$2
+	local what=$3
 
 	if ! cmp "$in" "$out" > /dev/null 2>&1; then
 		echo "FAIL: file mismatch for $what" 1>&2
@@ -342,25 +355,42 @@ test_tcp_forwarding_ip()
 {
 	local nsa=$1
 	local nsb=$2
-	local dstip=$3
-	local dstport=$4
+	local pmtu=$3
+	local proto=$4
+	local dstip=$5
+	local dstport=$6
 	local lret=0
+	local socatc
+	local socatl
+	local infile="$nsin"
+
+	if [ $pmtu -eq 0 ]; then
+		infile="$nsin_small"
+	fi
 
-	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
+	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -${proto} \
+            TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
 	lpid=$!
 
 	busywait 1000 listener_ready
 
-	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
+	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -${proto} \
+            TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
+	socatc=$?
 
 	wait $lpid
+	socatl=$?
 
-	if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
+	if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then	# socatl: listener exit status, socatc: client exit status
+		lret=1	# fold socat failures into this function's result like the file-mismatch checks do ('rc' was never read)
+	fi
+
+	if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
 		lret=1
 		ret=1
 	fi
 
-	if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
+	if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
 		lret=1
 		ret=1
 	fi
@@ -370,14 +400,22 @@ test_tcp_forwarding_ip()
 
 test_tcp_forwarding()
 {
-	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+	local pmtu="$3"		# forwarded to test_tcp_forwarding_ip, which picks the small input file when this is 0
+	local proto="$4"	# used there as the socat -4/-6 flag and TCP4/TCP6 address type
+	local dstip="$5"	# address the client side connects to
+	local dstport="$6"	# port the client side connects to
+
+	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"	# $1/$2: client and listener namespaces
 
 	return $?
 }
 
 test_tcp_forwarding_set_dscp()
 {
-	check_dscp "dscp_none"
+	local pmtu="$3"
+	local proto="$4"
+	local dstip="$5"
+	local dstport="$6"
 
 ip netns exec "$nsr1" nft -f - <<EOF
 table netdev dscpmangle {
@@ -388,8 +426,8 @@ table netdev dscpmangle {
 }
 EOF
 if [ $? -eq 0 ]; then
-	test_tcp_forwarding_ip "$1" "$2"  10.0.2.99 12345
-	check_dscp "dscp_ingress"
+	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
+	check_dscp "dscp_ingress" "$pmtu"
 
 	ip netns exec "$nsr1" nft delete table netdev dscpmangle
 else
@@ -405,10 +443,10 @@ table netdev dscpmangle {
 }
 EOF
 if [ $? -eq 0 ]; then
-	test_tcp_forwarding_ip "$1" "$2"  10.0.2.99 12345
-	check_dscp "dscp_egress"
+	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
+	check_dscp "dscp_egress" "$pmtu"
 
-	ip netns exec "$nsr1" nft flush table netdev dscpmangle
+	ip netns exec "$nsr1" nft delete table netdev dscpmangle
 else
 	echo "SKIP: Could not load netdev:egress for veth1"
 fi
@@ -416,48 +454,53 @@ fi
 	# partial.  If flowtable really works, then both dscp-is-0 and dscp-is-cs3
 	# counters should have seen packets (before and after ft offload kicks in).
 	ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
-	test_tcp_forwarding_ip "$1" "$2"  10.0.2.99 12345
-	check_dscp "dscp_fwd"
+	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
+	check_dscp "dscp_fwd" "$pmtu"
 }
 
 test_tcp_forwarding_nat()
 {
+	# Args: client netns, server netns, pmtu flag (0 or 1), label appended to messages.
+	local nsa="$1" nsb="$2"
+	local pmtu="$3"
+	local what="$4"
 	local lret
-	local pmtu
 
-	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
-	lret=$?
+	[ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
 
-	pmtu=$3
-	what=$4
+	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345
+	lret=$?
 
 	if [ "$lret" -eq 0 ] ; then
 		if [ "$pmtu" -eq 1 ] ;then
-			check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+			check_counters "flow offload for ns1/ns2 with masquerade $what"
 		else
 			echo "PASS: flow offload for ns1/ns2 with masquerade $what"
 		fi
 
-		test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+		test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.6.6.6 1666
 		lret=$?
 		if [ "$pmtu" -eq 1 ] ;then
-			check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+			check_counters "flow offload for ns1/ns2 with dnat $what"
 		elif [ "$lret" -eq 0 ] ; then
 			echo "PASS: flow offload for ns1/ns2 with dnat $what"
 		fi
+	else
+		echo "FAIL: flow offload for ns1/ns2 with masquerade $what"
 	fi
 
 	return $lret
 }
 
 make_file "$nsin" "$filesize"
+make_file "$nsin_small" "$filesize_small"
 
 # First test:
 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
 # Due to MTU mismatch in both directions, all packets (except small packets like pure
 # acks) have to be handled by normal forwarding path.  Therefore, packet counters
 # are not checked.
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
 	echo "PASS: flow offloaded for ns1/ns2"
 else
 	echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -465,6 +508,14 @@ else
 	ret=1
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then
+	echo "PASS: IPv6 flow offloaded for ns1/ns2"
+else
+	echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
 # delete default route, i.e. ns2 won't be able to reach ns1 and
 # will depend on ns1 being masqueraded in nsr1.
 # expect ns1 has nsr1 address.
@@ -489,8 +540,9 @@ table ip nat {
 }
 EOF
 
-if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
-	echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+check_dscp "dscp_none" "0"
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
+	echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
 	exit 0
 fi
 
@@ -513,12 +565,138 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 # For earlier tests (large mtus), packets cannot be handled via flowtable
 # (except pure acks and other small packets).
 ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
+ip netns exec "$ns2"  nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
+	echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
+	exit 0
+fi
+
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
 
 if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
 	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
 	ip netns exec "$nsr1" nft list ruleset
 fi
 
+# IPIP tunnel test:
+# Add IPIP tunnel interfaces and check flowtable acceleration.
+test_ipip() {
+if ! ip -net "$nsr1" link add name tun0 type ipip \
+     local 192.168.10.1 remote 192.168.10.2 >/dev/null;then
+	echo "SKIP: could not add ipip tunnel"
+	[ "$ret" -eq 0 ] && ret=$ksft_skip
+	return
+fi
+ip -net "$nsr1" link set tun0 up
+ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
+ip -net "$nsr1" link set tun6 up
+ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad
+
+ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
+ip -net "$nsr2" link set tun0 up
+ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1
+ip -net "$nsr2" link set tun6 up
+ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad
+
+ip -net "$nsr1" route change default via 192.168.100.2
+ip -net "$nsr2" route change default via 192.168.100.1
+
+# do not use "route change" and delete old default so
+# socat fails to connect in case new default can't be added.
+ip -6 -net "$nsr1" route delete default
+ip -6 -net "$nsr1" route add default via fee1:3::2
+ip -6 -net "$nsr2" route delete default
+ip -6 -net "$nsr2" route add default via fee1:3::1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -6 -net "$ns2" route add default via dead:2::1
+
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward \
+	'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
+	echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
+else
+	echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+# Create vlan tagged devices for IPIP traffic.
+ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
+ip -net "$nsr1" link set veth1.10 up
+ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
+ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
+
+ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun0.10 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
+ip -net "$nsr1" route change default via 192.168.200.2
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'
+
+ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
+ip -net "$nsr1" link set tun6.10 up
+ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
+ip -6 -net "$nsr1" route delete default
+ip -6 -net "$nsr1" route add default via fee1:5::2
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'
+
+ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr2" link set veth0.10 up
+ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
+ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun0.10 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
+ip -net "$nsr2" route change default via 192.168.200.1
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1
+ip -net "$nsr2" link set tun6.10 up
+ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
+ip -6 -net "$nsr2" route delete default
+ip -6 -net "$nsr2" route add default via fee1:5::1
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
+	echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
+else
+	echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+# Restore the previous configuration
+ip -net "$nsr1" route change default via 192.168.10.2
+ip -net "$nsr2" route change default via 192.168.10.1
+ip -net "$ns2" route del default via 10.0.2.1
+ip -6 -net "$ns2" route del default via dead:2::1
+}
+
 # Another test:
 # Add bridge interface br0 to Router1, with NAT enabled.
 test_bridge() {
@@ -604,6 +782,8 @@ ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
 ip -net "$nsr1" link set up dev veth0
 }
 
+test_ipip
+
 test_bridge
 
 KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
@@ -644,7 +824,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
 ip -net "$ns2" route add default via 10.0.2.1
 ip -net "$ns2" route add default via dead:2::1
 
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
 	check_counters "ipsec tunnel mode for ns1/ns2"
 else
 	echo "FAIL: ipsec tunnel mode for ns1/ns2"
@@ -652,6 +832,14 @@ else
 	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
 fi
 
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+	check_counters "IPv6 ipsec tunnel mode for ns1/ns2"
+else
+	echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2"
+	ip netns exec "$nsr1" nft list ruleset 1>&2
+	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
 if [ "$1" = "" ]; then
 	low=1280
 	mtu=$((65536 - low))
@@ -668,7 +856,7 @@ if [ "$1" = "" ]; then
 	fi
 
 	echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
-	$0 -o "$o" -l "$l" -r "$r" -s "$filesize"
+	$0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1
 fi
 
 exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
new file mode 100755
index 000000000000..c0fffaa6dbd9
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -0,0 +1,157 @@
+#!/bin/bash -e
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+# Torture nftables' netdevice notifier callbacks and related code by frequent
+# renaming of interfaces which netdev-family chains and flowtables hook into.
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3 tool"
+
+read kernel_tainted < /proc/sys/kernel/tainted
+
+# how many seconds to torture the kernel?
+# default to 80% of max run time but don't exceed 48s
+TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
+[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48
+
+trap "cleanup_all_ns" EXIT
+
+setup_ns nsc nsr nss
+
+ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr
+ip -net $nsc addr add 10.0.0.1/24 dev cr0
+ip -net $nsc link set cr0 up
+ip -net $nsc route add default via 10.0.0.2
+
+ip -net $nss link add sr0 type veth peer name rs0 netns $nsr
+ip -net $nss addr add 10.1.0.1/24 dev sr0
+ip -net $nss link set sr0 up
+ip -net $nss route add default via 10.1.0.2
+
+ip -net $nsr addr add 10.0.0.2/24 dev rc0
+ip -net $nsr link set rc0 up
+ip -net $nsr addr add 10.1.0.2/24 dev rs0
+ip -net $nsr link set rs0 up
+ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1
+
+{
+	echo "table netdev t {"
+	for ((i = 0; i < 10; i++)); do
+		cat <<-EOF
+		chain chain_rc$i {
+			type filter hook ingress device rc$i priority 0
+			counter
+		}
+		chain chain_rs$i {
+			type filter hook ingress device rs$i priority 0
+			counter
+		}
+		EOF
+	done
+	echo "}"
+	echo "table ip t {"
+	for ((i = 0; i < 10; i++)); do
+		cat <<-EOF
+		flowtable ft_${i} {
+			hook ingress priority 0
+			devices = { rc$i, rs$i }
+		}
+		EOF
+	done
+	echo "chain c {"
+	echo "type filter hook forward priority 0"
+	for ((i = 0; i < 10; i++)); do
+		echo -n "iifname rc$i oifname rs$i "
+		echo    "ip protocol tcp counter flow add @ft_${i}"
+	done
+	echo "counter"
+	echo "}"
+	echo "}"
+} | ip netns exec $nsr nft -f - || {
+	echo "SKIP: Could not load nft ruleset"
+	exit $ksft_skip
+}
+
+for ((o=0, n=1; ; o=n, n++, n %= 10)); do
+	ip -net $nsr link set rc$o name rc$n
+	ip -net $nsr link set rs$o name rs$n
+done &
+rename_loop_pid=$!
+
+while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done &
+nft_list_pid=$!
+
+ip netns exec $nsr nft monitor >/dev/null &
+nft_monitor_pid=$!
+
+ip netns exec $nss iperf3 --server --daemon -1
+summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p'
+rate=$(ip netns exec $nsc iperf3 \
+	--format k -c 10.1.0.1 --time $TEST_RUNTIME \
+	--length 56 --parallel 10 -i 0 | sed -n "$summary_expr")
+
+kill $nft_list_pid
+kill $nft_monitor_pid
+kill $rename_loop_pid
+wait
+
+wildcard_prep() {	# Load a flowtable with a wildcard device spec in $nsr; returns nft's exit status.
+	ip netns exec $nsr nft -f - <<EOF
+table ip t {
+	flowtable ft_wild {
+		hook ingress priority 0
+		devices = { wild* }
+	}
+}
+EOF
+}
+
+if ! wildcard_prep; then
+	echo "SKIP wildcard tests: not supported by host's nft?"
+else
+	for ((i = 0; i < 100; i++)); do
+		ip -net $nsr link add wild$i type dummy &
+	done
+	wait
+	for ((i = 80; i < 100; i++)); do
+		ip -net $nsr link del wild$i &
+	done
+	for ((i = 0; i < 80; i++)); do
+		ip -net $nsr link del wild$i &
+	done
+	wait
+	for ((i = 0; i < 100; i += 10)); do
+		(
+		for ((j = 0; j < 10; j++)); do
+			ip -net $nsr link add wild$((i + j)) type dummy
+		done
+		for ((j = 0; j < 10; j++)); do
+			ip -net $nsr link del wild$((i + j))
+		done
+		) &
+	done
+	wait
+fi
+
+
+[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && {
+	echo "FAIL: Kernel is tainted!"
+	exit $ksft_fail
+}
+
+[[ $rate -gt 0 ]] || {
+	echo "FAIL: Zero throughput in iperf3"
+	exit $ksft_fail
+}
+
+[[ -f /sys/kernel/debug/kmemleak && \
+   -n $(</sys/kernel/debug/kmemleak) ]] && {
+	echo "FAIL: non-empty kmemleak report"
+	exit $ksft_fail
+}
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
index 9e39de26455f..b3ec2d0a3f56 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -569,7 +569,7 @@ test_redirect6()
 	ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 
 	if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
-		echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
+		echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
 		lret=1
 	fi
 
@@ -859,13 +859,31 @@ EOF
 	# from router:service bypass connection tracking.
 	test_port_shadow_notrack "$family"
 
-	# test nat based mitigation: fowarded packets coming from service port
+	# test nat based mitigation: forwarded packets coming from service port
 	# are masqueraded with random highport.
 	test_port_shadow_pat "$family"
 
 	ip netns exec "$ns0" nft delete table $family nat
 }
 
+file_cmp()
+{
+	local infile="$1"
+	local outfile="$2"
+
+	if ! cmp "$infile" "$outfile";then
+		echo -n "Infile "
+		ls -l "$infile"
+		echo -n "Outfile "
+		ls -l "$outfile"
+		echo "ERROR: in and output file mismatch when checking $msg" 1>&1
+		ret=1
+		return 1
+	fi
+
+	return 0
+}
+
 test_stateless_nat_ip()
 {
 	local lret=0
@@ -966,11 +984,7 @@ EOF
 
 	wait
 
-	if ! cmp "$INFILE" "$OUTFILE";then
-		ls -l "$INFILE" "$OUTFILE"
-		echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
-		lret=1
-	fi
+	file_cmp "$INFILE" "$OUTFILE" "udp with stateless nat" || lret=1
 
 	:> "$OUTFILE"
 
@@ -991,6 +1005,62 @@ EOF
 	return $lret
 }
 
+test_dnat_clash()	# Send UDP from one fixed source port to the dnat'ed address, then to the real one.
+{
+	local lret=0
+
+	if ! socat -h > /dev/null 2>&1;then	# socat could not be run: report SKIP
+		echo "SKIP: Could not run dnat clash test without socat tool"
+		[ $ret -eq 0 ] && ret=$ksft_skip	# only set skip while ret is still 0
+		return $ksft_skip
+	fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+flush ruleset
+table ip dnat-test {
+ chain prerouting {
+  type nat hook prerouting priority dstnat; policy accept;
+  ip daddr 10.0.2.1 udp dport 1234 counter dnat to 10.0.1.1:1234
+ }
+}
+EOF
+	if [ $? -ne 0 ]; then	# exit status of the nft ruleset load above
+		echo "SKIP: Could not add dnat rules"
+		[ $ret -eq 0 ] && ret=$ksft_skip
+		return $ksft_skip
+	fi
+
+	local udpdaddr="10.0.2.1"	# step 1 targets the address matched by the dnat rule
+	for i in 1 2;do
+		echo "PING $udpdaddr" > "$INFILE"
+		echo "PONG 10.0.1.1 step $i" | ip netns exec "$ns0" timeout 3 socat STDIO UDP4-LISTEN:1234,bind=10.0.1.1 > "$OUTFILE" 2>/dev/null &
+		local lpid=$!	# listener pid; not referenced again here, the bare wait below is used
+
+		busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1234 "-u"
+
+		result=$(ip netns exec "$ns1" timeout 3 socat STDIO UDP4-SENDTO:"$udpdaddr:1234,sourceport=4321" < "$INFILE")
+		udpdaddr="10.0.1.1"	# step 2 targets the listener's bind address directly
+
+		if [ "$result" != "PONG 10.0.1.1 step $i" ] ; then
+			echo "ERROR: failed to test udp $ns1 to $ns2 with dnat rule step $i, result: \"$result\"" 1>&2
+			lret=1
+			ret=1
+		fi
+
+		wait	# let the background listener exit before comparing files
+
+		file_cmp "$INFILE" "$OUTFILE" "udp dnat step $i" || lret=1
+
+		:> "$OUTFILE"	# truncate the output file for the next step
+	done
+
+	test $lret -eq 0 && echo "PASS: IP dnat clash $ns1:$ns2"
+
+	ip netns exec "$ns0" nft flush ruleset
+
+	return $lret
+}
+
 # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
 for i in "$ns0" "$ns1" "$ns2" ;do
 ip netns exec "$i" nft -f /dev/stdin <<EOF
@@ -1147,6 +1217,7 @@ $test_inet_nat && test_redirect6 inet
 
 test_port_shadowing
 test_stateless_nat_ip
+test_dnat_clash
 
 if [ $ret -ne 0 ];then
 	echo -n "FAIL: "
diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
index 3b81d88bdde3..9f200f80253a 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do
   echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
   echo netns exec "$gw" ip link set "veth$i" up
   echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
-  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
 
   # clients have same IP addresses.
   echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
@@ -178,7 +177,6 @@ fi
 
 ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
 ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
 
 # useful for debugging: allows to use 'ping' from clients to gateway.
 ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 785e3875a6da..d80390848e85 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -10,6 +10,9 @@ source lib.sh
 ret=0
 timeout=5
 
+SCTP_TEST_TIMEOUT=60
+STRESS_TEST_TIMEOUT=30
+
 cleanup()
 {
 	ip netns pids "$ns1" | xargs kill 2>/dev/null
@@ -40,7 +43,7 @@ TMPFILE3=$(mktemp)
 
 TMPINPUT=$(mktemp)
 COUNT=200
-[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8))
 dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"
 
 if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
@@ -275,9 +278,11 @@ test_tcp_forward()
 	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
 	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2
 
+	local tthen=$(date +%s)
+
 	ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
 
-	wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+	wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen"
 	kill "$nfqpid"
 }
 
@@ -288,13 +293,14 @@ test_tcp_localhost()
 
 	ip netns exec "$nsrouter" ./nf_queue -q 3 &
 	local nfqpid=$!
+	local tthen=$(date +%s)
 
 	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
 	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
 
 	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
 
-	wait "$rpid" && echo "PASS: tcp via loopback"
+	wait_and_check_retval "$rpid" "tcp via loopback" "$tthen"
 	kill "$nfqpid"
 }
 
@@ -417,6 +423,23 @@ check_output_files()
 	fi
 }
 
+wait_and_check_retval()	# Wait for a background job, then print PASS/FAIL, the message and a duration.
+{
+	local rpid="$1"		# pid of the background job to wait for
+	local msg="$2"		# description printed after the PASS:/FAIL: prefix
+	local tthen="$3"	# start time in epoch seconds (callers pass the output of date +%s)
+	local tnow=$(date +%s)	# NOTE(review): sampled before the wait below, so wait time is not counted
+
+	if wait "$rpid";then
+		echo -n "PASS: "
+	else
+		echo -n "FAIL: "
+		ret=1		# ret is not declared local in this function
+	fi
+
+	printf "%s (duration: %ds)\n" "$msg" $((tnow-tthen))
+}
+
 test_sctp_forward()
 {
 	ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
@@ -428,13 +451,14 @@ table inet sctpq {
         }
 }
 EOF
-	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+	timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
 	local rpid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
 
 	ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
 	local nfqpid=$!
+	local tthen=$(date +%s)
 
 	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
 
@@ -443,7 +467,7 @@ EOF
 		exit 1
 	fi
 
-	wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain"
+	wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen"
 	kill "$nfqpid"
 
 	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
@@ -462,13 +486,14 @@ EOF
 	# reduce test file size, software segmentation causes sk wmem increase.
 	dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"
 
-	timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+	timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
 	local rpid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
 
 	ip netns exec "$ns1" ./nf_queue -q 11 &
 	local nfqpid=$!
+	local tthen=$(date +%s)
 
 	ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
 
@@ -478,7 +503,7 @@ EOF
 	fi
 
 	# must wait before checking completeness of output file.
-	wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO"
+	wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen"
 	kill "$nfqpid"
 
 	check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
@@ -486,7 +511,7 @@ EOF
 
 udp_listener_ready()
 {
-	ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345
+	ss -S -N "$1" -uln -o "sport = :$2" | grep -q "$2"
 }
 
 output_files_written()
@@ -494,7 +519,7 @@ output_files_written()
 	test -s "$1" && test -s "$2"
 }
 
-test_udp_ct_race()
+test_udp_nat_race()
 {
         ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
 flush ruleset
@@ -521,8 +546,8 @@ EOF
 	ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 &
 	local nfqpid=$!
 
-	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2"
-	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3"
+	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345
+	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345
 	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12
 
 	# Send two packets, one should end up in ns1, other in ns2.
@@ -533,7 +558,7 @@ EOF
 
 	busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2"
 
-	kill "$nfqpid"
+	kill "$nfqpid" "$rpid1" "$rpid2"
 
 	if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then
 		echo "FAIL: Expected One udp conntrack entry"
@@ -561,6 +586,208 @@ EOF
 	echo "PASS: both udp receivers got one packet each"
 }
 
+# Make sure UDPGRO aggregated packets don't lose
+# their skb->nfct entry when nfqueue passes the
+# skb to userspace with software gso segmentation on.
+test_udp_gro_ct()
+{
+	local errprefix="FAIL: test_udp_gro_ct:"
+	local timeout=5
+
+	ip netns exec "$nsrouter" conntrack -F 2>/dev/null
+
+        ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet udpq {
+	# Number of packets/bytes queued to userspace
+	counter toqueue { }
+	# Number of packets/bytes reinjected from userspace with 'ct new' intact
+	counter fromqueue { }
+	# These two counters should be identical and not 0.
+
+	chain prerouting {
+		type filter hook prerouting priority -300; policy accept;
+
+		# userspace sends small packets, if < 1000, UDPGRO did
+		# not kick in, but test needs a 'new' conntrack with udpgro skb.
+		meta iifname veth0 meta l4proto udp meta length > 1000 accept
+
+		# don't pick up non-gso packets and don't queue them to
+		# userspace.
+		notrack
+	}
+
+        chain postrouting {
+		type filter hook postrouting priority 0; policy accept;
+
+		# Only queue unconfirmed fraglist gro skbs to userspace.
+		udp dport 12346 ct status ! confirmed counter name "toqueue" mark set 1 queue num 1
+        }
+
+	chain validate {
+		type filter hook postrouting priority 1; policy accept;
+		# ... and only count those that were reinjected with the
+		# skb->nfct intact.
+		mark 1 counter name "fromqueue"
+	}
+}
+EOF
+	timeout "$timeout" ip netns exec "$ns2" socat UDP-LISTEN:12346,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc &
+	local rpid=$!
+
+	ip netns exec "$nsrouter" nice -n -19 ./nf_queue -G -c -q 1 -o -t 2 > "$TMPFILE2" &
+	local nfqpid=$!
+
+	ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding on rx-gro-list on generic-receive-offload on
+
+	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12346
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1
+
+	local bs=512
+	local count=$(((32 * 1024 * 1024) / bs))
+
+	local nprocs=$(nproc)
+	[ $nprocs -gt 1 ] && nprocs=$((nprocs - 1))
+
+	dd if=/dev/zero bs="$bs" count="$count" 2>/dev/null | for i in $(seq 1 $nprocs); do
+		timeout "$timeout" nice -n 19 ip netns exec "$ns1" \
+			socat -u -b 512 STDIN UDP-DATAGRAM:10.0.2.99:12346,reuseport,bind=0.0.0.0:55221 &
+	done
+
+	busywait 10000 test -s "$TMPFILE1"
+
+	kill "$rpid"
+
+	wait
+
+	local p
+	local b
+	local pqueued
+	local bqueued
+
+	c=$(ip netns exec "$nsrouter" nft list counter inet udpq "toqueue" | grep packets)
+	read p pqueued b bqueued <<EOF
+$c
+EOF
+	local preinject
+	local breinject
+	c=$(ip netns exec "$nsrouter" nft list counter inet udpq "fromqueue" | grep packets)
+	read p preinject b breinject <<EOF
+$c
+EOF
+	ip netns exec "$nsrouter" ethtool -K "veth0" rx-udp-gro-forwarding off
+	ip netns exec "$nsrouter" ethtool -K "veth1" rx-udp-gro-forwarding off
+
+	if [ "$pqueued" -eq 0 ];then
+		# happens when gro did not build at least one aggregate
+		echo "SKIP: No packets were queued"
+		return
+	fi
+
+	local saw_ct_entry=0
+	if ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12346 2>/dev/null | wc -l | grep -q "^1"'; then
+		saw_ct_entry=1
+	else
+		echo "$errprefix Expected udp conntrack entry"
+		ip netns exec "$nsrouter" conntrack -L
+		ret=1
+	fi
+
+	if [ "$pqueued" -ge "$preinject" ] ;then
+		echo "$errprefix Expected software segmentation to occur, had $pqueued and $preinject"
+		ret=1
+		return
+	fi
+
+	# sw segmentation adds extra udp and ip headers.
+	local breinject_expect=$((preinject * (512 + 20 + 8)))
+
+	if [ "$breinject" -eq "$breinject_expect" ]; then
+		if [ "$saw_ct_entry" -eq 1 ];then
+			echo "PASS: fraglist gro skb passed with conntrack entry"
+		else
+			echo "$errprefix fraglist gro skb passed without conntrack entry"
+			ret=1
+		fi
+	else
+		echo "$errprefix Counter mismatch, conntrack entry dropped by nfqueue? Queued: $pqueued, $bqueued. Post-queue: $preinject, $breinject. Expected $breinject_expect"
+		ret=1
+	fi
+
+	if ! ip netns exec "$nsrouter" nft delete table inet udpq; then
+		echo "$errprefix: Could not delete udpq table"
+		ret=1
+	fi
+}
+
+check_tainted()	# Print PASS or TAINT for "$1" depending on the current kernel taint value.
+{
+	local msg="$1"	# description printed after the PASS:/TAINT: prefix
+
+	if [ "$tainted_then" -ne 0 ];then	# value the caller read earlier was already non-zero: no verdict
+		return
+	fi
+
+	read tainted_now < /proc/sys/kernel/tainted
+	if [ "$tainted_now" -eq 0 ];then
+		echo "PASS: $msg"
+	else
+		echo "TAINT: $msg"
+		dmesg	# print the kernel log together with the TAINT report
+		ret=1
+	fi
+}
+
+test_queue_stress()	# Push traffic through eight concurrent nf_queue instances, then run check_tainted.
+{
+	read tainted_then < /proc/sys/kernel/tainted	# read later by check_tainted
+	local i
+
+        ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet t {
+	chain forward {
+		type filter hook forward priority 0; policy accept;
+
+		queue flags bypass to numgen random mod 8
+	}
+}
+EOF
+	timeout "$STRESS_TEST_TIMEOUT" ip netns exec "$ns2" \
+		socat -u UDP-LISTEN:12345,fork,pf=ipv4 STDOUT > /dev/null &
+
+	timeout "$STRESS_TEST_TIMEOUT" ip netns exec "$ns3" \
+		socat -u UDP-LISTEN:12345,fork,pf=ipv4 STDOUT > /dev/null &
+
+	for i in $(seq 0 7); do	# one nf_queue per queue number 0..7, matching "mod 8" in the ruleset
+		ip netns exec "$nsrouter" timeout "$STRESS_TEST_TIMEOUT" \
+			./nf_queue -q $i -t 2 -O -b > /dev/null &
+	done
+
+	ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \
+		ping -q -f 10.0.2.99 > /dev/null 2>&1 &
+	ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \
+		ping -q -f 10.0.3.99 > /dev/null 2>&1 &
+	ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \
+		ping -q -f "dead:2::99" > /dev/null 2>&1 &
+	ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \
+		ping -q -f "dead:3::99" > /dev/null 2>&1 &
+
+	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" 12345
+	busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" 12345
+
+	for i in $(seq 1 4);do	# four sender pairs, each streaming /dev/zero to both listeners
+		ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \
+			socat -u STDIN UDP-DATAGRAM:10.0.2.99:12345 < /dev/zero > /dev/null &
+		ip netns exec "$ns1" timeout "$STRESS_TEST_TIMEOUT" \
+			socat -u STDIN UDP-DATAGRAM:10.0.3.99:12345 < /dev/zero > /dev/null &
+	done
+
+	wait	# every background job above is wrapped in timeout "$STRESS_TEST_TIMEOUT"
+
+	check_tainted "concurrent queueing"
+}
 test_queue_removal()
 {
 	read tainted_then < /proc/sys/kernel/tainted
@@ -584,17 +811,7 @@ EOF
 
 	ip netns exec "$ns1" nft flush ruleset
 
-	if [ "$tainted_then" -ne 0 ];then
-		return
-	fi
-
-	read tainted_now < /proc/sys/kernel/tainted
-	if [ "$tainted_now" -eq 0 ];then
-		echo "PASS: queue program exiting while packets queued"
-	else
-		echo "TAINT: queue program exiting while packets queued"
-		ret=1
-	fi
+	check_tainted "queue program exiting while packets queued"
 }
 
 ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -638,7 +855,9 @@ test_tcp_localhost_connectclose
 test_tcp_localhost_requeue
 test_sctp_forward
 test_sctp_output
-test_udp_ct_race
+test_udp_nat_race
+test_udp_gro_ct
+test_queue_stress
 
 # should be last, adds vrf device in ns1 and changes routes
 test_icmp_vrf
diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
index d16de13fe5a7..1dc7b0450145 100755
--- a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
+++ b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh
@@ -190,13 +190,13 @@ table inet filter {
 }
 EOF
 
-	timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port" 2>/dev/null &
+	timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport,shut-none udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port",shut-none 2>/dev/null &
 	local tproxy_pid=$!
 
-	timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null &
+	timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork,shut-none SYSTEM:"echo PONG_NS2" 2>/dev/null &
 	local server2_pid=$!
 
-	timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null &
+	timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork,shut-none SYSTEM:"echo PONG_NS3" 2>/dev/null &
 	local server3_pid=$!
 
 	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-u"
@@ -205,7 +205,7 @@ EOF
 
 	local result
 	# request from ns1 to ns2 (forwarded traffic)
-	result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888)
+	result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888,shut-none)
 	if [ "$result" == "$expect_ns1_ns2" ] ;then
 		echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2"
 	else
@@ -214,7 +214,7 @@ EOF
 	fi
 
 	# request from ns1 to ns3 (forwarded traffic)
-	result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port")
+	result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port",shut-none)
 	if [ "$result" = "$expect_ns1_ns3" ] ;then
 		echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3"
 	else
@@ -223,7 +223,7 @@ EOF
 	fi
 
 	# request from nsrouter to ns2 (localy originated traffic)
-	result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port")
+	result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",shut-none)
 	if [ "$result" == "$expect_nsrouter_ns2" ] ;then
 		echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2"
 	else
@@ -232,7 +232,7 @@ EOF
 	fi
 
 	# request from nsrouter to ns3 (localy originated traffic)
-	result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port")
+	result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port",shut-none)
 	if [ "$result" = "$expect_nsrouter_ns3" ] ;then
 		echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3"
 	else
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
index 3442cd29bc93..cdb3910af95b 100644
--- a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
@@ -26,7 +26,7 @@
 
 +0.01 > R 643160523:643160523(0) win 0
 
-+0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
++0.1 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
 
 // Must go through.
 +0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
index 86ec4e68594d..24ad41d526d9 100755
--- a/tools/testing/selftests/net/netfilter/rpath.sh
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -1,8 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-# return code to signal skipped test
-ksft_skip=4
+source lib.sh
 
 # search for legacy iptables (it uses the xtables extensions
 if iptables-legacy --version >/dev/null 2>&1; then
@@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then
 	exit $ksft_skip
 fi
 
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-trap "ip netns del $ns1; ip netns del $ns2" EXIT
-
-# create two netns, disable rp_filter in ns2 and
-# keep IPv6 address when moving into VRF
-ip netns add "$ns1"
-ip netns add "$ns2"
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+trap cleanup_all_ns EXIT
+
+# create two netns, keep IPv6 address when moving into VRF
+setup_ns ns1 ns2
 ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
 
 # a standard connection between the netns, should not trigger rp filter
diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
index 21bb1cfd8a85..b282d1785c9b 100644
--- a/tools/testing/selftests/net/netfilter/sctp_collision.c
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
@@ -9,9 +9,10 @@
 int main(int argc, char *argv[])
 {
 	struct sockaddr_in saddr = {}, daddr = {};
-	int sd, ret, len = sizeof(daddr);
+	socklen_t len = sizeof(daddr);
 	struct timeval tv = {25, 0};
 	char buf[] = "hello";
+	int sd, ret;
 
 	if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
 		printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c
new file mode 100644
index 000000000000..79de163d61ab
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/udpclash.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Usage: ./udpclash <IP> <PORT>
+ *
+ * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair.
+ *
+ * This mimics DNS resolver libraries that emit A and AAAA requests
+ * in parallel.
+ *
+ * This exercises conntrack clash resolution logic added and later
+ * refined in
+ *
+ *  71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race")
+ *  ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks")
+ *  6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries")
+ */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+#define THREAD_COUNT 128
+
+struct thread_args {
+	const struct sockaddr_in *si_remote;
+	int sockfd;
+};
+
+static volatile int wait = 1;
+
+static void *thread_main(void *varg)
+{
+	const struct sockaddr_in *si_remote;
+	const struct thread_args *args = varg;
+	static const char msg[] = "foo";
+
+	si_remote = args->si_remote;
+
+	while (wait == 1)
+		; /* spin until run_test() has created all threads */
+
+	if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL,
+		   (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0)
+		exit(111);
+
+	return varg;
+}
+
+static int run_test(int fd, const struct sockaddr_in *si_remote)
+{
+	struct thread_args thread_args = {
+		.si_remote = si_remote,
+		.sockfd = fd,
+	};
+	pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t));
+	unsigned int repl_count = 0, timeout = 0;
+	int i;
+
+	if (!tid) {
+		perror("calloc");
+		return 1;
+	}
+
+	for (i = 0; i < THREAD_COUNT; i++) {
+		int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args);
+
+		if (err != 0) {
+			fprintf(stderr, "pthread_create: %s\n", strerror(err));
+			exit(1);
+		}
+	}
+	/* all threads are created: let them send */
+	wait = 0;
+
+	for (i = 0; i < THREAD_COUNT; i++)
+		pthread_join(tid[i], NULL);
+	/* all sends are done: collect one reply per packet sent */
+	while (repl_count < THREAD_COUNT) {
+		struct sockaddr_in si_repl;
+		socklen_t si_repl_len = sizeof(si_repl);
+		char repl[512];
+		ssize_t ret;
+
+		ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL,
+			       (struct sockaddr *) &si_repl, &si_repl_len);
+		if (ret < 0) {
+			if (timeout++ > 5000) {
+				fputs("timed out while waiting for reply from thread\n", stderr);
+				break;
+			}
+
+			/* give reply time to pass through the stack */
+			usleep(1000);
+			continue;
+		}
+
+		if (si_repl_len != sizeof(*si_remote)) {
+			fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n",
+				(int)si_repl_len, (int)sizeof(si_repl));
+		} else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr ||
+			si_remote->sin_port != si_repl.sin_port) {
+			char a[64], b[64];
+
+			inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a));
+			inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b));
+
+			fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n",
+				a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port));
+		}
+
+		repl_count++;
+	}
+
+	printf("got %u of %d replies\n", repl_count, THREAD_COUNT);
+
+	free(tid);
+
+	return repl_count == THREAD_COUNT ? 0 : 1;
+}
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_in si_local = {
+		.sin_family = AF_INET,
+	};
+	struct sockaddr_in si_remote = {
+		.sin_family = AF_INET,
+	};
+	int fd, ret;
+
+	if (argc < 3) {
+		fputs("Usage: udpclash <daddr> <dport>\n", stderr);
+		return 1;
+	}
+
+	si_remote.sin_port = htons(atoi(argv[2]));
+	si_remote.sin_addr.s_addr = inet_addr(argv[1]);
+
+	fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP);
+	if (fd < 0) {
+		perror("socket");
+		return 1;
+	}
+
+	if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) {
+		perror("bind");
+		return 1;
+	}
+
+	ret = run_test(fd, &si_remote);
+
+	close(fd);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c
index 07423f256f96..51129c564d0a 100644
--- a/tools/testing/selftests/net/netlink-dumps.c
+++ b/tools/testing/selftests/net/netlink-dumps.c
@@ -18,7 +18,7 @@
 #include <linux/mqueue.h>
 #include <linux/rtnetlink.h>
 
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 #include <ynl.h>
 
@@ -31,9 +31,18 @@ struct ext_ack {
 	const char *str;
 };
 
-/* 0: no done, 1: done found, 2: extack found, -1: error */
-static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea)
+enum get_ea_ret {
+	ERROR = -1,
+	NO_CTRL = 0,
+	FOUND_DONE,
+	FOUND_ERR,
+	FOUND_EXTACK,
+};
+
+static enum get_ea_ret
+nl_get_extack(char *buf, size_t n, struct ext_ack *ea)
 {
+	enum get_ea_ret ret = NO_CTRL;
 	const struct nlmsghdr *nlh;
 	const struct nlattr *attr;
 	ssize_t rem;
@@ -41,15 +50,19 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea)
 	for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) {
 		nlh = (struct nlmsghdr *)&buf[n - rem];
 		if (!NLMSG_OK(nlh, rem))
-			return -1;
+			return ERROR;
 
-		if (nlh->nlmsg_type != NLMSG_DONE)
+		if (nlh->nlmsg_type == NLMSG_ERROR)
+			ret = FOUND_ERR;
+		else if (nlh->nlmsg_type == NLMSG_DONE)
+			ret = FOUND_DONE;
+		else
 			continue;
 
 		ea->err = -*(int *)NLMSG_DATA(nlh);
 
 		if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
-			return 1;
+			return ret;
 
 		ynl_attr_for_each(attr, nlh, sizeof(int)) {
 			switch (ynl_attr_type(attr)) {
@@ -68,10 +81,10 @@ static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea)
 			}
 		}
 
-		return 2;
+		return FOUND_EXTACK;
 	}
 
-	return 0;
+	return ret;
 }
 
 static const struct {
@@ -99,9 +112,9 @@ static const struct {
 TEST(dump_extack)
 {
 	int netlink_sock;
+	int i, cnt, ret;
 	char buf[8192];
 	int one = 1;
-	int i, cnt;
 	ssize_t n;
 
 	netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
@@ -118,7 +131,7 @@ TEST(dump_extack)
 	ASSERT_EQ(n, 0);
 
 	/* Dump so many times we fill up the buffer */
-	cnt = 64;
+	cnt = 80;
 	for (i = 0; i < cnt; i++) {
 		n = send(netlink_sock, &dump_neigh_bad,
 			 sizeof(dump_neigh_bad), 0);
@@ -130,6 +143,7 @@ TEST(dump_extack)
 	EXPECT_EQ(n, -1);
 	EXPECT_EQ(errno, ENOBUFS);
 
+	ret = NO_CTRL;
 	for (i = 0; i < cnt; i++) {
 		struct ext_ack ea = {};
 
@@ -140,10 +154,20 @@ TEST(dump_extack)
 		}
 		ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr));
 
-		EXPECT_EQ(nl_get_extack(buf, n, &ea), 2);
+		ret = nl_get_extack(buf, n, &ea);
+		/* Once we fill the buffer we'll see one ENOBUFS followed
+		 * by a number of EBUSYs. Then the last recv() will finally
+		 * trigger and complete the dump.
+		 */
+		if (ret == FOUND_ERR && (ea.err == ENOBUFS || ea.err == EBUSY))
+			continue;
+		EXPECT_EQ(ret, FOUND_EXTACK);
+		EXPECT_EQ(ea.err, EINVAL);
 		EXPECT_EQ(ea.attr_offs,
 			  sizeof(struct nlmsghdr) + sizeof(struct ndmsg));
 	}
+	/* Make sure last message was a full DONE+extack */
+	EXPECT_EQ(ret, FOUND_EXTACK);
 }
 
 static const struct {
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
index 6974474c26f3..38871bdef67f 100755
--- a/tools/testing/selftests/net/netns-name.sh
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -7,10 +7,12 @@ set -o pipefail
 DEV=dummy-dev0
 DEV2=dummy-dev1
 ALT_NAME=some-alt-name
+NSIM_ADDR=2025
 
 RET_CODE=0
 
 cleanup() {
+    cleanup_netdevsim $NSIM_ADDR
     cleanup_ns $NS $test_ns
 }
 
@@ -25,12 +27,15 @@ setup_ns NS test_ns
 
 #
 # Test basic move without a rename
+# Use netdevsim because it has extra asserts for notifiers.
 #
-ip -netns $NS link add name $DEV type dummy || fail
-ip -netns $NS link set dev $DEV netns $test_ns ||
+
+nsim=$(create_netdevsim $NSIM_ADDR $NS)
+ip -netns $NS link set dev $nsim netns $test_ns ||
     fail "Can't perform a netns move"
-ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
-ip -netns $test_ns link del $DEV || fail
+ip -netns $test_ns link show dev $nsim >> /dev/null ||
+    fail "Device not found after move"
+cleanup_netdevsim $NSIM_ADDR
 
 #
 # Test move with a conflict
@@ -78,6 +83,16 @@ ip -netns $NS link show dev $ALT_NAME 2> /dev/null &&
     fail "Can still find alt-name after move"
 ip -netns $test_ns link del $DEV || fail
 
+#
+# Test no conflict of the same name/ifindex in different netns
+#
+ip -netns $NS link add name $DEV index 100 type dummy || fail
+ip -netns $NS link add netns $test_ns name $DEV index 100 type dummy ||
+    fail "Can't create in netns without moving"
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found"
+ip -netns $NS link del $DEV || fail
+ip -netns $test_ns link del $DEV || fail
+
 echo -ne "$(basename $0) \t\t\t\t"
 if [ $RET_CODE -eq 0 ]; then
     echo "[  OK  ]"
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index cd8a58097448..1f5227f3d64d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len)
 	name[0] = '\0';
 	rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
 	if (rc < 0)
-		log_err_errno("setsockopt(SO_BINDTODEVICE)");
+		log_err_errno("getsockopt(SO_BINDTODEVICE)");
 
 	return rc;
 }
@@ -535,7 +535,7 @@ static int set_freebind(int sd, int version)
 		break;
 	case AF_INET6:
 		if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
-			log_err_errno("setsockopt(IPV6_FREEBIND");
+			log_err_errno("setsockopt(IPV6_FREEBIND)");
 			rc = -1;
 		}
 		break;
@@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str,
 			sep++;
 			if (str_to_uint(sep, 1, pfx_len_max,
 					&args->prefix_len) != 0) {
-				fprintf(stderr, "Invalid port\n");
+				fprintf(stderr, "Invalid prefix length\n");
 				return 1;
 			}
 		} else {
@@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen,
 		}
 	}
 
-	nfds = interactive ? MAX(fileno(stdin), sd)  + 1 : sd + 1;
+	nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
 	while (1) {
 		FD_ZERO(&rfds);
 		FD_SET(sd, &rfds);
@@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args)
 	sd = socket(args->version, args->type, args->protocol);
 	if (sd < 0) {
 		log_err_errno("Error opening socket");
-		return  -1;
+		return -1;
 	}
 
 	if (set_reuseaddr(sd) != 0)
@@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
 	 * waiting to be told when to continue
 	 */
 	if (read(fd, &buf, sizeof(buf)) <= 0) {
-		log_err_errno("Failed to read IPC status from status");
+		log_err_errno("Failed to read IPC status from pipe");
 		return 1;
 	}
 	if (!buf) {
diff --git a/tools/testing/selftests/net/nk_qlease.py b/tools/testing/selftests/net/nk_qlease.py
new file mode 100755
index 000000000000..a84a73ff4eda
--- /dev/null
+++ b/tools/testing/selftests/net/nk_qlease.py
@@ -0,0 +1,2109 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import errno
+import time
+from lib.py import (
+    ksft_run,
+    ksft_exit,
+    ksft_eq,
+    ksft_ne,
+    ksft_in,
+    ksft_not_in,
+    ksft_raises,
+)
+from lib.py import (
+    NetNS,
+    NetNSEnter,
+    EthtoolFamily,
+    NetdevFamily,
+    RtnlFamily,
+    NetdevSimDev,
+)
+from lib.py import (
+    NlError,
+    Netlink,
+    cmd,
+    defer,
+    ip,
+)
+
+
+def wait_until(cond, timeout=2.0, interval=0.05):
+    """Poll cond() every interval seconds until it is true or timeout expires."""
+    give_up_at = time.monotonic() + timeout
+    # Returns None in both cases; the caller has to re-check the condition.
+    while not cond() and time.monotonic() < give_up_at:
+        time.sleep(interval)
+
+
+def create_netkit(rxqueues, mode="l2"):
+    all_links = ip("-d link show", json=True)
+    old_idxs = {
+        link["ifindex"]
+        for link in all_links
+        if link.get("linkinfo", {}).get("info_kind") == "netkit"
+    }
+
+    rtnl = RtnlFamily()
+    rtnl.newlink(
+        {
+            "linkinfo": {
+                "kind": "netkit",
+                "data": {
+                    "mode": mode,
+                    "policy": "forward",
+                    "peer-policy": "forward",
+                },
+            },
+            "num-rx-queues": rxqueues,
+        },
+        flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL],
+    )
+
+    all_links = ip("-d link show", json=True)
+    nk_links = [
+        link
+        for link in all_links
+        if link.get("linkinfo", {}).get("info_kind") == "netkit"
+        and link["ifindex"] not in old_idxs
+    ]
+    nk_links.sort(key=lambda x: x["ifindex"])
+    return (
+        nk_links[1]["ifname"],
+        nk_links[1]["ifindex"],
+        nk_links[0]["ifname"],
+        nk_links[0]["ifindex"],
+    )
+
+
+def create_netkit_single(rxqueues):
+    rtnl = RtnlFamily()
+    rtnl.newlink(
+        {
+            "linkinfo": {
+                "kind": "netkit",
+                "data": {
+                    "mode": "l2",
+                    "pairing": "single",
+                },
+            },
+            "num-rx-queues": rxqueues,
+        },
+        flags=[Netlink.NLM_F_CREATE, Netlink.NLM_F_EXCL],
+    )
+
+    all_links = ip("-d link show", json=True)
+    nk_links = [
+        link
+        for link in all_links
+        if link.get("linkinfo", {}).get("info_kind") == "netkit"
+        and "UP" not in link.get("flags", [])
+    ]
+    return nk_links[0]["ifname"], nk_links[0]["ifindex"]
+
+
+def test_remove_phys(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        result = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        nk_queue_id = result["id"]
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["ifindex"], nk_guest_idx)
+    ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id)
+
+    nsimdev.remove()
+    wait_until(lambda: cmd(f"ip link show dev {nk_host}", fail=False).ret != 0)
+    ret = cmd(f"ip link show dev {nk_host}", fail=False)
+    ksft_ne(ret.ret, 0)
+
+
+def test_double_lease(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3)
+    defer(cmd, f"ip link del dev {nk_host}")
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        result = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(result["id"], 1)
+
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": nk_guest_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": nsim.ifindex,
+                        "queue": {"id": src_queue, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.EBUSY)
+
+
+def test_virtual_lessor(netns) -> None:
+    nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_a}")
+    ip(f"link set dev {nk_host_a} up")
+    ip(f"link set dev {nk_guest_a} up")
+
+    nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_b}")
+
+    ip(f"link set dev {nk_guest_b} netns {netns.name}")
+    ip(f"link set dev {nk_host_b} up")
+    ip(f"link set dev {nk_guest_b} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": nk_guest_b_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": nk_guest_a_idx,
+                        "queue": {"id": 0, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def test_phys_lessee(_netns) -> None:
+    nsimdev_a = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev_a.remove)
+    nsim_a = nsimdev_a.nsims[0]
+    ip(f"link set dev {nsim_a.ifname} up")
+
+    nsimdev_b = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev_b.remove)
+    nsim_b = nsimdev_b.nsims[0]
+    ip(f"link set dev {nsim_b.ifname} up")
+
+    netdevnl = NetdevFamily()
+    with ksft_raises(NlError) as e:
+        netdevnl.queue_create(
+            {
+                "ifindex": nsim_a.ifindex,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim_b.ifindex,
+                    "queue": {"id": 0, "type": "rx"},
+                },
+            }
+        )
+    ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def test_different_lessors(netns) -> None:
+    nsimdev_a = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev_a.remove)
+    nsim_a = nsimdev_a.nsims[0]
+    ip(f"link set dev {nsim_a.ifname} up")
+
+    nsimdev_b = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev_b.remove)
+    nsim_b = nsimdev_b.nsims[0]
+    ip(f"link set dev {nsim_b.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim_a.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": nk_guest_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": nsim_b.ifindex,
+                        "queue": {"id": 1, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP)
+
+
+def test_queue_out_of_range(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": nk_guest_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": nsim.ifindex,
+                        "queue": {"id": 2, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.ERANGE)
+
+
+def test_resize_leased(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    ethnl = EthtoolFamily()
+    with ksft_raises(NlError) as e:
+        ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1})
+    ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def test_self_lease(_netns) -> None:
+    nk_host, _, _, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    netdevnl = NetdevFamily()
+    with ksft_raises(NlError) as e:
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nk_guest_idx,
+                    "queue": {"id": 0, "type": "rx"},
+                },
+            }
+        )
+    ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def test_veth_queue_create(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    ip("link add veth0 type veth peer name veth1")
+    defer(cmd, "ip link del dev veth0", fail=False)
+
+    all_links = ip("-d link show", json=True)
+    veth_peer = [
+        link
+        for link in all_links
+        if link.get("ifname") == "veth1"
+    ]
+    veth_peer_idx = veth_peer[0]["ifindex"]
+
+    ip(f"link set dev veth1 netns {netns.name}")
+    ip("link set dev veth0 up")
+    ip("link set dev veth1 up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": veth_peer_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": nsim.ifindex,
+                        "queue": {"id": 1, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def test_create_tx_type(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": nk_guest_idx,
+                    "type": "tx",
+                    "lease": {
+                        "ifindex": nsim.ifindex,
+                        "queue": {"id": 1, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def test_create_primary(_netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, nk_host_idx, _, _ = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_host} up")
+
+    netdevnl = NetdevFamily()
+    with ksft_raises(NlError) as e:
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_host_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                },
+            }
+        )
+    ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP)
+
+
+def test_create_limit(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=1)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": nk_guest_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": nsim.ifindex,
+                        "queue": {"id": 1, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def test_link_flap_phys(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}")
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        result = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        nk_queue_id = result["id"]
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id)
+
+    # Link flap the physical device
+    ip(f"link set dev {nsim.ifname} down")
+    ip(f"link set dev {nsim.ifname} up")
+
+    # Verify lease survives the flap
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id)
+
+
+def test_queue_get_virtual(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}")
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        result = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        nk_queue_id = result["id"]
+
+        # queue-get on virtual device's leased queue should not show lease
+        # info (lease info is only shown from the physical device's side)
+        queue_info = netdevnl.queue_get(
+            {"ifindex": nk_guest_idx, "id": nk_queue_id, "type": "rx"}
+        )
+        ksft_eq(queue_info["id"], nk_queue_id)
+        ksft_eq(queue_info["ifindex"], nk_guest_idx)
+        ksft_not_in("lease", queue_info)
+
+        # Default queue (not leased) also has no lease info
+        queue_info = netdevnl.queue_get(
+            {"ifindex": nk_guest_idx, "id": 0, "type": "rx"}
+        )
+        ksft_not_in("lease", queue_info)
+
+
+def test_remove_virt_first(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        result = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(result["id"], 1)
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], result["id"])
+
+    # Delete netkit (virtual device removed first, physical stays)
+    cmd(f"ip link del dev {nk_host}")
+
+    # Verify lease is cleaned up on physical device
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_not_in("lease", queue_info)
+
+
+def test_multiple_leases(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=3)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=4)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        r1 = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        r2 = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 2, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    ksft_eq(r1["id"], 1)
+    ksft_eq(r2["id"], 2)
+
+    # Verify both leases visible on physical device
+    netdevnl = NetdevFamily()
+    q1 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    q2 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 2, "type": "rx"}
+    )
+    ksft_in("lease", q1)
+    ksft_in("lease", q2)
+    ksft_eq(q1["lease"]["ifindex"], nk_guest_idx)
+    ksft_eq(q2["lease"]["ifindex"], nk_guest_idx)
+    ksft_eq(q1["lease"]["queue"]["id"], r1["id"])
+    ksft_eq(q2["lease"]["queue"]["id"], r2["id"])
+
+
+def test_lease_queue_tx_type(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": nk_guest_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": nsim.ifindex,
+                        "queue": {"id": 1, "type": "tx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def test_invalid_netns(netns) -> None:
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": nk_guest_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": 1,
+                        "queue": {"id": 0, "type": "rx"},
+                        "netns-id": 999,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.ENONET)
+
+
+def test_invalid_phys_ifindex(netns) -> None:
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        with ksft_raises(NlError) as e:
+            netdevnl.queue_create(
+                {
+                    "ifindex": nk_guest_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": 99999,
+                        "queue": {"id": 0, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.ENODEV)
+
+
+def test_multi_netkit_remove_phys(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=3)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    # Create two netkit pairs, each leasing a different physical queue
+    nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_a}", fail=False)
+
+    nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_b}", fail=False)
+
+    ip(f"link set dev {nk_guest_a} netns {netns.name}")
+    ip(f"link set dev {nk_host_a} up")
+    ip(f"link set dev {nk_guest_a} up", ns=netns)
+
+    ip(f"link set dev {nk_guest_b} netns {netns.name}")
+    ip(f"link set dev {nk_host_b} up")
+    ip(f"link set dev {nk_guest_b} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_a_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_b_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 2, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    # Removing the physical device should take down both netkit pairs
+    nsimdev.remove()
+    wait_until(lambda: cmd(f"ip link show dev {nk_host_a}", fail=False).ret != 0
+                       and cmd(f"ip link show dev {nk_host_b}", fail=False).ret != 0)
+    ret = cmd(f"ip link show dev {nk_host_a}", fail=False)
+    ksft_ne(ret.ret, 0)
+    ret = cmd(f"ip link show dev {nk_host_b}", fail=False)
+    ksft_ne(ret.ret, 0)
+
+
+def test_single_remove_phys(_netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_name, nk_idx = create_netkit_single(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_name}", fail=False)
+
+    ip(f"link set dev {nk_name} up")
+
+    netdevnl = NetdevFamily()
+    netdevnl.queue_create(
+        {
+            "ifindex": nk_idx,
+            "type": "rx",
+            "lease": {
+                "ifindex": nsim.ifindex,
+                "queue": {"id": 1, "type": "rx"},
+            },
+        }
+    )
+
+    # Removing the physical device should take down the single netkit device
+    nsimdev.remove()
+    wait_until(lambda: cmd(f"ip link show dev {nk_name}", fail=False).ret != 0)
+    ret = cmd(f"ip link show dev {nk_name}", fail=False)
+    ksft_ne(ret.ret, 0)
+
+
+def test_link_flap_virt(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}")
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        result = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        nk_queue_id = result["id"]
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id)
+
+    # Link flap the virtual (netkit) device
+    ip(f"link set dev {nk_guest} down", ns=netns)
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    # Verify lease survives the virtual device flap
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id)
+
+
+def test_phys_queue_no_lease(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}")
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    # Physical queue 0 (not leased) should have no lease info
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 0, "type": "rx"}
+    )
+    ksft_not_in("lease", queue_info)
+
+    # Physical queue 1 (leased) should have lease info
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+
+
+def test_same_ns_lease(_netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_name, nk_idx = create_netkit_single(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_name}", fail=False)
+
+    ip(f"link set dev {nk_name} up")
+
+    netdevnl = NetdevFamily()
+    result = netdevnl.queue_create(
+        {
+            "ifindex": nk_idx,
+            "type": "rx",
+            "lease": {
+                "ifindex": nsim.ifindex,
+                "queue": {"id": 1, "type": "rx"},
+            },
+        }
+    )
+    ksft_eq(result["id"], 1)
+
+    # Same namespace: lease info should NOT have netns-id
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["ifindex"], nk_idx)
+    ksft_eq(queue_info["lease"]["queue"]["id"], result["id"])
+    ksft_not_in("netns-id", queue_info["lease"])
+
+
+def test_resize_after_unlease(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    # Resize should fail while lease is active
+    ethnl = EthtoolFamily()
+    with ksft_raises(NlError) as e:
+        ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1})
+    ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+    # Delete netkit, clearing the lease
+    cmd(f"ip link del dev {nk_host}")
+
+    # Resize should now succeed
+    ethnl.channels_set({"header": {"dev-index": nsim.ifindex}, "combined-count": 1})
+
+
+def test_lease_queue_zero(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        result = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 0, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(result["id"], 1)
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 0, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], result["id"])
+
+
+def test_release_and_reuse(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    src_queue = 1
+
+    # First lease
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+
+    # Delete netkit, freeing the lease
+    cmd(f"ip link del dev {nk_host}")
+
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_not_in("lease", queue_info)
+
+    # Re-create netkit and lease the same physical queue again
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)):
+        netdevnl = NetdevFamily()
+        result = netdevnl.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(result["id"], 1)
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], result["id"])
+
+
+def test_two_netkits_same_queue(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_a}", fail=False)
+
+    nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_b}", fail=False)
+
+    ip(f"link set dev {nk_guest_a} netns {netns.name}")
+    ip(f"link set dev {nk_host_a} up")
+    ip(f"link set dev {nk_guest_a} up", ns=netns)
+
+    ip(f"link set dev {nk_guest_b} netns {netns.name}")
+    ip(f"link set dev {nk_host_b} up")
+    ip(f"link set dev {nk_guest_b} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_a_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+        with ksft_raises(NlError) as e:
+            netdevnl_ns.queue_create(
+                {
+                    "ifindex": nk_guest_b_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": nsim.ifindex,
+                        "queue": {"id": src_queue, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.EBUSY)
+
+
+def test_l3_mode_lease(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2, mode="l3")
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        result = netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(result["id"], 1)
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["ifindex"], nk_guest_idx)
+    ksft_eq(queue_info["lease"]["queue"]["id"], result["id"])
+
+
+def test_single_double_lease(_netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_name, nk_idx = create_netkit_single(rxqueues=3)
+    defer(cmd, f"ip link del dev {nk_name}", fail=False)
+
+    ip(f"link set dev {nk_name} up")
+
+    netdevnl = NetdevFamily()
+    result = netdevnl.queue_create(
+        {
+            "ifindex": nk_idx,
+            "type": "rx",
+            "lease": {
+                "ifindex": nsim.ifindex,
+                "queue": {"id": 1, "type": "rx"},
+            },
+        }
+    )
+    ksft_eq(result["id"], 1)
+
+    with ksft_raises(NlError) as e:
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                },
+            }
+        )
+    ksft_eq(e.exception.nl_msg.error, -errno.EBUSY)
+
+
+def test_single_different_lessors(_netns) -> None:
+    nsimdev_a = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev_a.remove)
+    nsim_a = nsimdev_a.nsims[0]
+    ip(f"link set dev {nsim_a.ifname} up")
+
+    nsimdev_b = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev_b.remove)
+    nsim_b = nsimdev_b.nsims[0]
+    ip(f"link set dev {nsim_b.ifname} up")
+
+    nk_name, nk_idx = create_netkit_single(rxqueues=3)
+    defer(cmd, f"ip link del dev {nk_name}", fail=False)
+
+    ip(f"link set dev {nk_name} up")
+
+    netdevnl = NetdevFamily()
+    netdevnl.queue_create(
+        {
+            "ifindex": nk_idx,
+            "type": "rx",
+            "lease": {
+                "ifindex": nsim_a.ifindex,
+                "queue": {"id": 1, "type": "rx"},
+            },
+        }
+    )
+
+    with ksft_raises(NlError) as e:
+        netdevnl.queue_create(
+            {
+                "ifindex": nk_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim_b.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                },
+            }
+        )
+    ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP)
+
+
+def test_cross_ns_netns_id(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_in("netns-id", queue_info["lease"])
+
+
+def test_delete_guest_netns(_netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    test_ns = NetNS()
+    ip("netns set init 0", ns=test_ns)
+    ip("link set lo up", ns=test_ns)
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {test_ns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=test_ns)
+
+    src_queue = 1
+    with NetNSEnter(str(test_ns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+
+    del test_ns
+    wait_until(lambda: "lease" not in netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}))
+
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_not_in("lease", queue_info)
+
+    ret = cmd(f"ip link show dev {nk_host}", fail=False)
+    ksft_ne(ret.ret, 0)
+
+
+def test_move_guest_netns(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        result = netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        nk_queue_id = result["id"]
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id)
+
+    new_ns = NetNS()
+    defer(new_ns.__del__)
+    ip(f"link set dev {nk_guest} netns {new_ns.name}", ns=netns)
+
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id)
+
+
+def test_resize_phys_no_reduction(netns) -> None:
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    ethnl = EthtoolFamily()
+    ethnl.channels_set(
+        {"header": {"dev-index": nsim.ifindex}, "combined-count": 2}
+    )
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+
+
+def test_delete_one_netkit_of_two(netns) -> None:
+    """Deleting one of two netkit pairs must release only that pair's lease."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=3)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_a}", fail=False)
+    nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_b}", fail=False)
+
+    ip(f"link set dev {nk_guest_a} netns {netns.name}")
+    ip(f"link set dev {nk_host_a} up")
+    ip(f"link set dev {nk_guest_a} up", ns=netns)
+
+    ip(f"link set dev {nk_guest_b} netns {netns.name}")
+    ip(f"link set dev {nk_host_b} up")
+    ip(f"link set dev {nk_guest_b} up", ns=netns)
+
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_a_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_b_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 2, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    netdevnl = NetdevFamily()
+    q1 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    q2 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 2, "type": "rx"}
+    )
+    ksft_in("lease", q1)
+    ksft_in("lease", q2)
+
+    cmd(f"ip link del dev {nk_host_a}")  # remove pair A only; pair B stays
+
+    q1 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    q2 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 2, "type": "rx"}
+    )
+    ksft_not_in("lease", q1)
+    ksft_in("lease", q2)
+
+
+def test_bind_rx_leased_phys_queue(netns) -> None:
+    """bind-rx spanning a leased nsim queue must fail with EOPNOTSUPP."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    netdevnl = NetdevFamily()
+    with ksft_raises(NlError) as e:
+        netdevnl.bind_rx(
+            {
+                "ifindex": nsim.ifindex,
+                "fd": 0,  # NOTE(review): presumably a placeholder fd - confirm
+                "queues": [
+                    {"id": 0, "type": "rx"},
+                    {"id": 1, "type": "rx"},  # the queue leased above
+                ],
+            }
+        )
+    ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP)
+
+
+def test_resize_phys_shrink_past_leased(netns) -> None:
+    """nsim channels may shrink until the leased queue would be removed."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=4)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    ethnl = EthtoolFamily()
+
+    # Shrink past the leased queue — only queue 3 removed, queue 1 untouched
+    ethnl.channels_set(
+        {"header": {"dev-index": nsim.ifindex}, "combined-count": 3}
+    )
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+
+    # Shrink further — queue 2 removed, queue 1 still untouched
+    ethnl.channels_set(
+        {"header": {"dev-index": nsim.ifindex}, "combined-count": 2}
+    )
+
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+
+    # Shrink into the leased queue — queue 1 is busy, must fail
+    with ksft_raises(NlError) as e:
+        ethnl.channels_set(
+            {"header": {"dev-index": nsim.ifindex}, "combined-count": 1}
+        )
+    ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+
+def test_resize_virt_not_supported(netns) -> None:
+    """Channel resize on the netkit host must fail; lease stays intact."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, nk_host_idx, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    # Channel resize on the netkit host must fail — not supported
+    ethnl = EthtoolFamily()
+    with ksft_raises(NlError) as e:
+        ethnl.channels_set(
+            {"header": {"dev-index": nk_host_idx}, "combined-count": 1}
+        )
+    ksft_eq(e.exception.nl_msg.error, -errno.EOPNOTSUPP)
+
+    # Lease must be intact (checked from the nsim side, queue 1)
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+
+
+def test_lease_devices_down(netns) -> None:
+    """Lease created with devices down must be visible after link up."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+
+    # Create lease while both physical and virtual devices are down
+    src_queue = 1
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        result = netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(result["id"], 1)
+
+    # Bring devices up before queue_get: netdevsim only instantiates NAPIs in
+    # ndo_open, and netdev-genl queue_get returns -ENOENT without a NAPI.
+    ip(f"link set dev {nsim.ifname} up")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+    ksft_eq(queue_info["lease"]["queue"]["id"], result["id"])
+
+
+def test_lease_capacity_exhaustion(netns) -> None:
+    """netkit with rxqueues=3 takes two leases; a third must fail (EINVAL)."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=4)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    # rxqueues=3 means num_rx_queues=3, real_num_rx_queues starts at 1.
+    # Can create 2 leased queues (real goes 1->2->3) but not a 3rd (3->4 > 3).
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=3)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        r1 = netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(r1["id"], 1)
+
+        r2 = netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 2, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(r2["id"], 2)
+
+        # Third lease fails — netkit queue capacity exhausted
+        with ksft_raises(NlError) as e:
+            netdevnl_ns.queue_create(
+                {
+                    "ifindex": nk_guest_idx,
+                    "type": "rx",
+                    "lease": {
+                        "ifindex": nsim.ifindex,
+                        "queue": {"id": 3, "type": "rx"},
+                        "netns-id": 0,
+                    },
+                }
+            )
+        ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+    # Verify the two successful leases are intact
+    netdevnl = NetdevFamily()
+    q1 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    q2 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 2, "type": "rx"}
+    )
+    ksft_in("lease", q1)
+    ksft_in("lease", q2)
+
+
+def test_resize_phys_up(netns) -> None:
+    """Growing nsim channels keeps the lease and adds an unleased queue."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=3)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    # Shrink nsim first so we have room to grow
+    ethnl = EthtoolFamily()
+    ethnl.channels_set(
+        {"header": {"dev-index": nsim.ifindex}, "combined-count": 2}
+    )
+
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    # Grow channels — should succeed since leased queue is not removed
+    ethnl.channels_set(
+        {"header": {"dev-index": nsim.ifindex}, "combined-count": 3}
+    )
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+
+    # New queue 2 should exist without a lease
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 2, "type": "rx"}
+    )
+    ksft_not_in("lease", queue_info)
+
+
+def test_multi_ns_lease(netns) -> None:
+    """Guests in two separate netns each lease a queue of the same nsim."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=3)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+    ns_b = NetNS()
+    defer(ns_b.__del__)
+    ip("netns set init 0", ns=ns_b)
+    ip("link set lo up", ns=ns_b)
+
+    # First netkit pair, guest in netns
+    nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_a}", fail=False)
+    ip(f"link set dev {nk_guest_a} netns {netns.name}")
+    ip(f"link set dev {nk_host_a} up")
+    ip(f"link set dev {nk_guest_a} up", ns=netns)
+
+    # Second netkit pair, guest in ns_b
+    nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_b}", fail=False)
+    ip(f"link set dev {nk_guest_b} netns {ns_b.name}")
+    ip(f"link set dev {nk_host_b} up")
+    ip(f"link set dev {nk_guest_b} up", ns=ns_b)
+
+    # Lease from netns
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        result = netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_a_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(result["id"], 1)
+
+    # Lease from ns_b (different namespace, same physical device)
+    with NetNSEnter(str(ns_b)), NetdevFamily() as netdevnl_ns:
+        result = netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_b_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 2, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+        ksft_eq(result["id"], 1)
+
+    # Verify both leases from the physical side
+    netdevnl = NetdevFamily()
+    q1 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    q2 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 2, "type": "rx"}
+    )
+    ksft_in("lease", q1)
+    ksft_in("lease", q2)
+    ksft_eq(q1["lease"]["ifindex"], nk_guest_a_idx)
+    ksft_eq(q2["lease"]["ifindex"], nk_guest_b_idx)
+
+
+def test_multi_ns_delete_one(netns) -> None:
+    """Deleting one lessee netns must drop only that guest's lease."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=3)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+    ns_b = NetNS()
+    defer(ns_b.__del__)  # early-failure cleanup; assumes a repeat call is OK
+    ip("netns set init 0", ns=ns_b)
+    ip("link set lo up", ns=ns_b)
+
+    # First netkit pair, guest in netns (ns_a)
+    nk_host_a, _, nk_guest_a, nk_guest_a_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_a}", fail=False)
+    ip(f"link set dev {nk_guest_a} netns {netns.name}")
+    ip(f"link set dev {nk_host_a} up")
+    ip(f"link set dev {nk_guest_a} up", ns=netns)
+
+    # Second netkit pair, guest in ns_b
+    nk_host_b, _, nk_guest_b, nk_guest_b_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host_b}", fail=False)
+    ip(f"link set dev {nk_guest_b} netns {ns_b.name}")
+    ip(f"link set dev {nk_host_b} up")
+    ip(f"link set dev {nk_guest_b} up", ns=ns_b)
+
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_a_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 1, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    with NetNSEnter(str(ns_b)), NetdevFamily() as netdevnl_ns:
+        netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_b_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": 2, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )
+
+    netdevnl = NetdevFamily()
+    q1 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    q2 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 2, "type": "rx"}
+    )
+    ksft_in("lease", q1)
+    ksft_in("lease", q2)
+
+    # Delete ns_b explicitly: destroys nk_guest_b, triggers unlease of queue 2
+    ns_b.__del__()  # "del ns_b" would not finalize it: defer() holds a ref
+    wait_until(lambda: "lease" not in netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 2, "type": "rx"}))
+
+    # ns_a's lease on queue 1 must survive
+    q1 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 1, "type": "rx"}
+    )
+    ksft_in("lease", q1)
+    ksft_eq(q1["lease"]["ifindex"], nk_guest_a_idx)
+
+    # ns_b's lease on queue 2 must be gone
+    q2 = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": 2, "type": "rx"}
+    )
+    ksft_not_in("lease", q2)
+
+    # nk_host_b should be gone too (guest removal takes its netkit peer along)
+    ret = cmd(f"ip link show dev {nk_host_b}", fail=False)
+    ksft_ne(ret.ret, 0)
+
+
+def test_move_phys_netns(netns) -> None:
+    """The lease must survive moving the nsim device to another netns."""
+    nsimdev = NetdevSimDev(port_count=1, queue_count=2)
+    defer(nsimdev.remove)
+    nsim = nsimdev.nsims[0]
+    ip(f"link set dev {nsim.ifname} up")
+
+    nk_host, _, nk_guest, nk_guest_idx = create_netkit(rxqueues=2)
+    defer(cmd, f"ip link del dev {nk_host}", fail=False)
+    ip(f"link set dev {nk_guest} netns {netns.name}")
+    ip(f"link set dev {nk_host} up")
+    ip(f"link set dev {nk_guest} up", ns=netns)
+
+    src_queue = 1
+    with NetNSEnter(str(netns)), NetdevFamily() as netdevnl_ns:
+        nk_queue_id = netdevnl_ns.queue_create(
+            {
+                "ifindex": nk_guest_idx,
+                "type": "rx",
+                "lease": {
+                    "ifindex": nsim.ifindex,
+                    "queue": {"id": src_queue, "type": "rx"},
+                    "netns-id": 0,
+                },
+            }
+        )["id"]
+
+    netdevnl = NetdevFamily()
+    queue_info = netdevnl.queue_get(
+        {"ifindex": nsim.ifindex, "id": src_queue, "type": "rx"}
+    )
+    ksft_in("lease", queue_info)
+
+    # Move the physical device to a new namespace. Move it back to init_net
+    # on cleanup before the other defers fire (new_ns deletion, nsimdev.remove)
+    # so nsim lives in a stable namespace when they run.
+    new_ns = NetNS()
+    defer(new_ns.__del__)
+    ip(f"link set dev {nsim.ifname} netns {new_ns.name}")
+    defer(ip, f"link set dev {nsim.ifname} netns init", ns=new_ns)
+
+    # Physical device is now in new_ns — find its ifindex there
+    all_links = ip("-d link show", json=True, ns=new_ns)
+    nsim_in_new = [lnk for lnk in all_links if lnk.get("ifname") == nsim.ifname]
+    new_ifindex = nsim_in_new[0]["ifindex"]
+
+    # Moving a device across netns brings it admin-down; bring it back up so
+    # netdevsim re-creates the NAPI (netdev-genl queue_get needs it).
+    ip(f"link set dev {nsim.ifname} up", ns=new_ns)
+
+    # Verify lease survived the namespace move
+    with NetNSEnter(str(new_ns)), NetdevFamily() as netdevnl_ns:
+        queue_info = netdevnl_ns.queue_get(
+            {"ifindex": new_ifindex, "id": src_queue, "type": "rx"}
+        )
+        ksft_in("lease", queue_info)
+        ksft_eq(queue_info["lease"]["queue"]["id"], nk_queue_id)
+
+
+def main() -> None:
+    """Set up the shared guest netns, run all lease tests, clean up."""
+    netns = NetNS()
+    cmd("ip netns attach init 1")  # give the netns of pid 1 the name "init"
+    ip("netns set init 0", ns=netns)  # tests pass "netns-id": 0 for leases
+    ip("link set lo up", ns=netns)
+
+    ksft_run(
+        [
+            test_remove_phys,
+            test_double_lease,
+            test_virtual_lessor,
+            test_phys_lessee,
+            test_different_lessors,
+            test_queue_out_of_range,
+            test_resize_leased,
+            test_self_lease,
+            test_create_tx_type,
+            test_create_primary,
+            test_create_limit,
+            test_link_flap_phys,
+            test_queue_get_virtual,
+            test_remove_virt_first,
+            test_multiple_leases,
+            test_lease_queue_tx_type,
+            test_invalid_netns,
+            test_invalid_phys_ifindex,
+            test_multi_netkit_remove_phys,
+            test_single_remove_phys,
+            test_link_flap_virt,
+            test_phys_queue_no_lease,
+            test_same_ns_lease,
+            test_resize_after_unlease,
+            test_lease_queue_zero,
+            test_release_and_reuse,
+            test_veth_queue_create,
+            test_two_netkits_same_queue,
+            test_l3_mode_lease,
+            test_single_double_lease,
+            test_single_different_lessors,
+            test_cross_ns_netns_id,
+            test_delete_guest_netns,
+            test_move_guest_netns,
+            test_resize_phys_no_reduction,
+            test_delete_one_netkit_of_two,
+            test_bind_rx_leased_phys_queue,
+            test_resize_phys_shrink_past_leased,
+            test_resize_virt_not_supported,
+            test_lease_devices_down,
+            test_lease_capacity_exhaustion,
+            test_resize_phys_up,
+            test_multi_ns_lease,
+            test_multi_ns_delete_one,
+            test_move_phys_netns,
+        ],
+        args=(netns,),
+    )
+    cmd("ip netns del init", fail=False)  # undo the "attach init" above
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index 93e8cb671c3d..eff55c64a012 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -1,10 +1,15 @@
 #!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 
-import time
-from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
-from lib.py import NetdevFamily, NetdevSimDev, ip
+"""
+Tests for the netdev netlink family.
+"""
+
+import errno
+from os import system
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_raises, ksft_busy_wait
+from lib.py import NetdevFamily, NetdevSimDev, NlError, ip
 
 
 def empty_check(nf) -> None:
@@ -18,6 +23,15 @@ def lo_check(nf) -> None:
     ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
 
 
+def dev_dump_reject_attr(nf) -> None:
+    """A dev-get dump carrying an attribute must fail: EINVAL plus extack."""
+    with ksft_raises(NlError) as err:
+        nf.dev_get({'ifindex': 1}, dump=True)
+    ksft_eq(err.exception.nl_msg.error, -errno.EINVAL)
+    ksft_eq(err.exception.nl_msg.extack['msg'], 'Unknown attribute type')
+    ksft_eq(err.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+
 def napi_list_check(nf) -> None:
     with NetdevSimDev(queue_count=100) as nsimdev:
         nsim = nsimdev.nsims[0]
@@ -34,6 +48,143 @@ def napi_list_check(nf) -> None:
                 ksft_eq(len(napis), 100,
                         comment=f"queue count after reset queue {q} mode {i}")
 
+def napi_set_threaded(nf) -> None:
+    """
+    Check threaded NAPI set/unset per NAPI (napi-set) and per device
+    (sysfs "threaded"), including across a link down/up cycle.
+    """
+    with NetdevSimDev(queue_count=2) as nsimdev:
+        nsim = nsimdev.nsims[0]
+
+        ip(f"link set dev {nsim.ifname} up")
+
+        napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+        ksft_eq(len(napis), 2)
+
+        napi0_id = napis[0]['id']
+        napi1_id = napis[1]['id']
+
+        # set napi threaded and verify
+        nf.napi_set({'id': napi0_id, 'threaded': "enabled"})
+        napi0 = nf.napi_get({'id': napi0_id})
+        ksft_eq(napi0['threaded'], "enabled")
+        ksft_ne(napi0.get('pid'), None)
+
+        # check it is not set for napi1
+        napi1 = nf.napi_get({'id': napi1_id})
+        ksft_eq(napi1['threaded'], "disabled")
+        ksft_eq(napi1.get('pid'), None)
+
+        ip(f"link set dev {nsim.ifname} down")
+        ip(f"link set dev {nsim.ifname} up")
+
+        # after the link down/up cycle napi0 must still be threaded
+        napi0 = nf.napi_get({'id': napi0_id})
+        ksft_eq(napi0['threaded'], "enabled")
+        ksft_ne(napi0.get('pid'), None)
+
+        # check it is still not set for napi1
+        napi1 = nf.napi_get({'id': napi1_id})
+        ksft_eq(napi1['threaded'], "disabled")
+        ksft_eq(napi1.get('pid'), None)
+
+        # unset napi threaded and verify
+        nf.napi_set({'id': napi0_id, 'threaded': "disabled"})
+        napi0 = nf.napi_get({'id': napi0_id})
+        ksft_eq(napi0['threaded'], "disabled")
+        ksft_eq(napi0.get('pid'), None)
+
+        # set threaded at device level
+        system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+        # check napi threaded is set for both napis
+        napi0 = nf.napi_get({'id': napi0_id})
+        ksft_eq(napi0['threaded'], "enabled")
+        ksft_ne(napi0.get('pid'), None)
+        napi1 = nf.napi_get({'id': napi1_id})
+        ksft_eq(napi1['threaded'], "enabled")
+        ksft_ne(napi1.get('pid'), None)
+
+        # unset threaded at device level
+        system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+        # check napi threaded is unset for both napis
+        napi0 = nf.napi_get({'id': napi0_id})
+        ksft_eq(napi0['threaded'], "disabled")
+        ksft_eq(napi0.get('pid'), None)
+        napi1 = nf.napi_get({'id': napi1_id})
+        ksft_eq(napi1['threaded'], "disabled")
+        ksft_eq(napi1.get('pid'), None)
+
+        # set napi threaded for napi0 only, this time passing the raw value 1
+        nf.napi_set({'id': napi0_id, 'threaded': 1})
+        napi0 = nf.napi_get({'id': napi0_id})
+        ksft_eq(napi0['threaded'], "enabled")
+        ksft_ne(napi0.get('pid'), None)
+
+        # unset threaded at device level; must also clear napi0's own setting
+        system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+        # check napi threaded is unset for both napis
+        napi0 = nf.napi_get({'id': napi0_id})
+        ksft_eq(napi0['threaded'], "disabled")
+        ksft_eq(napi0.get('pid'), None)
+        napi1 = nf.napi_get({'id': napi1_id})
+        ksft_eq(napi1['threaded'], "disabled")
+        ksft_eq(napi1.get('pid'), None)
+
+def dev_set_threaded(nf) -> None:
+    """
+    Flip threaded NAPI for a whole netdevsim device through the sysfs
+    "threaded" file and confirm with napi-get that both NAPIs of the
+    device follow: "enabled" with a pid reported after writing 1,
+    "disabled" with no pid after writing 0.
+    nf is the family handle used for the napi-get queries.
+    """
+    with NetdevSimDev(queue_count=2) as sim:
+        port = sim.nsims[0]
+
+        ip(f"link set dev {port.ifname} up")
+
+        listed = nf.napi_get({'ifindex': port.ifindex}, dump=True)
+        ksft_eq(len(listed), 2)
+
+        # Keep the ids in dump order so the checks below visit the first
+        # NAPI before the second.
+        napi_ids = (listed[0]['id'], listed[1]['id'])
+
+        # Device-wide enable ...
+        system(f"echo 1 > /sys/class/net/{port.ifname}/threaded")
+
+        # ... must show up on each NAPI, together with a pid.
+        for napi_id in napi_ids:
+            state = nf.napi_get({'id': napi_id})
+            ksft_eq(state['threaded'], "enabled")
+            ksft_ne(state.get('pid'), None)
+
+        # Device-wide disable ...
+        system(f"echo 0 > /sys/class/net/{port.ifname}/threaded")
+
+        # ... must clear the mode and the pid on each NAPI.
+        for napi_id in napi_ids:
+            state = nf.napi_get({'id': napi_id})
+            ksft_eq(state['threaded'], "disabled")
+            ksft_eq(state.get('pid'), None)
+
+def nsim_rxq_reset_down(nf) -> None:
+    """
+    Reset rx queues through the queue API while the interface is down.
+    Only netdevsim is exercised for now; this should move to real HW
+    once more devices support the queue API.
+    """
+    with NetdevSimDev(queue_count=4) as sim:
+        port = sim.nsims[0]
+        ip(f"link set dev {port.ifname} down")
+
+        # One reset request per index 0, 2 and 3 (index 1 is not exercised)
+        for qid in (0, 2, 3):
+            port.dfs_write("queue_reset", f"1 {qid}")
+
 
 def page_pool_check(nf) -> None:
     with NetdevSimDev() as nsimdev:
@@ -105,8 +256,16 @@ def page_pool_check(nf) -> None:
 
 
 def main() -> None:
+    """ Ksft boiler plate main """
     nf = NetdevFamily()
-    ksft_run([empty_check, lo_check, page_pool_check, napi_list_check],
+    ksft_run([empty_check,
+              lo_check,
+              dev_dump_reject_attr,
+              napi_list_check,
+              napi_set_threaded,
+              dev_set_threaded,
+              nsim_rxq_reset_down,
+              page_pool_check],
              args=(nf, ))
     ksft_exit()
 
diff --git a/tools/testing/selftests/net/nl_nlctrl.py b/tools/testing/selftests/net/nl_nlctrl.py
new file mode 100755
index 000000000000..fe1f66dc9435
--- /dev/null
+++ b/tools/testing/selftests/net/nl_nlctrl.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Tests for the nlctrl genetlink family (family info and policy dumps).
+"""
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, ksft_ge, ksft_true, ksft_in, ksft_not_in
+from lib.py import NetdevFamily, EthtoolFamily, NlctrlFamily
+
+
+def getfamily_do(ctrl) -> None:
+    """Fetch the netdev family by name and check the flags of its ops."""
+    family = ctrl.getfamily({'family-name': 'netdev'})
+    ksft_eq(family['family-name'], 'netdev')
+    ksft_true(family['family-id'] > 0)
+
+    # 'ops' is [{$idx: {"id"...}}, ...]: drop the indices, key by command id.
+    ops = {}
+    for indexed in family['ops']:
+        for entry in indexed.values():
+            ops[entry['id']] = entry
+    ksft_eq(len(ops), len(family['ops']))
+
+    # Every op must advertise a policy (for do, dump, or both)
+    for entry in ops.values():
+        ksft_in('cmd-cap-haspol', entry['flags'],
+                comment=f"op {entry['id']} missing haspol")
+
+    # dev-get (id 1): do and dump
+    ksft_in('cmd-cap-do', ops[1]['flags'])
+    ksft_in('cmd-cap-dump', ops[1]['flags'])
+    # qstats-get (id 12): dump only
+    ksft_not_in('cmd-cap-do', ops[12]['flags'])
+    ksft_in('cmd-cap-dump', ops[12]['flags'])
+    # napi-set (id 14): do only, admin required
+    ksft_in('cmd-cap-do', ops[14]['flags'])
+    ksft_not_in('cmd-cap-dump', ops[14]['flags'])
+    ksft_in('admin-perm', ops[14]['flags'])
+
+    # Commands that exist only as notifications (dev-add/del/change-ntf
+    # etc.) have no do/dump handlers and so must be absent from the ops.
+    for cmd_id in [2, 3, 4, 6, 7, 8]:
+        ksft_not_in(cmd_id, ops,
+                    comment=f"ntf-only cmd {cmd_id} should not be in ops")
+
+
+def getfamily_dump(ctrl) -> None:
+    """Dump every family; the result must include nlctrl and netdev."""
+    dumped = ctrl.getfamily({}, dump=True)
+    ksft_ge(len(dumped), 2)
+
+    found = [entry['family-name'] for entry in dumped]
+    ksft_in('nlctrl', found, comment="nlctrl not found in family dump")
+    ksft_in('netdev', found, comment="netdev not found in family dump")
+
+
+def getpolicy_dump(_ctrl) -> None:
+    """Dump policies for ops using get_policy() and validate results.
+
+    Test with netdev (split ops) where do and dump can have different
+    policies, and with ethtool (full ops) where they always share one.
+    _ctrl is unused: each family object below fetches its own policies.
+    """
+    # -- netdev (split ops) --
+    ndev = NetdevFamily()
+    # dev-get: do has a real policy with ifindex, dump has no policy
+    # (only the reject-all policy with maxattr=0)
+    pol = ndev.get_policy('dev-get', 'do')
+    ksft_in('ifindex', pol, comment="dev-get do policy should have ifindex")
+    ksft_eq(pol['ifindex'].type, 'u32')
+
+    pol_dump = ndev.get_policy('dev-get', 'dump')
+    ksft_eq(len(pol_dump), 0, comment="dev-get should not accept any attrs")
+
+    # napi-get: both do and dump have real policies
+    pol_do = ndev.get_policy('napi-get', 'do')
+    ksft_ge(len(pol_do), 1)
+
+    pol_dump = ndev.get_policy('napi-get', 'dump')
+    ksft_ge(len(pol_dump), 1)
+
+    # -- ethtool (full ops) --
+    et = EthtoolFamily()
+
+    # strset-get (has both do and dump, full ops share policy)
+    pol_do = et.get_policy('strset-get', 'do')
+    ksft_ge(len(pol_do), 1, comment="strset-get should have a do policy")
+
+    pol_dump = et.get_policy('strset-get', 'dump')
+    ksft_ge(len(pol_dump), 1, comment="strset-get should have a dump policy")
+
+    # Same policy means same attribute names
+    ksft_eq(set(pol_do.keys()), set(pol_dump.keys()))
+
+    # linkinfo-set is do-only (SET command), no dump
+    pol_do = et.get_policy('linkinfo-set', 'do')
+    ksft_ge(len(pol_do), 1, comment="linkinfo-set should have a do policy")
+
+    pol_dump = et.get_policy('linkinfo-set', 'dump')
+    ksft_eq(pol_dump, None,
+            comment="linkinfo-set should not have a dump policy")
+
+
+def getpolicy_by_op(_ctrl) -> None:
+    """Check that the dev-get 'do' policy reports spec-resolved attr names."""
+    netdev = NetdevFamily()
+    policy = netdev.get_policy('dev-get', 'do')
+
+    # There has to be at least one attribute in the policy
+    ksft_ge(len(policy), 1)
+    # A name starting with 'attr-' is the 'attr-N' fallback, i.e. unresolved
+    for attr_name in policy:
+        is_named = not attr_name.startswith('attr-')
+        ksft_true(is_named, comment=f"unresolved attr name: {attr_name}")
+
+
+def main() -> None:
+    """Run all nlctrl checks against one NlctrlFamily, then exit via ksft."""
+    family = NlctrlFamily()
+    tests = [
+        getfamily_do, getfamily_dump,
+        getpolicy_dump, getpolicy_by_op,
+    ]
+    ksft_run(tests, args=(family, ))
+    ksft_exit()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 960e1ab4dd04..b327d3061ed5 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -25,6 +25,7 @@ tests="
 	nat_related_v4				ip4-nat-related: ICMP related matches work with SNAT
 	netlink_checks				ovsnl: validate netlink attrs and settings
 	upcall_interfaces			ovs: test the upcall interfaces
+	tunnel_metadata				ovs: test extraction of tunnel metadata
 	drop_reason				drop: test drop reasons are emitted
 	psample					psample: Sampling packets with psample"
 
@@ -113,13 +114,13 @@ ovs_add_dp () {
 }
 
 ovs_add_if () {
-	info "Adding IF to DP: br:$2 if:$3"
-	if [ "$4" != "-u" ]; then
-		ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \
-		    || return 1
+	info "Adding IF to DP: br:$3 if:$4 ($2)"
+	if [ "$5" != "-u" ]; then
+		ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if \
+		    -t "$2" "$3" "$4" || return 1
 	else
 		python3 $ovs_base/ovs-dpctl.py add-if \
-		    -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err &
+		    -u -t "$2" "$3" "$4" >$ovs_dir/$4.out 2>$ovs_dir/$4.err &
 		pid=$!
 		on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null"
 	fi
@@ -166,9 +167,9 @@ ovs_add_netns_and_veths () {
 	fi
 
 	if [ "$7" != "-u" ]; then
-		ovs_add_if "$1" "$2" "$4" || return 1
+		ovs_add_if "$1" "netdev" "$2" "$4" || return 1
 	else
-		ovs_add_if "$1" "$2" "$4" -u || return 1
+		ovs_add_if "$1" "netdev" "$2" "$4" -u || return 1
 	fi
 
 	if [ $TRACING -eq 1 ]; then
@@ -330,6 +331,11 @@ test_psample() {
 # - drop packets and verify the right drop reason is reported
 test_drop_reason() {
 	which perf >/dev/null 2>&1 || return $ksft_skip
+	which pahole >/dev/null 2>&1 || return $ksft_skip
+
+	ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys |
+			      awk '/OPENVSWITCH/ { print $3; }' |
+			      tr -d ,)
 
 	sbx_add "test_drop_reason" || return $?
 
@@ -373,7 +379,7 @@ test_drop_reason() {
 		"in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop'
 
 	ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20
-	ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION
+	ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION
 	if [[ "$?" -ne "2" ]]; then
 		info "Did not detect expected drops: $?"
 		return 1
@@ -390,7 +396,7 @@ test_drop_reason() {
 
 	ovs_drop_record_and_run \
             "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000
-	ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR
+	ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR
 	if [[ "$?" -ne "1" ]]; then
 		info "Did not detect expected explicit error drops: $?"
 		return 1
@@ -398,7 +404,7 @@ test_drop_reason() {
 
 	ovs_drop_record_and_run \
             "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000
-	ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION
+	ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION
 	if [[ "$?" -ne "1" ]]; then
 		info "Did not detect expected explicit drops: $?"
 		return 1
@@ -751,6 +757,79 @@ test_upcall_interfaces() {
 	return 0
 }
 
+# Create a kernel tunnel device inside a network namespace and bring it up.
+# Arguments: <sandbox> <netns> <tunnel type> <device name> <address>, followed
+# by any extra arguments, which are appended to the "ip link add" command.
+# The device gets <address> and an MTU of 1450; its removal is registered
+# through on_exit. Returns 1 as soon as one of the ip commands fails.
+ovs_add_kernel_tunnel() {
+	local sbxname=$1; shift
+	local ns=$1; shift
+	local tnl_type=$1; shift
+	local name=$1; shift
+	local addr=$1; shift
+
+	info "setting up kernel ${tnl_type} tunnel ${name}"
+	# $* is left unquoted on purpose: each remaining argument becomes a
+	# separate word of the "ip link add" command line
+	ovs_sbx "${sbxname}" ip -netns ${ns} link add dev ${name} type ${tnl_type} $* || return 1
+	on_exit "ovs_sbx ${sbxname} ip -netns ${ns} link del ${name} >/dev/null 2>&1"
+	ovs_sbx "${sbxname}" ip -netns ${ns} addr add dev ${name} ${addr} || return 1
+	ovs_sbx "${sbxname}" ip -netns ${ns} link set dev ${name} mtu 1450 up || return 1
+}
+
+# tunnel_metadata test
+# - create a datapath with a vxlan port whose upcalls are logged (-u)
+# - create four kernel vxlan devices in the tns namespace, each with its own
+#   id, ttl, df and udp checksum settings
+# - send one arping through each of them
+# - check that the logged MISS upcalls carry the matching tunnel() metadata
+#   ahead of the arp() header
+# The test is skipped when arping is not installed.
+test_tunnel_metadata() {
+	which arping >/dev/null 2>&1 || return $ksft_skip
+
+	sbxname="test_tunnel_metadata"
+	sbx_add "${sbxname}" || return 1
+
+	info "setting up new DP"
+	ovs_add_dp "${sbxname}" tdp0 -V 2:1 || return 1
+
+	ovs_add_netns_and_veths "${sbxname}" tdp0 tns left0 l0 \
+		172.31.110.1/24 || return 1
+
+	info "removing veth interface from openvswitch and setting IP"
+	ovs_del_if "${sbxname}" tdp0 left0 || return 1
+	ovs_sbx "${sbxname}" ip addr add 172.31.110.2/24 dev left0 || return 1
+	ovs_sbx "${sbxname}" ip link set left0 up || return 1
+
+	info "setting up tunnel port in openvswitch"
+	ovs_add_if "${sbxname}" "vxlan" tdp0 ovs-vxlan0 -u || return 1
+	on_exit "ovs_sbx ${sbxname} ip link del ovs-vxlan0"
+	ovs_wait ip link show ovs-vxlan0 &>/dev/null || return 1
+	ovs_sbx "${sbxname}" ip link set ovs-vxlan0 up || return 1
+
+	# One tunnel per line. Columns, in the order consumed by "read" below:
+	# index, tunnel address, id, ttl, df, udp checksum option and the
+	# expected flags, written as a fragment of an extended regex.
+	# The last line has no flags column.
+	configs=$(echo '
+	    1 172.31.221.1/24 1155332 32   set   udpcsum flags\(df\|csum\)
+	    2 172.31.222.1/24 1234567 45   set noudpcsum flags\(df\)
+	    3 172.31.223.1/24 1020304 23 unset   udpcsum flags\(csum\)
+	    4 172.31.224.1/24 1357986 15 unset noudpcsum' | sed '/^$/d')
+
+	while read -r i addr id ttl df csum flags; do
+		ovs_add_kernel_tunnel "${sbxname}" tns vxlan vxlan${i} ${addr} \
+			remote 172.31.110.2 id ${id} dstport 4789 \
+			ttl ${ttl} df ${df} ${csum} || return 1
+	done <<< "${configs}"
+
+	# wait until the upcall handler started by "ovs_add_if ... -u" is ready
+	ovs_wait grep -q 'listening on upcall packet handler' \
+		${ovs_dir}/ovs-vxlan0.out || return 1
+
+	info "sending arping"
+	# the exit status of arping is not checked; only the upcalls logged in
+	# ovs-vxlan0.out are inspected afterwards
+	for i in 1 2 3 4; do
+		ovs_sbx "${sbxname}" ip netns exec tns \
+			arping -I vxlan${i} 172.31.22${i}.2 -c 1 \
+			>${ovs_dir}/arping.stdout 2>${ovs_dir}/arping.stderr
+	done
+
+	info "checking that received decapsulated packets carry correct metadata"
+	while read -r i addr id ttl df csum flags; do
+		# build the extended regex that one logged upcall line must match
+		arp_hdr="arp\\(sip=172.31.22${i}.1,tip=172.31.22${i}.2,op=1,sha="
+		addrs="src=172.31.110.1,dst=172.31.110.2"
+		ports="tp_src=[0-9]*,tp_dst=4789"
+		tnl_md="tunnel\\(tun_id=${id},${addrs},ttl=${ttl},${ports},${flags}\\)"
+
+		ovs_sbx "${sbxname}" grep -qE "MISS upcall.*${tnl_md}.*${arp_hdr}" \
+			${ovs_dir}/ovs-vxlan0.out || return 1
+	done <<< "${configs}"
+
+	return 0
+}
+
 run_test() {
 	(
 	tname="$1"
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 8a0396bfaf99..848f61fdcee0 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket):
                     elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE:
                         up.execute(msg)
                     else:
-                        print("Unkonwn cmd: %d" % msg["cmd"])
+                        print("Unknown cmd: %d" % msg["cmd"])
             except NetlinkError as ne:
                 raise ne
 
@@ -2583,7 +2583,7 @@ def main(argv):
     prverscheck = pyroute2.__version__.split(".")
     if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6:
         print("Need to upgrade the python pyroute2 package to >= 0.6.")
-        sys.exit(0)
+        sys.exit(1)
 
     parser = argparse.ArgumentParser()
     parser.add_argument(
diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore
new file mode 100644
index 000000000000..ee44c081ca7c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+ovpn-cli
diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile
new file mode 100644
index 000000000000..169f0464ac3a
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/Makefile
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# CFLAGS is kept recursively expanded ('=') so that $(KHDR_INCLUDES) is only
+# resolved when CFLAGS is used. The pkg-config queries below are captured with
+# ':=' instead, so that each of them runs once at parse time rather than on
+# every expansion of CFLAGS/LDLIBS.
+CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES)
+
+# mbedTLS include flags; left empty when pkg-config cannot provide them
+MTLS_CFLAGS := $(shell pkg-config --cflags mbedcrypto-3 mbedtls-3 2>/dev/null)
+CFLAGS += $(MTLS_CFLAGS)
+
+# libnl include flags, with a fixed include path as fallback
+VAR_CFLAGS := $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS := -I/usr/include/libnl3
+endif
+CFLAGS += $(VAR_CFLAGS)
+
+# mbedTLS libraries, with plain -l flags as fallback
+MTLS_LDLIBS := $(shell pkg-config --libs mbedcrypto-3 mbedtls-3 2>/dev/null)
+ifeq ($(MTLS_LDLIBS),)
+MTLS_LDLIBS := -lmbedtls -lmbedcrypto
+endif
+LDLIBS += $(MTLS_LDLIBS)
+
+# libnl libraries, with plain -l flags as fallback
+NL_LDLIBS := $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(NL_LDLIBS),)
+NL_LDLIBS := -lnl-genl-3 -lnl-3
+endif
+LDLIBS += $(NL_LDLIBS)
+
+
+TEST_FILES = \
+	common.sh \
+	data64.key \
+	json \
+	tcp_peers.txt \
+	udp_peers.txt \
+	../../../../net/ynl/pyynl/cli.py \
+# end of TEST_FILES
+
+TEST_PROGS := \
+	test-chachapoly.sh \
+	test-close-socket-tcp.sh \
+	test-close-socket.sh \
+	test-float.sh \
+	test-large-mtu.sh \
+	test-mark.sh \
+	test-symmetric-id-float.sh \
+	test-symmetric-id-tcp.sh \
+	test-symmetric-id.sh \
+	test-tcp.sh \
+	test.sh \
+# end of TEST_PROGS
+
+TEST_GEN_FILES := ovpn-cli
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh
new file mode 100644
index 000000000000..4c08f756e63a
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/common.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+# Every setting below keeps a value already present in the environment and
+# only falls back to the default shown here.
+UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt}
+TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt}
+OVPN_CLI=${OVPN_CLI:-./ovpn-cli}
+YNL_CLI=${YNL_CLI:-../../../../net/ynl/pyynl/cli.py}
+ALG=${ALG:-aes}
+PROTO=${PROTO:-UDP}
+FLOAT=${FLOAT:-0}
+SYMMETRIC_ID=${SYMMETRIC_ID:-0}
+
+# ID_OFFSET is 9 when SYMMETRIC_ID is 0 and 0 otherwise
+export ID_OFFSET=$(( 9 * (SYMMETRIC_ID == 0) ))
+
+# jq program used by compare_ntfs on both expected and received notifications:
+# drop entries whose peer has a "remote-ipv6" key, remove the ifindex and
+# emit the remaining entries sorted by peer id
+JQ_FILTER='map(select(.msg.peer | has("remote-ipv6") | not)) |
+	map(del(.msg.ifindex)) | sort_by(.msg.peer.id)[]'
+LAN_IP="11.11.11.11"
+
+# filled by setup_listener: notification file and listener PID, both indexed
+# by the argument given to setup_listener
+declare -A tmp_jsons=()
+declare -A listener_pids=()
+
+# Create the network namespace peer<N>, where <N> is the first argument.
+create_ns() {
+	ip netns add peer${1}
+}
+
+# Set up the ovpn interface tun<N> inside namespace peer<N>.
+# $1: peer index <N>
+# $2: address (with prefix length) assigned to tun<N>
+# $3: optional MTU for tun<N>
+# Peer 0 is created in "MP" mode and is first linked to each of the
+# ${NUM_PEERS} other namespaces through a veth pair; every other peer uses
+# "P2P" mode.
+setup_ns() {
+	MODE="P2P"
+
+	if [ ${1} -eq 0 ]; then
+		MODE="MP"
+		for p in $(seq 1 ${NUM_PEERS}); do
+			# both ends of the pair are named veth${p}: one in peer0,
+			# the other in peer${p}
+			ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p}
+
+			ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p}
+			ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p}
+			ip -n peer0 link set veth${p} up
+
+			ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p}
+			ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p}
+			ip -n peer${p} link set veth${p} up
+		done
+	fi
+
+	ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE
+	ip -n peer${1} addr add ${2} dev tun${1}
+	# add a secondary IP to peer 1, to test a LAN behind a client
+	if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then
+		ip -n peer${1} addr add ${LAN_IP} dev tun${1}
+		# the sed expression strips the "/<prefix length>" part of $2
+		ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0
+	fi
+	if [ -n "${3}" ]; then
+		ip -n peer${1} link set mtu ${3} dev tun${1}
+	fi
+	ip -n peer${1} link set tun${1} up
+}
+
+build_capture_filter() {
+	# match the first four bytes of the openvpn data payload
+	if [ "${PROTO}" == "UDP" ]; then
+		# For UDP, libpcap transport indexing only works for IPv4, so
+		# use an explicit IPv4 or IPv6 expression based on the peer
+		# address. The IPv6 branch assumes there are no extension
+		# headers in the outer packet.
+		if [[ "${2}" == *:* ]]; then
+			printf "ip6 and ip6[6] = 17 and ip6[48:4] = %s" "${1}"
+		else
+			printf "ip and udp[8:4] = %s" "${1}"
+		fi
+	else
+		# openvpn over TCP prepends a 2-byte packet length ahead of the
+		# DATA_V2 opcode, so skip it before matching the payload header
+		printf "ip and tcp[(((tcp[12] & 0xf0) >> 2) + 2):4] = %s" "${1}"
+	fi
+}
+
+# Start a background YNL listener inside namespace peer$1 that subscribes to
+# the ovpn "peers" notifications and writes them, as JSON, to a temporary
+# file. The listener PID is stored in listener_pids[$1] and the file name in
+# tmp_jsons[$1].
+setup_listener() {
+	file=$(mktemp)
+	# enter the namespace selected by $1, the same key used for the two
+	# arrays below, instead of depending on a loop variable of the caller
+	PYTHONUNBUFFERED=1 ip netns exec peer${1} ${YNL_CLI} --family ovpn \
+		--subscribe peers --output-json --duration 40 > ${file} &
+	listener_pids[$1]=$!
+	tmp_jsons[$1]="${file}"
+}
+
+# Configure the ovpn peer(s) of namespace peer$1, over UDP or TCP depending on
+# ${PROTO}.
+# Index 0 registers the peers listed in the peers file (new_multi_peer for
+# UDP, listen for TCP) and installs one key per peer. Any other index N
+# configures a single peer (new_peer + new_key for UDP, connect for TCP).
+# With SYMMETRIC_ID=1 the peer ID is N and the TX ID is "none"; otherwise the
+# peer ID comes from column 2 of line N of the peers file and the TX ID is N.
+add_peer() {
+	labels=("ASYMM" "SYMM")
+	M_ID=${labels[SYMMETRIC_ID]}
+
+	if [ "${PROTO}" == "UDP" ]; then
+		if [ ${1} -eq 0 ]; then
+			ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 \
+				${M_ID} ${UDP_PEERS_FILE}
+
+			for p in $(seq 1 ${NUM_PEERS}); do
+				ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \
+					data64.key
+			done
+		else
+			if [ "${SYMMETRIC_ID}" -eq 1 ]; then
+				PEER_ID=${1}
+				TX_ID="none"
+			else
+				PEER_ID=$(awk "NR == ${1} {print \$2}" \
+					${UDP_PEERS_FILE})
+				TX_ID=${1}
+			fi
+			# columns 3, 4 and 6 of line N of the UDP peers file
+			RADDR=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE})
+			RPORT=$(awk "NR == ${1} {print \$4}" ${UDP_PEERS_FILE})
+			LPORT=$(awk "NR == ${1} {print \$6}" ${UDP_PEERS_FILE})
+			ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} \
+				${PEER_ID} ${TX_ID} ${LPORT} ${RADDR} ${RPORT}
+			ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} \
+				${PEER_ID} 1 0 ${ALG} 1 data64.key
+		fi
+	else
+		if [ ${1} -eq 0 ]; then
+			# runs in a background subshell: the keys are installed
+			# only after "listen" has returned successfully
+			(ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${M_ID} \
+				${TCP_PEERS_FILE} && {
+				for p in $(seq 1 ${NUM_PEERS}); do
+					ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \
+						${ALG} 0 data64.key
+				done
+			}) &
+			# NOTE(review): presumably gives the background listener
+			# time to start before the clients connect - confirm
+			sleep 5
+		else
+			if [ "${SYMMETRIC_ID}" -eq 1 ]; then
+				PEER_ID=${1}
+				TX_ID="none"
+			else
+				PEER_ID=$(awk "NR == ${1} {print \$2}" \
+					${TCP_PEERS_FILE})
+				TX_ID=${1}
+			fi
+			ip netns exec peer${1} ${OVPN_CLI} connect tun${1} \
+				${PEER_ID} ${TX_ID} 10.10.${1}.1 1 data64.key
+		fi
+	fi
+}
+
+# Stop the notification listener of peer $1 and compare what it recorded with
+# the expected file json/peer$1[-symm][-float].json, after passing both
+# through ${JQ_FILTER}. The recorded file is removed afterwards.
+# Does nothing when no listener was registered in tmp_jsons.
+# NOTE(review): a mismatch only shows the diff output and omits "OK"; the
+# function itself does not return an error for it - confirm this is intended.
+compare_ntfs() {
+	if [ ${#tmp_jsons[@]} -gt 0 ]; then
+		suffix=""
+		[ "${SYMMETRIC_ID}" -eq 1 ] && suffix="${suffix}-symm"
+		[ "$FLOAT" == 1 ] && suffix="${suffix}-float"
+		expected="json/peer${1}${suffix}.json"
+		received="${tmp_jsons[$1]}"
+
+		# terminate the listener and wait for it to exit before reading
+		# its output file
+		kill -TERM ${listener_pids[$1]} || true
+		wait ${listener_pids[$1]} || true
+		printf "Checking notifications for peer ${1}... "
+		if diff <(jq -s "${JQ_FILTER}" ${expected}) \
+			<(jq -s "${JQ_FILTER}" ${received}); then
+			echo "OK"
+		fi
+
+		rm -f ${received} || true
+	fi
+}
+
+# Tear down everything the tests may have created: ovpn-cli processes, the
+# tun and veth interfaces and the namespaces peer0..peer10.
+# Every command ignores its own failure, so this can be called whatever the
+# state of the setup is.
+cleanup() {
+	# some ovpn-cli processes sleep in background so they need manual poking
+	killall $(basename ${OVPN_CLI}) 2>/dev/null || true
+
+	# netns peer0 is deleted without erasing ifaces first
+	for p in $(seq 1 10); do
+		ip -n peer${p} link set tun${p} down 2>/dev/null || true
+		ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true
+	done
+	for p in $(seq 1 10); do
+		ip -n peer0 link del veth${p} 2>/dev/null || true
+	done
+	for p in $(seq 0 10); do
+		ip netns del peer${p} 2>/dev/null || true
+	done
+}
+
+# Unless NUM_PEERS is already set, default it to the number of lines of the
+# peers file that belongs to the selected protocol.
+if [ "${PROTO}" == "UDP" ]; then
+	NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')}
+else
+	NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')}
+fi
diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config
new file mode 100644
index 000000000000..42699740936d
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/config
@@ -0,0 +1,10 @@
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_DST_CACHE=y
+CONFIG_INET=y
+CONFIG_NET=y
+CONFIG_NET_UDP_TUNNEL=y
+CONFIG_OVPN=m
+CONFIG_STREAM_PARSER=y
diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key
new file mode 100644
index 000000000000..d04febcdf5a2
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/data64.key
@@ -0,0 +1 @@
+jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3Bia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tEBofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w==
diff --git a/tools/testing/selftests/net/ovpn/json/peer0-float.json b/tools/testing/selftests/net/ovpn/json/peer0-float.json
new file mode 100644
index 000000000000..682fa58ad4ea
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer0-float.json
@@ -0,0 +1,9 @@
+{"name": "peer-float-ntf", "msg": {"ifindex": 0, "peer": {"id": 1, "remote-ipv4": "10.10.1.3", "remote-port": 1}}}
+{"name": "peer-float-ntf", "msg": {"ifindex": 0, "peer": {"id": 2, "remote-ipv4": "10.10.2.3", "remote-port": 1}}}
+{"name": "peer-float-ntf", "msg": {"ifindex": 0, "peer": {"id": 3, "remote-ipv4": "10.10.3.3", "remote-port": 1}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer0-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer0-symm-float.json
new file mode 120000
index 000000000000..e31a5bd59863
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer0-symm-float.json
@@ -0,0 +1 @@
+peer0-float.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer0-symm.json b/tools/testing/selftests/net/ovpn/json/peer0-symm.json
new file mode 120000
index 000000000000..57a163048eed
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer0-symm.json
@@ -0,0 +1 @@
+peer0.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer0.json b/tools/testing/selftests/net/ovpn/json/peer0.json
new file mode 100644
index 000000000000..7c46a33d5ecd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer0.json
@@ -0,0 +1,6 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}}
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer1-float.json b/tools/testing/selftests/net/ovpn/json/peer1-float.json
new file mode 120000
index 000000000000..d28c328d1452
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer1-float.json
@@ -0,0 +1 @@
+peer1.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer1-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer1-symm-float.json
new file mode 120000
index 000000000000..b3615dcc523d
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer1-symm-float.json
@@ -0,0 +1 @@
+peer1-symm.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer1-symm.json b/tools/testing/selftests/net/ovpn/json/peer1-symm.json
new file mode 100644
index 000000000000..5da4ea9d51fb
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer1-symm.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 1}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer1.json b/tools/testing/selftests/net/ovpn/json/peer1.json
new file mode 100644
index 000000000000..1009d26dc14a
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer1.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 10}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer2-float.json b/tools/testing/selftests/net/ovpn/json/peer2-float.json
new file mode 120000
index 000000000000..b9f09980aaa0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer2-float.json
@@ -0,0 +1 @@
+peer2.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer2-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer2-symm-float.json
new file mode 120000
index 000000000000..28a895cb5170
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer2-symm-float.json
@@ -0,0 +1 @@
+peer2-symm.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer2-symm.json b/tools/testing/selftests/net/ovpn/json/peer2-symm.json
new file mode 100644
index 000000000000..8f6db4f8c2ac
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer2-symm.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 2}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer2.json b/tools/testing/selftests/net/ovpn/json/peer2.json
new file mode 100644
index 000000000000..44e9fad2b622
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer2.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "userspace", "id": 11}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer3-float.json b/tools/testing/selftests/net/ovpn/json/peer3-float.json
new file mode 120000
index 000000000000..2700b55bcf2e
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer3-float.json
@@ -0,0 +1 @@
+peer3.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer3-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer3-symm-float.json
new file mode 120000
index 000000000000..ee8b9719c2fd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer3-symm-float.json
@@ -0,0 +1 @@
+peer3-symm.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer3-symm.json b/tools/testing/selftests/net/ovpn/json/peer3-symm.json
new file mode 100644
index 000000000000..bdabd6fa2e64
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer3-symm.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 3}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer3.json b/tools/testing/selftests/net/ovpn/json/peer3.json
new file mode 100644
index 000000000000..d4be8ba130ae
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer3.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 12}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer4-float.json b/tools/testing/selftests/net/ovpn/json/peer4-float.json
new file mode 120000
index 000000000000..460f6c14cd60
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer4-float.json
@@ -0,0 +1 @@
+peer4.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer4-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer4-symm-float.json
new file mode 120000
index 000000000000..7d34ff7305da
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer4-symm-float.json
@@ -0,0 +1 @@
+peer4-symm.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer4-symm.json b/tools/testing/selftests/net/ovpn/json/peer4-symm.json
new file mode 100644
index 000000000000..c3734bb9251b
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer4-symm.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 4}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer4.json b/tools/testing/selftests/net/ovpn/json/peer4.json
new file mode 100644
index 000000000000..67d27e2d48ac
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer4.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 13}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer5-float.json b/tools/testing/selftests/net/ovpn/json/peer5-float.json
new file mode 120000
index 000000000000..0f725c50ce19
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer5-float.json
@@ -0,0 +1 @@
+peer5.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer5-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer5-symm-float.json
new file mode 120000
index 000000000000..afc0f5f9f13b
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer5-symm-float.json
@@ -0,0 +1 @@
+peer5-symm.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer5-symm.json b/tools/testing/selftests/net/ovpn/json/peer5-symm.json
new file mode 100644
index 000000000000..46c4a348299d
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer5-symm.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 5}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer5.json b/tools/testing/selftests/net/ovpn/json/peer5.json
new file mode 100644
index 000000000000..ecd9bd0b2f37
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer5.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 14}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer6-float.json b/tools/testing/selftests/net/ovpn/json/peer6-float.json
new file mode 120000
index 000000000000..4d9ded3e0a84
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer6-float.json
@@ -0,0 +1 @@
+peer6.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer6-symm-float.json b/tools/testing/selftests/net/ovpn/json/peer6-symm-float.json
new file mode 120000
index 000000000000..e39203204d8c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer6-symm-float.json
@@ -0,0 +1 @@
+peer6-symm.json
+\ No newline at end of file
diff --git a/tools/testing/selftests/net/ovpn/json/peer6-symm.json b/tools/testing/selftests/net/ovpn/json/peer6-symm.json
new file mode 100644
index 000000000000..aa30f2cff625
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer6-symm.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 6}}}
diff --git a/tools/testing/selftests/net/ovpn/json/peer6.json b/tools/testing/selftests/net/ovpn/json/peer6.json
new file mode 100644
index 000000000000..7fded29c5804
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/json/peer6.json
@@ -0,0 +1 @@
+{"name": "peer-del-ntf", "msg": {"ifindex": 0, "peer": {"del-reason": "expired", "id": 15}}}
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
new file mode 100644
index 000000000000..d40953375c86
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -0,0 +1,2467 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  OpenVPN data channel accelerator
+ *
+ *  Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ *  Author:	Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <time.h>
+
+#include <linux/ovpn.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#include <netlink/socket.h>
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <mbedtls/base64.h>
+#include <mbedtls/error.h>
+
+#include <sys/socket.h>
+
+#include "kselftest.h"
+
+/* defines to make checkpatch happy */
+#define strscpy strncpy
+
+/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we
+ * have to explicitly do it to prevent the kernel from failing upon
+ * parsing of the message
+ */
+#define nla_nest_start(_msg, _type) \
+	nla_nest_start(_msg, (_type) | NLA_F_NESTED)
+
+/* Local replacement for nla_get_uint(), which libnl < 3.11.0 does not
+ * implement: a payload of 4 bytes is read as u32, anything else as u64.
+ */
+uint64_t ovpn_nla_get_uint(struct nlattr *attr)
+{
+	const bool is_u32 = nla_len(attr) == sizeof(uint32_t);
+
+	return is_u32 ? nla_get_u32(attr) : nla_get_u64(attr);
+}
+
+typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg);
+
+enum ovpn_key_direction {
+	KEY_DIR_IN = 0,
+	KEY_DIR_OUT,
+};
+
+#define KEY_LEN (256 / 8)
+#define NONCE_LEN 8
+
+#define PEER_ID_UNDEF 0x00FFFFFF
+#define MAX_PEERS 10
+
+/* State of one generic netlink request, filled in by nl_ctx_alloc_flags()
+ * and released by nl_ctx_free()
+ */
+struct nl_ctx {
+	/* generic netlink socket the request is sent on */
+	struct nl_sock *nl_sock;
+	/* request message, already carrying the generic netlink header */
+	struct nl_msg *nl_msg;
+	/* callback set installed on nl_sock */
+	struct nl_cb *nl_cb;
+
+	/* family id resolved for OVPN_FAMILY_NAME */
+	int ovpn_dco_id;
+};
+
+enum ovpn_cmd {
+	CMD_INVALID,
+	CMD_NEW_IFACE,
+	CMD_DEL_IFACE,
+	CMD_LISTEN,
+	CMD_CONNECT,
+	CMD_NEW_PEER,
+	CMD_NEW_MULTI_PEER,
+	CMD_SET_PEER,
+	CMD_DEL_PEER,
+	CMD_GET_PEER,
+	CMD_NEW_KEY,
+	CMD_DEL_KEY,
+	CMD_GET_KEY,
+	CMD_SWAP_KEYS,
+	CMD_LISTEN_MCAST,
+};
+
+struct ovpn_ctx {
+	enum ovpn_cmd cmd;
+
+	__u8 key_enc[KEY_LEN];
+	__u8 key_dec[KEY_LEN];
+	__u8 nonce[NONCE_LEN];
+
+	enum ovpn_cipher_alg cipher;
+
+	sa_family_t sa_family;
+
+	unsigned long peer_id, tx_id;
+	unsigned long lport;
+
+	union {
+		struct sockaddr_in in4;
+		struct sockaddr_in6 in6;
+	} remote;
+
+	union {
+		struct sockaddr_in in4;
+		struct sockaddr_in6 in6;
+	} peer_ip;
+
+	bool peer_ip_set;
+
+	unsigned int ifindex;
+	char ifname[IFNAMSIZ];
+	enum ovpn_mode mode;
+	bool mode_set;
+
+	int socket;
+	int cli_sockets[MAX_PEERS];
+
+	__u32 keepalive_interval;
+	__u32 keepalive_timeout;
+
+	enum ovpn_key_direction key_dir;
+	enum ovpn_key_slot key_slot;
+	int key_id;
+
+	uint32_t mark;
+	bool asymm_id;
+
+	const char *peers_file;
+};
+
+/* Receive pending netlink messages on @ctx and log any failure to stderr.
+ *
+ * Returns the value of nl_recvmsgs() unchanged.
+ */
+static int ovpn_nl_recvmsgs(struct nl_ctx *ctx)
+{
+	const int rc = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb);
+
+	if (rc == -NLE_INTR)
+		fprintf(stderr,
+			"netlink received interrupt due to signal - ignoring\n");
+	else if (rc == -NLE_NOMEM)
+		fprintf(stderr, "netlink out of memory error\n");
+	else if (rc == -NLE_AGAIN)
+		fprintf(stderr,
+			"netlink reports blocking read - aborting wait\n");
+	else if (rc)
+		fprintf(stderr, "netlink reports error (%d): %s\n",
+			rc, nl_geterror(-rc));
+
+	return rc;
+}
+
+/* Allocate a netlink context and prepare an ovpn generic netlink request.
+ *
+ * Opens and connects a generic netlink socket, resolves the ovpn family id
+ * and builds a message for @cmd carrying @flags. OVPN_A_IFINDEX is appended
+ * when @ovpn->ifindex is greater than zero.
+ *
+ * Returns the new context or NULL on failure, in which case everything that
+ * was allocated up to the failing step has been released again.
+ */
+static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd,
+					 int flags)
+{
+	struct nl_ctx *ctx;
+	int err, ret;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		return NULL;
+
+	ctx->nl_sock = nl_socket_alloc();
+	if (!ctx->nl_sock) {
+		fprintf(stderr, "cannot allocate netlink socket\n");
+		goto err_free;
+	}
+
+	nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192);
+
+	ret = genl_connect(ctx->nl_sock);
+	if (ret) {
+		fprintf(stderr, "cannot connect to generic netlink: %s\n",
+			nl_geterror(ret));
+		goto err_sock;
+	}
+
+	/* enable Extended ACK for detailed error reporting; the result of
+	 * setsockopt() is not checked
+	 */
+	err = 1;
+	setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK,
+		   &err, sizeof(err));
+
+	ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME);
+	if (ctx->ovpn_dco_id < 0) {
+		fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n",
+			ctx->ovpn_dco_id);
+		/* the socket exists at this point and must be released too */
+		goto err_sock;
+	}
+
+	ctx->nl_msg = nlmsg_alloc();
+	if (!ctx->nl_msg) {
+		fprintf(stderr, "cannot allocate netlink message\n");
+		goto err_sock;
+	}
+
+	ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT);
+	if (!ctx->nl_cb) {
+		fprintf(stderr, "failed to allocate netlink callback\n");
+		goto err_msg;
+	}
+
+	nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb);
+
+	if (!genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd,
+			 0)) {
+		fprintf(stderr, "cannot add generic netlink header\n");
+		goto err_cb;
+	}
+
+	if (ovpn->ifindex > 0)
+		NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex);
+
+	return ctx;
+nla_put_failure:
+err_cb:
+	/* drop the reference obtained from nl_cb_alloc(); the socket holds
+	 * its own one, which nl_socket_free() releases
+	 */
+	nl_cb_put(ctx->nl_cb);
+err_msg:
+	nlmsg_free(ctx->nl_msg);
+err_sock:
+	nl_socket_free(ctx->nl_sock);
+err_free:
+	free(ctx);
+	return NULL;
+}
+
+/* Shortcut for nl_ctx_alloc_flags() with no netlink flags set */
+static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd)
+{
+	return nl_ctx_alloc_flags(ovpn, cmd, 0);
+}
+
+/* Release the socket, message and callback set of @ctx, then @ctx itself.
+ * A NULL @ctx is accepted and ignored.
+ */
+static void nl_ctx_free(struct nl_ctx *ctx)
+{
+	if (ctx) {
+		nl_socket_free(ctx->nl_sock);
+		nlmsg_free(ctx->nl_msg);
+		nl_cb_put(ctx->nl_cb);
+		free(ctx);
+	}
+}
+
+/*
+ * libnl error callback: store the kernel's error code in the int pointed to
+ * by @arg and, if the reply carries extended ACK attributes, print them to
+ * stderr.
+ *
+ * Always returns NL_STOP.
+ */
+static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused,
+			    struct nlmsgerr *err, void *arg)
+{
+	/* the nlmsgerr payload sits right behind its netlink header */
+	struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1;
+	struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1];
+	int len = nlh->nlmsg_len;
+	struct nlattr *attrs;
+	int *ret = arg;
+	/* outer header + error code + header of the echoed request */
+	int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh);
+
+	*ret = err->error;
+
+	/* no extended ACK attributes attached: nothing more to report */
+	if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+		return NL_STOP;
+
+	/* unless capped, the payload of the original request is echoed too */
+	if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+		ack_len += err->msg.nlmsg_len - sizeof(*nlh);
+
+	if (len <= ack_len)
+		return NL_STOP;
+
+	attrs = (void *)((uint8_t *)nlh + ack_len);
+	len -= ack_len;
+
+	nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL);
+	if (tb_msg[NLMSGERR_ATTR_MSG]) {
+		len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]),
+			      nla_len(tb_msg[NLMSGERR_ATTR_MSG]));
+		/*
+		 * Use a precision (%.*s), not a field width (%*s): the
+		 * message may lack a terminating NUL, so printing must stop
+		 * after len bytes.
+		 */
+		fprintf(stderr, "kernel error: %.*s\n", len,
+			(char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]));
+	}
+
+	if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) {
+		fprintf(stderr, "missing required nesting type %u\n",
+			nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST]));
+	}
+
+	if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) {
+		fprintf(stderr, "missing required attribute type %u\n",
+			nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE]));
+	}
+
+	return NL_STOP;
+}
+
+/* NL_CB_FINISH callback: clear the status int behind @arg, return NL_SKIP */
+static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused,
+			     void *arg)
+{
+	*(int *)arg = 0;
+
+	return NL_SKIP;
+}
+
+/* NL_CB_ACK callback: clear the status int behind @arg, return NL_STOP */
+static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused,
+			  void *arg)
+{
+	*(int *)arg = 0;
+
+	return NL_STOP;
+}
+
+/*
+ * Send the message prepared in @ctx and process replies until one of the
+ * error/finish/ack callbacks changes the status.
+ *
+ * @cb: optional handler installed for valid (NL_CB_VALID) messages; it
+ *      receives @ctx as its argument.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb)
+{
+	int status = 1;
+	int ret;
+
+	nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status);
+	nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish,
+		  &status);
+	nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status);
+
+	if (cb)
+		nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx);
+
+	/*
+	 * If nothing was sent, no reply will ever arrive: bail out instead
+	 * of waiting in the receive loop below.
+	 */
+	ret = nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg);
+	if (ret < 0) {
+		fprintf(stderr, "failed to send netlink message: %s (%d)\n",
+			nl_geterror(ret), ret);
+		return ret;
+	}
+
+	/* status stays 1 until a callback records the outcome */
+	while (status == 1)
+		ovpn_nl_recvmsgs(ctx);
+
+	if (status < 0)
+		fprintf(stderr, "failed to send netlink message: %s (%d)\n",
+			strerror(-status), status);
+
+	return status;
+}
+
+/*
+ * Load key material from @file into @ctx.
+ *
+ * At most the first 256 bytes of the file are read and base64-decoded. The
+ * decoded data must hold at least two keys of KEY_LEN bytes followed by a
+ * nonce of NONCE_LEN bytes. Which key is used for encryption and which for
+ * decryption depends on ctx->key_dir.
+ *
+ * Returns 0 on success, a non-zero value on any failure.
+ */
+static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx)
+{
+	int idx_enc, idx_dec, ret = -1;
+	unsigned char *ckey = NULL;
+	__u8 *bkey = NULL;
+	size_t olen = 0;
+	long ckey_len;
+	size_t nread;
+	FILE *fp;
+
+	fp = fopen(file, "r");
+	if (!fp) {
+		fprintf(stderr, "cannot open: %s\n", file);
+		return -1;
+	}
+
+	/* get file size */
+	if (fseek(fp, 0L, SEEK_END) < 0) {
+		fprintf(stderr, "cannot seek in key file: %s\n", file);
+		goto err;
+	}
+
+	ckey_len = ftell(fp);
+	if (ckey_len < 0) {
+		/* a negative length must never reach malloc()/fread() */
+		fprintf(stderr, "cannot get size of key file: %s\n", file);
+		goto err;
+	}
+	rewind(fp);
+
+	/* if the file is longer, let's just read a portion */
+	if (ckey_len > 256)
+		ckey_len = 256;
+
+	ckey = malloc(ckey_len);
+	if (!ckey)
+		goto err;
+
+	nread = fread(ckey, 1, ckey_len, fp);
+	if (nread != (size_t)ckey_len) {
+		fprintf(stderr,
+			"couldn't read enough data from key file: %dbytes read\n",
+			(int)nread);
+		goto err;
+	}
+
+	/* first pass with a NULL buffer only computes the decoded length */
+	olen = 0;
+	ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len);
+	if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) {
+		char buf[256];
+
+		mbedtls_strerror(ret, buf, sizeof(buf));
+		fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf,
+			ret);
+
+		/* ret may be 0 here (e.g. empty input): still a failure */
+		ret = -1;
+		goto err;
+	}
+
+	bkey = malloc(olen);
+	if (!bkey) {
+		fprintf(stderr, "cannot allocate binary key buffer\n");
+		ret = -1;
+		goto err;
+	}
+
+	ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len);
+	if (ret) {
+		char buf[256];
+
+		mbedtls_strerror(ret, buf, sizeof(buf));
+		fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf,
+			ret);
+
+		goto err;
+	}
+
+	/* ret is 0 from here on: every failure below must set it again */
+	if (olen < 2 * KEY_LEN + NONCE_LEN) {
+		fprintf(stderr,
+			"not enough data in key file, found %zuB but needs %dB\n",
+			olen, 2 * KEY_LEN + NONCE_LEN);
+		ret = -1;
+		goto err;
+	}
+
+	switch (ctx->key_dir) {
+	case KEY_DIR_IN:
+		idx_enc = 0;
+		idx_dec = 1;
+		break;
+	case KEY_DIR_OUT:
+		idx_enc = 1;
+		idx_dec = 0;
+		break;
+	default:
+		ret = -1;
+		goto err;
+	}
+
+	memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN);
+	memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN);
+	memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN);
+
+	ret = 0;
+
+err:
+	fclose(fp);
+	free(bkey);
+	free(ckey);
+
+	return ret;
+}
+
+/*
+ * Map the cipher name given on the command line onto ctx->cipher.
+ *
+ * Returns 0 on success, -ENOTSUP for an unknown name.
+ */
+static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx)
+{
+	if (!strcmp(cipher, "aes")) {
+		ctx->cipher = OVPN_CIPHER_ALG_AES_GCM;
+		return 0;
+	}
+
+	if (!strcmp(cipher, "chachapoly")) {
+		ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305;
+		return 0;
+	}
+
+	if (!strcmp(cipher, "none")) {
+		ctx->cipher = OVPN_CIPHER_ALG_NONE;
+		return 0;
+	}
+
+	return -ENOTSUP;
+}
+
+/*
+ * Parse the key direction argument @dir into ctx->key_dir.
+ *
+ * The whole string must be a decimal number equal to KEY_DIR_IN or
+ * KEY_DIR_OUT; anything else (including empty or non-numeric input) is
+ * rejected.
+ *
+ * Returns 0 on success, -1 on invalid input.
+ */
+static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx)
+{
+	char *end = NULL;
+	long long in_dir;
+
+	/* keep the full-width result so out-of-range values cannot wrap */
+	in_dir = strtoll(dir, &end, 10);
+
+	/* end == dir: no digits consumed; *end != 0: trailing garbage */
+	if (end == dir || *end != '\0' ||
+	    (in_dir != KEY_DIR_IN && in_dir != KEY_DIR_OUT)) {
+		fprintf(stderr,
+			"invalid key direction provided. Can be 0 or 1 only\n");
+		return -1;
+	}
+
+	ctx->key_dir = in_dir;
+
+	return 0;
+}
+
+/*
+ * Create a socket of the given @family and @proto (IPPROTO_UDP or
+ * IPPROTO_TCP), configure it and bind it to the wildcard address on
+ * ctx->lport.
+ *
+ * On success the descriptor is stored in ctx->socket and the family in
+ * ctx->sa_family. On any failure after creation the socket is closed.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto)
+{
+	struct sockaddr_storage local_sock = { 0 };
+	struct sockaddr_in6 *in6;
+	struct sockaddr_in *in;
+	int ret, s, sock_type;
+	size_t sock_len;
+	int opt = 1;
+
+	if (proto == IPPROTO_UDP)
+		sock_type = SOCK_DGRAM;
+	else if (proto == IPPROTO_TCP)
+		sock_type = SOCK_STREAM;
+	else
+		return -EINVAL;
+
+	s = socket(family, sock_type, 0);
+	if (s < 0) {
+		perror("cannot create socket");
+		return -1;
+	}
+
+	switch (family) {
+	case AF_INET:
+		in = (struct sockaddr_in *)&local_sock;
+		in->sin_family = family;
+		in->sin_port = htons(ctx->lport);
+		in->sin_addr.s_addr = htonl(INADDR_ANY);
+		sock_len = sizeof(*in);
+		break;
+	case AF_INET6:
+		in6 = (struct sockaddr_in6 *)&local_sock;
+		in6->sin6_family = family;
+		in6->sin6_port = htons(ctx->lport);
+		in6->sin6_addr = in6addr_any;
+		sock_len = sizeof(*in6);
+		break;
+	default:
+		/* unsupported family: do not leak the descriptor */
+		goto err_socket;
+	}
+
+	ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+	if (ret < 0) {
+		perror("setsockopt for SO_REUSEADDR");
+		goto err_socket;
+	}
+
+	ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+	if (ret < 0) {
+		perror("setsockopt for SO_REUSEPORT");
+		goto err_socket;
+	}
+
+	if (ctx->mark != 0) {
+		ret = setsockopt(s, SOL_SOCKET, SO_MARK, (void *)&ctx->mark,
+				 sizeof(ctx->mark));
+		if (ret < 0) {
+			perror("setsockopt for SO_MARK");
+			goto err_socket;
+		}
+	}
+
+	if (family == AF_INET6) {
+		/* clear IPV6_V6ONLY on the IPv6 socket */
+		opt = 0;
+		if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt,
+			       sizeof(opt))) {
+			perror("failed to set IPV6_V6ONLY");
+			goto err_socket;
+		}
+	}
+
+	ret = bind(s, (struct sockaddr *)&local_sock, sock_len);
+	if (ret < 0) {
+		perror("cannot bind socket");
+		goto err_socket;
+	}
+
+	ctx->socket = s;
+	ctx->sa_family = family;
+	return 0;
+
+err_socket:
+	close(s);
+	return -1;
+}
+
+/* Create and bind the UDP socket for @ctx; see ovpn_socket() for the result */
+static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family)
+{
+	int ret = ovpn_socket(ctx, family, IPPROTO_UDP);
+
+	return ret;
+}
+
+/*
+ * Create a bound TCP socket for @ctx and put it in listening state with a
+ * backlog of 10. The socket is closed again if listen() fails.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family)
+{
+	int ret = ovpn_socket(ctx, family, IPPROTO_TCP);
+
+	if (ret < 0)
+		return ret;
+
+	if (listen(ctx->socket, 10) >= 0)
+		return 0;
+
+	perror("listen");
+	close(ctx->socket);
+
+	return -1;
+}
+
+/*
+ * Accept one connection on ctx->socket, storing the peer address in
+ * ctx->remote.
+ *
+ * Returns the descriptor of the accepted connection. On failure the
+ * listening socket is closed and a negative value is returned (-EINVAL when
+ * the peer address has neither IPv4 nor IPv6 size).
+ */
+static int ovpn_accept(struct ovpn_ctx *ctx)
+{
+	socklen_t addrlen = sizeof(ctx->remote);
+	int fd;
+
+	fd = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &addrlen);
+	if (fd < 0) {
+		perror("accept");
+		close(ctx->socket);
+		return fd;
+	}
+
+	fprintf(stderr, "Connection received!\n");
+
+	if (addrlen == sizeof(struct sockaddr_in) ||
+	    addrlen == sizeof(struct sockaddr_in6))
+		return fd;
+
+	fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n");
+	close(fd);
+	close(ctx->socket);
+
+	return -EINVAL;
+}
+
+/*
+ * Open a TCP connection to the address stored in ovpn->remote.
+ *
+ * On success the connected descriptor is stored in ovpn->socket. On any
+ * failure after creation the socket is closed.
+ *
+ * Returns 0 on success, a negative value on failure (-EOPNOTSUPP when the
+ * remote address is neither AF_INET nor AF_INET6).
+ */
+static int ovpn_connect(struct ovpn_ctx *ovpn)
+{
+	socklen_t socklen;
+	int s, ret;
+
+	s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0);
+	if (s < 0) {
+		perror("cannot create socket");
+		return -1;
+	}
+
+	switch (ovpn->remote.in4.sin_family) {
+	case AF_INET:
+		socklen = sizeof(struct sockaddr_in);
+		break;
+	case AF_INET6:
+		socklen = sizeof(struct sockaddr_in6);
+		break;
+	default:
+		/* go through the common exit so the socket is not leaked */
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
+	ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen);
+	if (ret < 0) {
+		perror("connect");
+		goto err;
+	}
+
+	fprintf(stderr, "connected\n");
+
+	ovpn->socket = s;
+
+	return 0;
+err:
+	close(s);
+	return ret;
+}
+
+/*
+ * Send OVPN_CMD_PEER_NEW describing the peer configured in @ovpn.
+ *
+ * The remote endpoint attributes are only added when @is_tcp is false; the
+ * VPN address is only added when ovpn->peer_ip_set is true.
+ *
+ * Returns the result of the netlink exchange, -ENOMEM if no context could
+ * be allocated, or -1 if the message could not be built.
+ */
+static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp)
+{
+	struct nlattr *nest;
+	struct nl_ctx *nl;
+	int ret = -1;
+
+	nl = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW);
+	if (!nl)
+		return -ENOMEM;
+
+	nest = nla_nest_start(nl->nl_msg, OVPN_A_PEER);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+	if (ovpn->asymm_id)
+		NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_TX_ID, ovpn->tx_id);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket);
+
+	/* remote endpoint (skipped for TCP) */
+	if (!is_tcp) {
+		if (ovpn->remote.in4.sin_family == AF_INET) {
+			NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_REMOTE_IPV4,
+				    ovpn->remote.in4.sin_addr.s_addr);
+			NLA_PUT_U16(nl->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+				    ovpn->remote.in4.sin_port);
+		} else if (ovpn->remote.in4.sin_family == AF_INET6) {
+			NLA_PUT(nl->nl_msg, OVPN_A_PEER_REMOTE_IPV6,
+				sizeof(ovpn->remote.in6.sin6_addr),
+				&ovpn->remote.in6.sin6_addr);
+			NLA_PUT_U32(nl->nl_msg,
+				    OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+				    ovpn->remote.in6.sin6_scope_id);
+			NLA_PUT_U16(nl->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+				    ovpn->remote.in6.sin6_port);
+		} else {
+			fprintf(stderr,
+				"Invalid family for remote socket address\n");
+			goto nla_put_failure;
+		}
+	}
+
+	/* address of the peer inside the tunnel, if one was provided */
+	if (ovpn->peer_ip_set) {
+		if (ovpn->peer_ip.in4.sin_family == AF_INET) {
+			NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_VPN_IPV4,
+				    ovpn->peer_ip.in4.sin_addr.s_addr);
+		} else if (ovpn->peer_ip.in4.sin_family == AF_INET6) {
+			NLA_PUT(nl->nl_msg, OVPN_A_PEER_VPN_IPV6,
+				sizeof(struct in6_addr),
+				&ovpn->peer_ip.in6.sin6_addr);
+		} else {
+			fprintf(stderr, "Invalid family for peer address\n");
+			goto nla_put_failure;
+		}
+	}
+
+	nla_nest_end(nl->nl_msg, nest);
+
+	ret = ovpn_nl_msg_send(nl, NULL);
+nla_put_failure:
+	nl_ctx_free(nl);
+	return ret;
+}
+
+/*
+ * Send OVPN_CMD_PEER_SET carrying the keepalive interval and timeout of
+ * peer ovpn->peer_id.
+ *
+ * Returns the result of the netlink exchange, -ENOMEM if no context could
+ * be allocated, or -1 if the message could not be built.
+ */
+static int ovpn_set_peer(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *nest;
+	struct nl_ctx *nl;
+	int ret = -1;
+
+	nl = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET);
+	if (!nl)
+		return -ENOMEM;
+
+	nest = nla_nest_start(nl->nl_msg, OVPN_A_PEER);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL,
+		    ovpn->keepalive_interval);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+		    ovpn->keepalive_timeout);
+	nla_nest_end(nl->nl_msg, nest);
+
+	ret = ovpn_nl_msg_send(nl, NULL);
+nla_put_failure:
+	nl_ctx_free(nl);
+	return ret;
+}
+
+/*
+ * Send OVPN_CMD_PEER_DEL for peer ovpn->peer_id.
+ *
+ * Returns the result of the netlink exchange, -ENOMEM if no context could
+ * be allocated, or -1 if the message could not be built.
+ */
+static int ovpn_del_peer(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *nest;
+	struct nl_ctx *nl;
+	int ret = -1;
+
+	nl = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL);
+	if (!nl)
+		return -ENOMEM;
+
+	nest = nla_nest_start(nl->nl_msg, OVPN_A_PEER);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+	nla_nest_end(nl->nl_msg, nest);
+
+	ret = ovpn_nl_msg_send(nl, NULL);
+nla_put_failure:
+	nl_ctx_free(nl);
+	return ret;
+}
+
+/*
+ * Netlink callback printing the peer described by a reply message.
+ *
+ * The nested OVPN_A_PEER attribute is parsed and every attribute found in
+ * it is printed to stderr. Messages without OVPN_A_PEER are reported and
+ * skipped.
+ *
+ * Always returns NL_SKIP.
+ */
+static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused)
+{
+	struct nlattr *pattrs[OVPN_A_PEER_MAX + 1];
+	struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+	struct nlattr *attrs[OVPN_A_MAX + 1];
+	__u16 rport = 0, lport = 0;
+
+	/* top-level attributes of the generic netlink message */
+	nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+		  genlmsg_attrlen(gnlh, 0), NULL);
+
+	if (!attrs[OVPN_A_PEER]) {
+		fprintf(stderr, "no packet content in netlink message\n");
+		return NL_SKIP;
+	}
+
+	/* attributes nested inside OVPN_A_PEER */
+	nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]),
+		  nla_len(attrs[OVPN_A_PEER]), NULL);
+
+	if (pattrs[OVPN_A_PEER_ID])
+		fprintf(stderr, "* Peer %u\n",
+			nla_get_u32(pattrs[OVPN_A_PEER_ID]));
+
+	if (pattrs[OVPN_A_PEER_TX_ID])
+		fprintf(stderr, "\tTX peer ID %u\n",
+			nla_get_u32(pattrs[OVPN_A_PEER_TX_ID]));
+
+	if (pattrs[OVPN_A_PEER_SOCKET_NETNSID])
+		fprintf(stderr, "\tsocket NetNS ID: %d\n",
+			nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID]));
+
+	if (pattrs[OVPN_A_PEER_VPN_IPV4]) {
+		char buf[INET_ADDRSTRLEN];
+
+		inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]),
+			  buf, sizeof(buf));
+		fprintf(stderr, "\tVPN IPv4: %s\n", buf);
+	}
+
+	if (pattrs[OVPN_A_PEER_VPN_IPV6]) {
+		char buf[INET6_ADDRSTRLEN];
+
+		inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]),
+			  buf, sizeof(buf));
+		fprintf(stderr, "\tVPN IPv6: %s\n", buf);
+	}
+
+	/* ports are converted with ntohs() before printing; 0 when absent */
+	if (pattrs[OVPN_A_PEER_LOCAL_PORT])
+		lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT]));
+
+	if (pattrs[OVPN_A_PEER_REMOTE_PORT])
+		rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT]));
+
+	/* the local IPv6 endpoint is only printed along with a remote one */
+	if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) {
+		void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6];
+		char buf[INET6_ADDRSTRLEN];
+		int scope_id = -1;
+
+		if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) {
+			void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID];
+
+			scope_id = nla_get_u32(p);
+		}
+
+		/*
+		 * NOTE(review): scope_id is a signed int printed with %u, so
+		 * the -1 default shows up as a large unsigned number when
+		 * the attribute is missing - confirm this is intended.
+		 */
+		inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+		fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport,
+			scope_id);
+
+		if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) {
+			void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6];
+
+			inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+			fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+		}
+	}
+
+	/* same for IPv4: local endpoint only printed along with a remote */
+	if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) {
+		void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4];
+		char buf[INET_ADDRSTRLEN];
+
+		inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf));
+		fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport);
+
+		if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) {
+			void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4];
+
+			inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf));
+			fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+		}
+	}
+
+	if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) {
+		void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL];
+
+		fprintf(stderr, "\tKeepalive interval: %u sec\n",
+			nla_get_u32(p));
+	}
+
+	if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])
+		fprintf(stderr, "\tKeepalive timeout: %u sec\n",
+			nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]));
+
+	/* traffic counters, read through ovpn_nla_get_uint() */
+	if (pattrs[OVPN_A_PEER_VPN_RX_BYTES])
+		fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n",
+			ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES]));
+
+	if (pattrs[OVPN_A_PEER_VPN_TX_BYTES])
+		fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n",
+			ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES]));
+
+	if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS])
+		fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n",
+			ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS]));
+
+	if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS])
+		fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n",
+			ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS]));
+
+	if (pattrs[OVPN_A_PEER_LINK_RX_BYTES])
+		fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n",
+			ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES]));
+
+	if (pattrs[OVPN_A_PEER_LINK_TX_BYTES])
+		fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n",
+			ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES]));
+
+	if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS])
+		fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n",
+			ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS]));
+
+	if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS])
+		fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n",
+			ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS]));
+
+	return NL_SKIP;
+}
+
+/*
+ * Query peer ovpn->peer_id via OVPN_CMD_PEER_GET, or dump all peers
+ * (NLM_F_DUMP) when the id is PEER_ID_UNDEF. Replies are printed by
+ * ovpn_handle_peer().
+ *
+ * Returns the result of the netlink exchange, -ENOMEM if no context could
+ * be allocated, or -1 if the message could not be built.
+ */
+static int ovpn_get_peer(struct ovpn_ctx *ovpn)
+{
+	const bool dump = ovpn->peer_id == PEER_ID_UNDEF;
+	struct nlattr *nest;
+	struct nl_ctx *nl;
+	int ret = -1;
+
+	nl = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET,
+				dump ? NLM_F_DUMP : 0);
+	if (!nl)
+		return -ENOMEM;
+
+	/* a single-peer query names the peer; a dump carries no selector */
+	if (!dump) {
+		nest = nla_nest_start(nl->nl_msg, OVPN_A_PEER);
+		NLA_PUT_U32(nl->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+		nla_nest_end(nl->nl_msg, nest);
+	}
+
+	ret = ovpn_nl_msg_send(nl, ovpn_handle_peer);
+nla_put_failure:
+	nl_ctx_free(nl);
+	return ret;
+}
+
+/*
+ * Send OVPN_CMD_KEY_NEW with the key configuration held in @ovpn: peer id,
+ * slot, key id, cipher, plus one nested block per direction carrying the
+ * cipher key and the nonce tail.
+ *
+ * Returns the result of the netlink exchange, -ENOMEM if no context could
+ * be allocated, or -1 if the message could not be built.
+ */
+static int ovpn_new_key(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *conf, *dir;
+	struct nl_ctx *nl;
+	int ret = -1;
+
+	nl = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW);
+	if (!nl)
+		return -ENOMEM;
+
+	conf = nla_nest_start(nl->nl_msg, OVPN_A_KEYCONF);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher);
+
+	/* encryption direction */
+	dir = nla_nest_start(nl->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR);
+	NLA_PUT(nl->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc);
+	NLA_PUT(nl->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+	nla_nest_end(nl->nl_msg, dir);
+
+	/* decryption direction: same nonce, other key */
+	dir = nla_nest_start(nl->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR);
+	NLA_PUT(nl->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec);
+	NLA_PUT(nl->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+	nla_nest_end(nl->nl_msg, dir);
+
+	nla_nest_end(nl->nl_msg, conf);
+
+	ret = ovpn_nl_msg_send(nl, NULL);
+nla_put_failure:
+	nl_ctx_free(nl);
+	return ret;
+}
+
+/*
+ * Send OVPN_CMD_KEY_DEL for slot ovpn->key_slot of peer ovpn->peer_id.
+ *
+ * Returns the result of the netlink exchange, -ENOMEM if no context could
+ * be allocated, or -1 if the message could not be built.
+ */
+static int ovpn_del_key(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *conf;
+	struct nl_ctx *nl;
+	int ret = -1;
+
+	nl = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL);
+	if (!nl)
+		return -ENOMEM;
+
+	conf = nla_nest_start(nl->nl_msg, OVPN_A_KEYCONF);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+	nla_nest_end(nl->nl_msg, conf);
+
+	ret = ovpn_nl_msg_send(nl, NULL);
+nla_put_failure:
+	nl_ctx_free(nl);
+	return ret;
+}
+
+/*
+ * Netlink callback printing the key configuration found in a reply.
+ *
+ * Parses the nested OVPN_A_KEYCONF attribute and prints peer id, slot, key
+ * id and cipher to stderr when present. Always returns NL_SKIP.
+ */
+static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused)
+{
+	struct genlmsghdr *hdr = nlmsg_data(nlmsg_hdr(msg));
+	struct nlattr *conf[OVPN_A_KEYCONF_MAX + 1];
+	struct nlattr *top[OVPN_A_MAX + 1];
+	uint32_t val;
+
+	nla_parse(top, OVPN_A_MAX, genlmsg_attrdata(hdr, 0),
+		  genlmsg_attrlen(hdr, 0), NULL);
+
+	if (!top[OVPN_A_KEYCONF]) {
+		fprintf(stderr, "no packet content in netlink message\n");
+		return NL_SKIP;
+	}
+
+	nla_parse(conf, OVPN_A_KEYCONF_MAX, nla_data(top[OVPN_A_KEYCONF]),
+		  nla_len(top[OVPN_A_KEYCONF]), NULL);
+
+	if (conf[OVPN_A_KEYCONF_PEER_ID]) {
+		val = nla_get_u32(conf[OVPN_A_KEYCONF_PEER_ID]);
+		fprintf(stderr, "* Peer %u\n", val);
+	}
+
+	if (conf[OVPN_A_KEYCONF_SLOT]) {
+		val = nla_get_u32(conf[OVPN_A_KEYCONF_SLOT]);
+
+		fprintf(stderr, "\t- Slot: ");
+		if (val == OVPN_KEY_SLOT_PRIMARY)
+			fprintf(stderr, "primary\n");
+		else if (val == OVPN_KEY_SLOT_SECONDARY)
+			fprintf(stderr, "secondary\n");
+		else
+			fprintf(stderr, "invalid (%u)\n", val);
+	}
+
+	if (conf[OVPN_A_KEYCONF_KEY_ID]) {
+		val = nla_get_u32(conf[OVPN_A_KEYCONF_KEY_ID]);
+		fprintf(stderr, "\t- Key ID: %u\n", val);
+	}
+
+	if (conf[OVPN_A_KEYCONF_CIPHER_ALG]) {
+		val = nla_get_u32(conf[OVPN_A_KEYCONF_CIPHER_ALG]);
+
+		fprintf(stderr, "\t- Cipher: ");
+		if (val == OVPN_CIPHER_ALG_NONE)
+			fprintf(stderr, "none\n");
+		else if (val == OVPN_CIPHER_ALG_AES_GCM)
+			fprintf(stderr, "aes-gcm\n");
+		else if (val == OVPN_CIPHER_ALG_CHACHA20_POLY1305)
+			fprintf(stderr, "chacha20poly1305\n");
+		else
+			fprintf(stderr, "invalid (%u)\n", val);
+	}
+
+	return NL_SKIP;
+}
+
+/*
+ * Query slot ovpn->key_slot of peer ovpn->peer_id via OVPN_CMD_KEY_GET;
+ * the reply is printed by ovpn_handle_key().
+ *
+ * Returns the result of the netlink exchange, -ENOMEM if no context could
+ * be allocated, or -1 if the message could not be built.
+ */
+static int ovpn_get_key(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *conf;
+	struct nl_ctx *nl;
+	int ret = -1;
+
+	nl = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET);
+	if (!nl)
+		return -ENOMEM;
+
+	conf = nla_nest_start(nl->nl_msg, OVPN_A_KEYCONF);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+	nla_nest_end(nl->nl_msg, conf);
+
+	ret = ovpn_nl_msg_send(nl, ovpn_handle_key);
+nla_put_failure:
+	nl_ctx_free(nl);
+	return ret;
+}
+
+/*
+ * Send OVPN_CMD_KEY_SWAP for peer ovpn->peer_id.
+ *
+ * Returns the result of the netlink exchange, -ENOMEM if no context could
+ * be allocated, or -1 if the message could not be built.
+ */
+static int ovpn_swap_keys(struct ovpn_ctx *ovpn)
+{
+	struct nlattr *conf;
+	struct nl_ctx *nl;
+	int ret = -1;
+
+	nl = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP);
+	if (!nl)
+		return -ENOMEM;
+
+	conf = nla_nest_start(nl->nl_msg, OVPN_A_KEYCONF);
+	NLA_PUT_U32(nl->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+	nla_nest_end(nl->nl_msg, conf);
+
+	ret = ovpn_nl_msg_send(nl, NULL);
+nla_put_failure:
+	nl_ctx_free(nl);
+	return ret;
+}
+
+/*
+ * Append one rtattr of @type carrying @alen bytes to the rtnl message @n.
+ *
+ * @maxlen is the size of the buffer holding @n. The payload is copied from
+ * @data, or zero-filled when @data is NULL.
+ *
+ * Returns 0 on success, -EMSGSIZE if the attribute does not fit.
+ */
+static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type,
+			const void *data, int alen)
+{
+	int len = RTA_LENGTH(alen);
+	unsigned int new_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+	struct rtattr *rta;
+
+	if ((int)new_len > maxlen) {
+		fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n",
+			__func__, maxlen);
+		return -EMSGSIZE;
+	}
+
+	/* the new attribute starts at the current end of the message */
+	rta = nlmsg_tail(n);
+	rta->rta_len = len;
+	rta->rta_type = type;
+
+	if (data)
+		memcpy(RTA_DATA(rta), data, alen);
+	else
+		memset(RTA_DATA(rta), 0, alen);
+
+	n->nlmsg_len = new_len;
+
+	return 0;
+}
+
+/*
+ * Open a nested attribute of type @attr at the tail of @msg.
+ *
+ * Returns the nest header (to be passed to ovpn_nest_end()), or NULL if the
+ * header does not fit within @max_size.
+ */
+static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size,
+				      int attr)
+{
+	struct rtattr *start = nlmsg_tail(msg);
+	int err = ovpn_addattr(msg, max_size, attr, NULL, 0);
+
+	return err < 0 ? NULL : start;
+}
+
+/* Close @nest: its length becomes the distance to the current tail of @msg */
+static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest)
+{
+	uint8_t *tail = (uint8_t *)nlmsg_tail(msg);
+
+	nest->rta_len = tail - (uint8_t *)nest;
+}
+
+/* Send/receive buffer sizes requested for the RTNL socket */
+#define RT_SNDBUF_SIZE (1024 * 2)
+#define RT_RCVBUF_SIZE (1024 * 4)
+
+/*
+ * Open a NETLINK_ROUTE socket and set its buffer sizes.
+ *
+ * Returns the descriptor on success, a negative value on failure.
+ */
+static int ovpn_rt_socket(void)
+{
+	int sndbuf = RT_SNDBUF_SIZE;
+	int rcvbuf = RT_RCVBUF_SIZE;
+	int fd;
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (fd < 0) {
+		fprintf(stderr, "%s: cannot open netlink socket\n", __func__);
+		return fd;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf,
+		       sizeof(sndbuf)) < 0) {
+		fprintf(stderr, "%s: SO_SNDBUF\n", __func__);
+		goto err_close;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
+		       sizeof(rcvbuf)) < 0) {
+		fprintf(stderr, "%s: SO_RCVBUF\n", __func__);
+		goto err_close;
+	}
+
+	return fd;
+
+err_close:
+	close(fd);
+	return -1;
+}
+
+/*
+ * Bind the netlink socket @fd to the multicast @groups and verify the
+ * address the kernel assigned to it.
+ *
+ * Returns 0 on success, a negative errno value on failure.
+ */
+static int ovpn_rt_bind(int fd, uint32_t groups)
+{
+	struct sockaddr_nl local = { 0 };
+	socklen_t addr_len;
+	int err;
+
+	local.nl_family = AF_NETLINK;
+	local.nl_groups = groups;
+
+	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+		/* save errno first: fprintf() is allowed to change it */
+		err = errno;
+		fprintf(stderr, "%s: cannot bind netlink socket: %d\n",
+			__func__, err);
+		return -err;
+	}
+
+	addr_len = sizeof(local);
+	if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) {
+		err = errno;
+		fprintf(stderr, "%s: cannot getsockname: %d\n", __func__,
+			err);
+		return -err;
+	}
+
+	if (addr_len != sizeof(local)) {
+		fprintf(stderr, "%s: wrong address length %d\n", __func__,
+			addr_len);
+		return -EINVAL;
+	}
+
+	if (local.nl_family != AF_NETLINK) {
+		fprintf(stderr, "%s: wrong address family %d\n", __func__,
+			local.nl_family);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg);
+
+/*
+ * Send the RTNL message @payload over a freshly opened socket and process
+ * the replies.
+ *
+ * @peer/@groups: destination netlink pid and multicast groups.
+ * @cb: optional reply handler, called with @arg_cb. For regular replies a
+ *      return value <= 0 stops processing and becomes the result; for the
+ *      final ACK a return value <= 0 replaces the result. When @cb is NULL
+ *      an ACK is requested from the kernel instead.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer,
+			unsigned int groups, ovpn_parse_reply_cb cb,
+			void *arg_cb)
+{
+	int len, rem_len, fd, ret, rcv_len;
+	struct sockaddr_nl nladdr = { 0 };
+	struct nlmsgerr *err;
+	struct nlmsghdr *h;
+	char buf[1024 * 16];
+	struct iovec iov = {
+		.iov_base = payload,
+		.iov_len = payload->nlmsg_len,
+	};
+	struct msghdr nlmsg = {
+		.msg_name = &nladdr,
+		.msg_namelen = sizeof(nladdr),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	nladdr.nl_family = AF_NETLINK;
+	nladdr.nl_pid = peer;
+	nladdr.nl_groups = groups;
+
+	payload->nlmsg_seq = time(NULL);
+
+	/* no reply handler: ask for an ACK so that completion is reported */
+	if (!cb)
+		payload->nlmsg_flags |= NLM_F_ACK;
+
+	fd = ovpn_rt_socket();
+	if (fd < 0) {
+		fprintf(stderr, "%s: can't open rtnl socket\n", __func__);
+		/* never report success if errno happens to be 0 by now */
+		return errno ? -errno : fd;
+	}
+
+	ret = ovpn_rt_bind(fd, 0);
+	if (ret < 0) {
+		/*
+		 * Keep the error returned by ovpn_rt_bind(): errno may be
+		 * stale or 0 here, and 0 would read as success.
+		 */
+		fprintf(stderr, "%s: can't bind rtnl socket\n", __func__);
+		goto out;
+	}
+
+	ret = sendmsg(fd, &nlmsg, 0);
+	if (ret < 0) {
+		/* save errno before fprintf() can change it */
+		ret = -errno;
+		fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__);
+		goto out;
+	}
+
+	/* prepare buffer to store RTNL replies */
+	memset(buf, 0, sizeof(buf));
+	iov.iov_base = buf;
+
+	while (1) {
+		/*
+		 * iov_len still describes the request that was sent and
+		 * msg_namelen is updated by recvmsg(): reset both before
+		 * every receive.
+		 */
+		iov.iov_len = sizeof(buf);
+		nlmsg.msg_namelen = sizeof(nladdr);
+		rcv_len = recvmsg(fd, &nlmsg, 0);
+		if (rcv_len < 0) {
+			if (errno == EINTR || errno == EAGAIN) {
+				fprintf(stderr, "%s: interrupted call\n",
+					__func__);
+				continue;
+			}
+			ret = -errno;
+			fprintf(stderr, "%s: rtnl: error on recvmsg()\n",
+				__func__);
+			goto out;
+		}
+
+		if (rcv_len == 0) {
+			fprintf(stderr,
+				"%s: rtnl: socket reached unexpected EOF\n",
+				__func__);
+			ret = -EIO;
+			goto out;
+		}
+
+		if (nlmsg.msg_namelen != sizeof(nladdr)) {
+			fprintf(stderr,
+				"%s: sender address length: %u (expected %zu)\n",
+				__func__, nlmsg.msg_namelen, sizeof(nladdr));
+			ret = -EIO;
+			goto out;
+		}
+
+		/* walk every netlink message contained in this datagram */
+		h = (struct nlmsghdr *)buf;
+		while (rcv_len >= (int)sizeof(*h)) {
+			len = h->nlmsg_len;
+			rem_len = len - sizeof(*h);
+
+			if (rem_len < 0 || len > rcv_len) {
+				if (nlmsg.msg_flags & MSG_TRUNC) {
+					fprintf(stderr, "%s: truncated message\n",
+						__func__);
+					ret = -EIO;
+					goto out;
+				}
+				fprintf(stderr, "%s: malformed message: len=%d\n",
+					__func__, len);
+				ret = -EIO;
+				goto out;
+			}
+
+			if (h->nlmsg_type == NLMSG_DONE) {
+				ret = 0;
+				goto out;
+			}
+
+			if (h->nlmsg_type == NLMSG_ERROR) {
+				err = (struct nlmsgerr *)NLMSG_DATA(h);
+				if (rem_len < (int)sizeof(struct nlmsgerr)) {
+					fprintf(stderr, "%s: ERROR truncated\n",
+						__func__);
+					ret = -EIO;
+					goto out;
+				}
+
+				if (err->error) {
+					fprintf(stderr, "%s: (%d) %s\n",
+						__func__, err->error,
+						strerror(-err->error));
+					ret = err->error;
+					goto out;
+				}
+
+				/* error code 0 is an ACK: the request is done */
+				ret = 0;
+				if (cb)	{
+					int r = cb(h, arg_cb);
+
+					if (r <= 0)
+						ret = r;
+				}
+				goto out;
+			}
+
+			if (cb) {
+				int r = cb(h, arg_cb);
+
+				if (r <= 0) {
+					ret = r;
+					goto out;
+				}
+			} else {
+				fprintf(stderr, "%s: RTNL: unexpected reply\n",
+					__func__);
+			}
+
+			rcv_len -= NLMSG_ALIGN(len);
+			h = (struct nlmsghdr *)((uint8_t *)h +
+						NLMSG_ALIGN(len));
+		}
+
+		if (nlmsg.msg_flags & MSG_TRUNC) {
+			fprintf(stderr, "%s: message truncated\n", __func__);
+			continue;
+		}
+
+		if (rcv_len) {
+			fprintf(stderr, "%s: rtnl: %d not parsed bytes\n",
+				__func__, rcv_len);
+			ret = -1;
+			goto out;
+		}
+	}
+out:
+	close(fd);
+
+	return ret;
+}
+
+/*
+ * Buffer for an RTNL link request: netlink header, ifinfomsg and trailing
+ * space for the attributes.
+ */
+struct ovpn_link_req {
+	struct nlmsghdr n;
+	struct ifinfomsg i;
+	/* attributes are appended here through ovpn_addattr() */
+	char buf[256];
+};
+
+/*
+ * Create the interface ovpn->ifname through an RTM_NEWLINK request. The
+ * mode is only included when ovpn->mode_set is true.
+ *
+ * Returns the result of ovpn_rt_send(), or -1 if the request could not be
+ * built.
+ */
+static int ovpn_new_iface(struct ovpn_ctx *ovpn)
+{
+	struct ovpn_link_req req = { 0 };
+	struct rtattr *info, *info_data;
+
+	fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname,
+		ovpn->mode);
+
+	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+	req.n.nlmsg_type = RTM_NEWLINK;
+	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	req.i.ifi_family = AF_PACKET;
+
+	if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname,
+			 strlen(ovpn->ifname) + 1) < 0)
+		return -1;
+
+	/* IFLA_LINKINFO { IFLA_INFO_KIND [, IFLA_INFO_DATA { mode }] } */
+	info = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO);
+	if (!info)
+		return -1;
+
+	if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME,
+			 strlen(OVPN_FAMILY_NAME) + 1) < 0)
+		return -1;
+
+	if (ovpn->mode_set) {
+		info_data = ovpn_nest_start(&req.n, sizeof(req),
+					    IFLA_INFO_DATA);
+		if (!info_data)
+			return -1;
+
+		if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE,
+				 &ovpn->mode, sizeof(uint8_t)) < 0)
+			return -1;
+
+		ovpn_nest_end(&req.n, info_data);
+	}
+
+	ovpn_nest_end(&req.n, info);
+
+	return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+/*
+ * Delete the interface with index ovpn->ifindex through an RTM_DELLINK
+ * request.
+ *
+ * Returns the result of ovpn_rt_send().
+ */
+static int ovpn_del_iface(struct ovpn_ctx *ovpn)
+{
+	struct ovpn_link_req req = {
+		.n = {
+			.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)),
+			.nlmsg_type = RTM_DELLINK,
+			.nlmsg_flags = NLM_F_REQUEST,
+		},
+		.i = {
+			.ifi_family = AF_PACKET,
+			.ifi_index = ovpn->ifindex,
+		},
+	};
+
+	fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname,
+		ovpn->ifindex);
+
+	return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+/*
+ * Sequence-check callback that accepts every message: it unconditionally
+ * returns NL_OK and ignores both arguments.
+ */
+static int nl_seq_check(struct nl_msg (*msg)__always_unused,
+			void (*arg)__always_unused)
+{
+	return NL_OK;
+}
+
+/* Argument of mcast_family_handler(): group to look up and resulting id */
+struct mcast_handler_args {
+	/* name of the multicast group being searched for */
+	const char *group;
+	/* set to the group id once a matching group is found */
+	int id;
+};
+
+/*
+ * Netlink callback scanning the multicast groups listed in a family reply.
+ *
+ * @arg points to a struct mcast_handler_args; its id field is set when a
+ * group whose name matches the requested one is found. Always returns
+ * NL_SKIP.
+ */
+static int mcast_family_handler(struct nl_msg *msg, void *arg)
+{
+	struct genlmsghdr *hdr = nlmsg_data(nlmsg_hdr(msg));
+	struct nlattr *family[CTRL_ATTR_MAX + 1];
+	struct mcast_handler_args *wanted = arg;
+	struct nlattr *cur;
+	int rem;
+
+	nla_parse(family, CTRL_ATTR_MAX, genlmsg_attrdata(hdr, 0),
+		  genlmsg_attrlen(hdr, 0), NULL);
+
+	if (!family[CTRL_ATTR_MCAST_GROUPS])
+		return NL_SKIP;
+
+	nla_for_each_nested(cur, family[CTRL_ATTR_MCAST_GROUPS], rem) {
+		struct nlattr *info[CTRL_ATTR_MCAST_GRP_MAX + 1];
+		struct nlattr *name, *id;
+
+		nla_parse(info, CTRL_ATTR_MCAST_GRP_MAX,
+			  nla_data(cur), nla_len(cur), NULL);
+
+		name = info[CTRL_ATTR_MCAST_GRP_NAME];
+		id = info[CTRL_ATTR_MCAST_GRP_ID];
+
+		/* entries lacking a name or an id cannot match */
+		if (!name || !id)
+			continue;
+
+		if (strncmp(nla_data(name), wanted->group, nla_len(name)) != 0)
+			continue;
+
+		wanted->id = nla_get_u32(id);
+		break;
+	}
+
+	return NL_SKIP;
+}
+
+/* Error callback: copy the kernel error code into the int behind @arg */
+static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused,
+			       struct nlmsgerr *err, void *arg)
+{
+	*(int *)arg = err->error;
+
+	return NL_STOP;
+}
+
+/* ACK callback: clear the int behind @arg and stop processing */
+static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg)
+{
+	*(int *)arg = 0;
+
+	return NL_STOP;
+}
+
+/*
+ * Netlink callback for notifications received by ovpn_listen_mcast().
+ *
+ * The int behind @arg is set to -1 on entry and to 0 once a known
+ * notification for a resolvable interface has been printed.
+ *
+ * Returns NL_OK for a handled notification, NL_STOP otherwise.
+ */
+static int ovpn_handle_msg(struct nl_msg *msg, void *arg)
+{
+	struct nlmsghdr *nlh = nlmsg_hdr(msg);
+	struct genlmsghdr *hdr = nlmsg_data(nlh);
+	struct nlattr *top[OVPN_A_MAX + 1];
+	char name[IF_NAMESIZE];
+	int *status = arg;
+	__u32 idx;
+
+	fprintf(stderr, "received message from ovpn-dco\n");
+
+	/* assume failure until the notification is fully validated */
+	*status = -1;
+
+	if (!genlmsg_valid_hdr(nlh, 0)) {
+		fprintf(stderr, "invalid header\n");
+		return NL_STOP;
+	}
+
+	if (nla_parse(top, OVPN_A_MAX, genlmsg_attrdata(hdr, 0),
+		      genlmsg_attrlen(hdr, 0), NULL) != 0) {
+		fprintf(stderr, "received bogus data from ovpn-dco\n");
+		return NL_STOP;
+	}
+
+	if (!top[OVPN_A_IFINDEX]) {
+		fprintf(stderr, "no ifindex in this message\n");
+		return NL_STOP;
+	}
+
+	idx = nla_get_u32(top[OVPN_A_IFINDEX]);
+	if (if_indextoname(idx, name) == NULL) {
+		fprintf(stderr, "cannot resolve ifname for ifindex: %u\n",
+			idx);
+		return NL_STOP;
+	}
+
+	switch (hdr->cmd) {
+	case OVPN_CMD_PEER_DEL_NTF:
+		fprintf(stdout, "received CMD_PEER_DEL_NTF\n");
+		break;
+	case OVPN_CMD_PEER_FLOAT_NTF:
+		fprintf(stdout, "received CMD_PEER_FLOAT_NTF\n");
+		break;
+	case OVPN_CMD_KEY_SWAP_NTF:
+		fprintf(stdout, "received CMD_KEY_SWAP_NTF\n");
+		break;
+	default:
+		fprintf(stderr, "received unknown command: %d\n", hdr->cmd);
+		return NL_STOP;
+	}
+
+	*status = 0;
+
+	return NL_OK;
+}
+
+/*
+ * Resolve the id of multicast group @group of generic netlink family
+ * @family by querying the "nlctrl" controller over @sock.
+ *
+ * Returns the group id (>= 0) on success or a negative value on failure
+ * (-ENOENT when the family has no group with that name).
+ */
+static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family,
+			     const char *group)
+{
+	struct nl_msg *msg;
+	struct nl_cb *cb;
+	int ret, ctrlid;
+	struct mcast_handler_args grp = {
+		.group = group,
+		.id = -ENOENT,
+	};
+
+	msg = nlmsg_alloc();
+	if (!msg)
+		return -ENOMEM;
+
+	cb = nl_cb_alloc(NL_CB_DEFAULT);
+	if (!cb) {
+		ret = -ENOMEM;
+		goto out_fail_cb;
+	}
+
+	/* a negative id must not be used as message type below */
+	ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+	if (ctrlid < 0) {
+		ret = ctrlid;
+		goto nla_put_failure;
+	}
+
+	ret = -ENOBUFS;
+	if (!genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0))
+		goto nla_put_failure;
+
+	NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+	ret = nl_send_auto_complete(sock, msg);
+	if (ret < 0)
+		goto nla_put_failure;
+
+	/* stays positive until the ack or error handler records a result */
+	ret = 1;
+
+	nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret);
+	nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret);
+	nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp);
+
+	while (ret > 0) {
+		int err = nl_recvmsgs(sock, cb);
+
+		/*
+		 * If receiving failed before any handler stored a result,
+		 * give up instead of looping forever.
+		 */
+		if (err < 0 && ret > 0)
+			ret = err;
+	}
+
+	if (ret == 0)
+		ret = grp.id;
+ nla_put_failure:
+	nl_cb_put(cb);
+ out_fail_cb:
+	nlmsg_free(msg);
+	return ret;
+}
+
+/*
+ * Join the OVPN_MCGRP_PEERS multicast group of the ovpn family and wait
+ * for a notification, which is handled by ovpn_handle_msg().
+ *
+ * Returns 0 once a notification was handled successfully, a non-zero value
+ * on failure.
+ */
+static int ovpn_listen_mcast(void)
+{
+	struct nl_sock *sock;
+	struct nl_cb *cb;
+	int mcid, ret;
+
+	sock = nl_socket_alloc();
+	if (!sock) {
+		fprintf(stderr, "cannot allocate netlink socket\n");
+		return -ENOMEM;
+	}
+
+	nl_socket_set_buffer_size(sock, 8192, 8192);
+
+	ret = genl_connect(sock);
+	if (ret < 0) {
+		fprintf(stderr, "cannot connect to generic netlink: %s\n",
+			nl_geterror(ret));
+		goto err_free;
+	}
+
+	mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS);
+	if (mcid < 0) {
+		fprintf(stderr, "cannot get mcast group: %s\n",
+			nl_geterror(mcid));
+		/* ret is still 0 from genl_connect(): report the failure */
+		ret = mcid;
+		goto err_free;
+	}
+
+	ret = nl_socket_add_membership(sock, mcid);
+	if (ret) {
+		fprintf(stderr, "failed to join mcast group: %d\n", ret);
+		goto err_free;
+	}
+
+	cb = nl_cb_alloc(NL_CB_DEFAULT);
+	if (!cb) {
+		fprintf(stderr, "failed to allocate netlink callback\n");
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	/* ret stays 1 until a handler stores the outcome */
+	ret = 1;
+	nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL);
+	nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret);
+	nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret);
+
+	while (ret == 1) {
+		int err = nl_recvmsgs(sock, cb);
+
+		if (err < 0) {
+			fprintf(stderr,
+				"cannot receive netlink message: (%d) %s\n",
+				err, nl_geterror(-err));
+			ret = -1;
+			break;
+		}
+	}
+
+	nl_cb_put(cb);
+err_free:
+	nl_socket_free(sock);
+	return ret;
+}
+
+/* Print the help text on stderr: one entry per supported command,
+ * followed by a description of each of its arguments.
+ *
+ * @cmd: program name shown in the synopsis line
+ */
+static void usage(const char *cmd)
+{
+	fprintf(stderr,
+		"Usage %s <command> <iface> [arguments..]\n",
+		cmd);
+	fprintf(stderr, "where <command> can be one of the following\n\n");
+
+	fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tmode:\n");
+	fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n");
+	fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n");
+
+	fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+
+	fprintf(stderr,
+		"* listen <iface> <lport> <id_type> <peers_file> [ipv6]: listen for incoming peer TCP connections\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tlport: TCP port to listen to\n");
+	fprintf(stderr, "\tid_type:\n");
+	fprintf(stderr,
+		"\t\t- SYMM for ignoring the TX peer ID from the peers_file\n");
+	fprintf(stderr,
+		"\t\t- ASYMM for using the TX peer ID from the peers_file\n");
+	fprintf(stderr,
+		"\tpeers_file: file containing one peer per line: Line format:\n");
+	fprintf(stderr, "\t\t<peer_id> <tx_id> <vpnaddr>\n");
+	fprintf(stderr,
+		"\tipv6: whether the socket should listen to the IPv6 wildcard address\n");
+
+	fprintf(stderr,
+		"* connect <iface> <peer_id> <tx_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr,
+		"\tpeer_id: peer ID found in data packets received from this peer\n");
+	fprintf(stderr,
+		"\ttx_id: peer ID to be used when sending to this peer, 'none' for symmetric peer ID\n");
+	fprintf(stderr, "\traddr: peer IP address to connect to\n");
+	fprintf(stderr, "\trport: peer TCP port to connect to\n");
+	fprintf(stderr,
+		"\tkey_file: file containing the symmetric key for encryption\n");
+
+	fprintf(stderr,
+		"* new_peer <iface> <peer_id> <tx_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr,
+		"\tpeer_id: peer ID found in data packets received from this peer\n");
+	fprintf(stderr,
+		"\ttx_id: peer ID to be used when sending to this peer, 'none' for symmetric peer ID\n");
+	fprintf(stderr, "\tlport: local UDP port to bind to\n");
+	fprintf(stderr, "\traddr: peer IP address\n");
+	fprintf(stderr, "\trport: peer UDP port\n");
+	fprintf(stderr, "\tvpnaddr: peer VPN IP\n");
+
+	fprintf(stderr,
+		"* new_multi_peer <iface> <lport> <id_type> <peers_file> [mark]: add multiple peers as listed in the file\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tlport: local UDP port to bind to\n");
+	fprintf(stderr, "\tid_type:\n");
+	fprintf(stderr,
+		"\t\t- SYMM for ignoring the TX peer ID from the peers_file\n");
+	fprintf(stderr,
+		"\t\t- ASYMM for using the TX peer ID from the peers_file\n");
+	fprintf(stderr,
+		"\tpeers_file: text file containing one peer per line. Line format:\n");
+	fprintf(stderr,
+		"\t\t<peer_id> <tx_id> <raddr> <rport> <laddr> <lport> <vpnaddr>\n");
+	fprintf(stderr, "\tmark: socket FW mark value\n");
+
+	fprintf(stderr,
+		"* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+	fprintf(stderr,
+		"\tkeepalive_interval: interval for sending ping messages\n");
+	fprintf(stderr,
+		"\tkeepalive_timeout: time after which a peer is timed out\n");
+
+	fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n");
+
+	fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr,
+		"\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n");
+
+	fprintf(stderr,
+		"* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr,
+		"\tpeer_id: peer ID of the peer to configure the key for\n");
+	fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+	fprintf(stderr, "\tkey_id: an ID from 0 to 7\n");
+	fprintf(stderr,
+		"\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n");
+	fprintf(stderr,
+		"\tkey_dir: key direction, must be 0 on one host and 1 on the other\n");
+	fprintf(stderr, "\tkey_file: file containing the pre-shared key\n");
+
+	fprintf(stderr,
+		"* del_key <iface> <peer_id> [slot]: erase existing data channel key\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+	fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n");
+
+	fprintf(stderr,
+		"* get_key <iface> <peer_id> <slot>: retrieve non-sensitive key data\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n");
+	fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+
+	fprintf(stderr,
+		"* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n");
+	fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+
+	fprintf(stderr,
+		"* listen_mcast: listen to ovpn netlink multicast messages\n");
+}
+
+/* Resolve the transport endpoint and/or the VPN address of a peer.
+ *
+ * @ovpn: context to fill; ovpn->sa_family selects the address family
+ *	  used for both lookups
+ * @host: remote address, stored in ovpn->remote (skipped when NULL)
+ * @service: remote port, only used together with @host
+ * @vpnip: peer VPN address, stored in ovpn->peer_ip (skipped when NULL);
+ *	   on success ovpn->sa_family and ovpn->peer_ip_set are updated too
+ *
+ * Return: 0 on success, -1 if a lookup fails, -EINVAL if the first
+ * result is neither a well-formed IPv4 nor IPv6 socket address.
+ */
+static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host,
+			     const char *service, const char *vpnip)
+{
+	int ret;
+	/* stays NULL whenever there is no lookup result left to release */
+	struct addrinfo *result = NULL;
+	struct addrinfo hints = {
+		.ai_family = ovpn->sa_family,
+		.ai_socktype = SOCK_DGRAM,
+		.ai_protocol = IPPROTO_UDP
+	};
+
+	if (host) {
+		ret = getaddrinfo(host, service, &hints, &result);
+		if (ret) {
+			fprintf(stderr, "getaddrinfo on remote error: %s\n",
+				gai_strerror(ret));
+			return -1;
+		}
+
+		if (!(result->ai_family == AF_INET &&
+		      result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+		    !(result->ai_family == AF_INET6 &&
+		      result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen);
+
+		/* release this list now: the vpnip lookup reuses "result"
+		 * and would otherwise leak it
+		 */
+		freeaddrinfo(result);
+		result = NULL;
+	}
+
+	if (vpnip) {
+		ret = getaddrinfo(vpnip, NULL, &hints, &result);
+		if (ret) {
+			fprintf(stderr, "getaddrinfo on vpnip error: %s\n",
+				gai_strerror(ret));
+			return -1;
+		}
+
+		if (!(result->ai_family == AF_INET &&
+		      result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+		    !(result->ai_family == AF_INET6 &&
+		      result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen);
+		ovpn->sa_family = result->ai_family;
+
+		ovpn->peer_ip_set = true;
+	}
+
+	ret = 0;
+out:
+	/* nothing to free when neither host nor vpnip was given */
+	if (result)
+		freeaddrinfo(result);
+	return ret;
+}
+
+/* Parse the identity and the addresses of a new peer into @ovpn.
+ *
+ * @peer_id: decimal RX peer ID, must not exceed PEER_ID_UNDEF
+ * @tx_id: decimal TX peer ID, only parsed when ovpn->asymm_id is set
+ * @raddr, @rport, @vpnip: handed over to ovpn_parse_remote()
+ *
+ * Return: -1 if an ID is out of range, otherwise the result of
+ * ovpn_parse_remote().
+ */
+static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id,
+			       const char *tx_id, const char *raddr,
+			       const char *rport, const char *vpnip)
+{
+	/* strtoul() only sets errno on failure: clear any stale value */
+	errno = 0;
+	ovpn->peer_id = strtoul(peer_id, NULL, 10);
+	if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+		fprintf(stderr, "rx peer ID value out of range\n");
+		return -1;
+	}
+
+	if (ovpn->asymm_id) {
+		errno = 0;
+		ovpn->tx_id = strtoul(tx_id, NULL, 10);
+		if (errno == ERANGE || ovpn->tx_id > PEER_ID_UNDEF) {
+			fprintf(stderr, "tx peer ID value out of range\n");
+			return -1;
+		}
+	}
+
+	return ovpn_parse_remote(ovpn, raddr, rport, vpnip);
+}
+
+/* Translate the command line slot number into ovpn->key_slot:
+ * "1" selects OVPN_KEY_SLOT_PRIMARY, "2" OVPN_KEY_SLOT_SECONDARY.
+ *
+ * Return: 0 on success, -1 if @arg is not 1 or 2.
+ */
+static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn)
+{
+	unsigned long slot;
+
+	/* strtoul() only sets errno on failure: clear any stale value */
+	errno = 0;
+	/* keep the full width so that huge inputs cannot wrap into 1 or 2 */
+	slot = strtoul(arg, NULL, 10);
+
+	if (errno == ERANGE || slot < 1 || slot > 2) {
+		fprintf(stderr, "key slot out of range\n");
+		return -1;
+	}
+
+	switch (slot) {
+	case 1:
+		ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+		break;
+	case 2:
+		ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY;
+		break;
+	}
+
+	return 0;
+}
+
+/* Send one test frame over @socket: a 2-byte big-endian length field
+ * (1000) followed by 1000 bytes of 0x86.
+ *
+ * Return: 0 if send() did not fail, its negative result otherwise.
+ */
+static int ovpn_send_tcp_data(int socket)
+{
+	uint16_t len = htons(1000);
+	uint8_t buf[1002];
+	int ret;
+
+	memcpy(buf, &len, sizeof(len));
+	memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len));
+
+	ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+	if (ret < 0) {
+		/* do not report the error code as a (huge) byte count */
+		fprintf(stderr, "cannot send data over TCP socket: %s\n",
+			strerror(errno));
+		return ret;
+	}
+
+	fprintf(stdout, "Sent %d bytes over TCP socket\n", ret);
+
+	return 0;
+}
+
+/* Read up to 1002 bytes from @socket and print the amount received
+ * together with the 2-byte big-endian header found at the start.
+ *
+ * Return: 0 if at least the header was read, otherwise the result of
+ * recv().
+ */
+static int ovpn_recv_tcp_data(int socket)
+{
+	uint8_t data[1002];
+	uint16_t hdr;
+	int nread = recv(socket, data, sizeof(data), MSG_NOSIGNAL);
+
+	if (nread >= 2) {
+		memcpy(&hdr, data, sizeof(hdr));
+		hdr = ntohs(hdr);
+
+		fprintf(stdout,
+			">>>> Received %u bytes over TCP socket, header: %u\n",
+			nread, hdr);
+		return 0;
+	}
+
+	/* error, closed connection or not even a complete header */
+	fprintf(stderr, ">>>> Error while reading TCP data: %d\n", nread);
+	return nread;
+}
+
+/* Map a command name given on the command line to its enum ovpn_cmd
+ * value.
+ *
+ * Return: the matching command or CMD_INVALID if @cmd is unknown.
+ */
+static enum ovpn_cmd ovpn_parse_cmd(const char *cmd)
+{
+	static const struct {
+		const char *name;
+		enum ovpn_cmd id;
+	} known_cmds[] = {
+		{ "new_iface", CMD_NEW_IFACE },
+		{ "del_iface", CMD_DEL_IFACE },
+		{ "listen", CMD_LISTEN },
+		{ "connect", CMD_CONNECT },
+		{ "new_peer", CMD_NEW_PEER },
+		{ "new_multi_peer", CMD_NEW_MULTI_PEER },
+		{ "set_peer", CMD_SET_PEER },
+		{ "del_peer", CMD_DEL_PEER },
+		{ "get_peer", CMD_GET_PEER },
+		{ "new_key", CMD_NEW_KEY },
+		{ "del_key", CMD_DEL_KEY },
+		{ "get_key", CMD_GET_KEY },
+		{ "swap_keys", CMD_SWAP_KEYS },
+		{ "listen_mcast", CMD_LISTEN_MCAST },
+	};
+	unsigned int i;
+
+	for (i = 0; i < sizeof(known_cmds) / sizeof(known_cmds[0]); i++) {
+		if (strcmp(cmd, known_cmds[i].name) == 0)
+			return known_cmds[i].id;
+	}
+
+	return CMD_INVALID;
+}
+
+/* Send the process to the background and wait for a signal.
+ *
+ * This helper is called at the end of commands
+ * creating sockets, so that the latter stay alive
+ * along with the process that created them.
+ *
+ * A signal is expected to be delivered in order to
+ * terminate the waiting process.
+ */
+static void ovpn_waitbg(void)
+{
+	/* report a failed daemon() instead of silently ignoring it */
+	if (daemon(1, 1) < 0)
+		fprintf(stderr, "cannot send process to background: %s\n",
+			strerror(errno));
+
+	/* wait in any case, so that the sockets stay open */
+	pause();
+}
+
+/* Execute the command stored in ovpn->cmd using the arguments already
+ * parsed into @ovpn.
+ *
+ * Commands creating sockets (listen, connect, new_peer, new_multi_peer)
+ * end in ovpn_waitbg(), which keeps the process around so that those
+ * sockets stay open.
+ *
+ * Return: the result of the executed command, a negative value on
+ * failure.
+ */
+static int ovpn_run_cmd(struct ovpn_ctx *ovpn)
+{
+	char peer_id[10], tx_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128];
+	char lport[10], raddr[128], rport[10];
+	int n, ret;
+	FILE *fp;
+
+	switch (ovpn->cmd) {
+	case CMD_NEW_IFACE:
+		ret = ovpn_new_iface(ovpn);
+		break;
+	case CMD_DEL_IFACE:
+		ret = ovpn_del_iface(ovpn);
+		break;
+	case CMD_LISTEN:
+		ret = ovpn_listen(ovpn, ovpn->sa_family);
+		if (ret < 0) {
+			fprintf(stderr, "cannot listen on TCP socket\n");
+			return ret;
+		}
+
+		fp = fopen(ovpn->peers_file, "r");
+		if (!fp) {
+			fprintf(stderr, "cannot open file: %s\n",
+				ovpn->peers_file);
+			return -1;
+		}
+
+		int num_peers = 0;
+
+		/* the field widths keep fscanf() within peer_id[10],
+		 * tx_id[10] and vpnip[INET6_ADDRSTRLEN] (46 bytes)
+		 */
+		while ((n = fscanf(fp, "%9s %9s %45s\n", peer_id, tx_id,
+				   vpnip)) == 3) {
+			struct ovpn_ctx peer_ctx = { 0 };
+
+			if (num_peers == MAX_PEERS) {
+				fprintf(stderr, "max peers reached!\n");
+				ret = -E2BIG;
+				goto err_close;
+			}
+
+			peer_ctx.ifindex = ovpn->ifindex;
+			peer_ctx.sa_family = ovpn->sa_family;
+			peer_ctx.asymm_id = ovpn->asymm_id;
+
+			peer_ctx.socket = ovpn_accept(ovpn);
+			if (peer_ctx.socket < 0) {
+				fprintf(stderr, "cannot accept connection!\n");
+				ret = -1;
+				goto err_close;
+			}
+
+			/* store peer sockets to test TCP I/O */
+			ovpn->cli_sockets[num_peers] = peer_ctx.socket;
+
+			ret = ovpn_parse_new_peer(&peer_ctx, peer_id, tx_id,
+						  NULL, NULL, vpnip);
+			if (ret < 0) {
+				fprintf(stderr, "error while parsing line\n");
+				ret = -1;
+				goto err_close;
+			}
+
+			ret = ovpn_new_peer(&peer_ctx, true);
+			if (ret < 0) {
+				fprintf(stderr,
+					"cannot add peer to VPN: %s %s\n",
+					peer_id, vpnip);
+				goto err_close;
+			}
+			num_peers++;
+		}
+		fclose(fp);
+
+		for (int i = 0; i < num_peers; i++) {
+			ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]);
+			if (ret < 0)
+				break;
+		}
+		ovpn_waitbg();
+		break;
+	case CMD_CONNECT:
+		ret = ovpn_connect(ovpn);
+		if (ret < 0) {
+			fprintf(stderr, "cannot connect TCP socket\n");
+			return ret;
+		}
+
+		ret = ovpn_new_peer(ovpn, true);
+		if (ret < 0) {
+			fprintf(stderr, "cannot add peer to VPN\n");
+			close(ovpn->socket);
+			return ret;
+		}
+
+		if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) {
+			ret = ovpn_new_key(ovpn);
+			if (ret < 0) {
+				fprintf(stderr, "cannot set key\n");
+				/* same cleanup as the new_peer failure above */
+				close(ovpn->socket);
+				return ret;
+			}
+		}
+
+		ret = ovpn_send_tcp_data(ovpn->socket);
+		ovpn_waitbg();
+		break;
+	case CMD_NEW_PEER:
+		ret = ovpn_udp_socket(ovpn, AF_INET6);
+		if (ret < 0)
+			return ret;
+
+		ret = ovpn_new_peer(ovpn, false);
+		ovpn_waitbg();
+		break;
+	case CMD_NEW_MULTI_PEER:
+		ret = ovpn_udp_socket(ovpn, AF_INET6);
+		if (ret < 0)
+			return ret;
+
+		fp = fopen(ovpn->peers_file, "r");
+		if (!fp) {
+			fprintf(stderr, "cannot open file: %s\n",
+				ovpn->peers_file);
+			return -1;
+		}
+
+		/* each width is the size of the destination buffer minus
+		 * one, so that no token can overflow it
+		 */
+		while ((n = fscanf(fp, "%9s %9s %127s %9s %127s %9s %45s\n",
+				   peer_id, tx_id, laddr, lport, raddr, rport,
+				   vpnip)) == 7) {
+			struct ovpn_ctx peer_ctx = { 0 };
+
+			peer_ctx.ifindex = ovpn->ifindex;
+			peer_ctx.socket = ovpn->socket;
+			peer_ctx.sa_family = AF_UNSPEC;
+			peer_ctx.asymm_id = ovpn->asymm_id;
+
+			ret = ovpn_parse_new_peer(&peer_ctx, peer_id, tx_id,
+						  raddr, rport, vpnip);
+			if (ret < 0) {
+				fprintf(stderr, "error while parsing line\n");
+				ret = -1;
+				goto err_close;
+			}
+
+			ret = ovpn_new_peer(&peer_ctx, false);
+			if (ret < 0) {
+				fprintf(stderr,
+					"cannot add peer to VPN: %s %s %s %s\n",
+					peer_id, raddr, rport, vpnip);
+				goto err_close;
+			}
+		}
+		fclose(fp);
+
+		ovpn_waitbg();
+		break;
+	case CMD_SET_PEER:
+		ret = ovpn_set_peer(ovpn);
+		break;
+	case CMD_DEL_PEER:
+		ret = ovpn_del_peer(ovpn);
+		break;
+	case CMD_GET_PEER:
+		if (ovpn->peer_id == PEER_ID_UNDEF)
+			fprintf(stderr, "List of peers connected to: %s\n",
+				ovpn->ifname);
+
+		ret = ovpn_get_peer(ovpn);
+		break;
+	case CMD_NEW_KEY:
+		ret = ovpn_new_key(ovpn);
+		break;
+	case CMD_DEL_KEY:
+		ret = ovpn_del_key(ovpn);
+		break;
+	case CMD_GET_KEY:
+		ret = ovpn_get_key(ovpn);
+		break;
+	case CMD_SWAP_KEYS:
+		ret = ovpn_swap_keys(ovpn);
+		break;
+	case CMD_LISTEN_MCAST:
+		ret = ovpn_listen_mcast();
+		break;
+	case CMD_INVALID:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+
+err_close:
+	/* only reached while a peers file is open */
+	fclose(fp);
+	return ret;
+}
+
+/* Parse the arguments of the command stored in ovpn->cmd into @ovpn.
+ *
+ * argv[2] is the interface name for every command except listen_mcast,
+ * which takes no arguments; the remaining arguments depend on the
+ * command and are described by usage().
+ *
+ * Return: 0 on success, -EINVAL if arguments are missing (main() prints
+ * the usage text for this code), another negative value if an argument
+ * cannot be parsed.
+ */
+static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
+{
+	int ret;
+
+	/* no args required for LISTEN_MCAST */
+	if (ovpn->cmd == CMD_LISTEN_MCAST)
+		return 0;
+
+	/* all commands need an ifname */
+	if (argc < 3)
+		return -EINVAL;
+
+	strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1);
+	ovpn->ifname[IFNAMSIZ - 1] = '\0';
+
+	/* all commands, except NEW_IFACE, need an ifindex */
+	if (ovpn->cmd != CMD_NEW_IFACE) {
+		ovpn->ifindex = if_nametoindex(ovpn->ifname);
+		if (!ovpn->ifindex) {
+			fprintf(stderr, "cannot find interface: %s\n",
+				strerror(errno));
+			return -1;
+		}
+	}
+
+	/* NOTE: strtoul() only sets errno on failure, therefore errno is
+	 * cleared before each call whose result is checked against ERANGE
+	 */
+	switch (ovpn->cmd) {
+	case CMD_NEW_IFACE:
+		if (argc < 4)
+			break;
+
+		if (!strcmp(argv[3], "P2P")) {
+			ovpn->mode = OVPN_MODE_P2P;
+		} else if (!strcmp(argv[3], "MP")) {
+			ovpn->mode = OVPN_MODE_MP;
+		} else {
+			fprintf(stderr, "Cannot parse iface mode: %s\n",
+				argv[3]);
+			return -1;
+		}
+		ovpn->mode_set = true;
+		break;
+	case CMD_DEL_IFACE:
+		break;
+	case CMD_LISTEN:
+		if (argc < 6)
+			return -EINVAL;
+
+		errno = 0;
+		ovpn->lport = strtoul(argv[3], NULL, 10);
+		if (errno == ERANGE || ovpn->lport > 65535) {
+			fprintf(stderr, "lport value out of range\n");
+			return -1;
+		}
+
+		if (strcmp(argv[4], "SYMM") == 0) {
+			ovpn->asymm_id = false;
+		} else if (strcmp(argv[4], "ASYMM") == 0) {
+			ovpn->asymm_id = true;
+		} else {
+			fprintf(stderr, "Cannot parse id type: %s\n", argv[4]);
+			return -1;
+		}
+
+		ovpn->peers_file = argv[5];
+
+		ovpn->sa_family = AF_INET;
+		if (argc > 6 && !strcmp(argv[6], "ipv6"))
+			ovpn->sa_family = AF_INET6;
+		break;
+	case CMD_CONNECT:
+		if (argc < 7)
+			return -EINVAL;
+
+		ovpn->sa_family = AF_INET;
+		/* any tx_id other than "none" selects asymmetric peer IDs */
+		ovpn->asymm_id = strcmp(argv[4], "none");
+
+		ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5],
+					  argv[6], NULL);
+		if (ret < 0) {
+			fprintf(stderr, "Cannot parse remote peer data\n");
+			return -1;
+		}
+
+		/* the optional key file also enables encryption */
+		if (argc > 7) {
+			ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+			ovpn->key_id = 0;
+			ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM;
+			ovpn->key_dir = KEY_DIR_OUT;
+
+			ret = ovpn_parse_key(argv[7], ovpn);
+			if (ret)
+				return -1;
+		}
+		break;
+	case CMD_NEW_PEER:
+		if (argc < 8)
+			return -EINVAL;
+
+		/* any tx_id other than "none" selects asymmetric peer IDs */
+		ovpn->asymm_id = strcmp(argv[4], "none");
+
+		errno = 0;
+		ovpn->lport = strtoul(argv[5], NULL, 10);
+		if (errno == ERANGE || ovpn->lport > 65535) {
+			fprintf(stderr, "lport value out of range\n");
+			return -1;
+		}
+
+		const char *vpnip = (argc > 8) ? argv[8] : NULL;
+
+		ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[6],
+					  argv[7], vpnip);
+		if (ret < 0)
+			return -1;
+		break;
+	case CMD_NEW_MULTI_PEER:
+		if (argc < 6)
+			return -EINVAL;
+
+		errno = 0;
+		ovpn->lport = strtoul(argv[3], NULL, 10);
+		if (errno == ERANGE || ovpn->lport > 65535) {
+			fprintf(stderr, "lport value out of range\n");
+			return -1;
+		}
+
+		if (!strcmp(argv[4], "SYMM")) {
+			ovpn->asymm_id = false;
+		} else if (!strcmp(argv[4], "ASYMM")) {
+			ovpn->asymm_id = true;
+		} else {
+			fprintf(stderr, "Cannot parse id type: %s\n", argv[4]);
+			return -1;
+		}
+
+		ovpn->peers_file = argv[5];
+
+		ovpn->mark = 0;
+		if (argc > 6) {
+			errno = 0;
+			ovpn->mark = strtoul(argv[6], NULL, 10);
+			if (errno == ERANGE || ovpn->mark > UINT32_MAX) {
+				fprintf(stderr, "mark value out of range\n");
+				return -1;
+			}
+		}
+		break;
+	case CMD_SET_PEER:
+		if (argc < 6)
+			return -EINVAL;
+
+		errno = 0;
+		ovpn->peer_id = strtoul(argv[3], NULL, 10);
+		if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+
+		errno = 0;
+		ovpn->keepalive_interval = strtoul(argv[4], NULL, 10);
+		if (errno == ERANGE) {
+			fprintf(stderr,
+				"keepalive interval value out of range\n");
+			return -1;
+		}
+
+		errno = 0;
+		ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10);
+		if (errno == ERANGE) {
+			fprintf(stderr,
+				"keepalive timeout value out of range\n");
+			return -1;
+		}
+		break;
+	case CMD_DEL_PEER:
+		if (argc < 4)
+			return -EINVAL;
+
+		errno = 0;
+		ovpn->peer_id = strtoul(argv[3], NULL, 10);
+		if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+		break;
+	case CMD_GET_PEER:
+		/* without a peer ID all peers are queried */
+		ovpn->peer_id = PEER_ID_UNDEF;
+		if (argc > 3) {
+			errno = 0;
+			ovpn->peer_id = strtoul(argv[3], NULL, 10);
+			if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+				fprintf(stderr, "peer ID value out of range\n");
+				return -1;
+			}
+		}
+		break;
+	case CMD_NEW_KEY:
+		if (argc < 9)
+			return -EINVAL;
+
+		errno = 0;
+		ovpn->peer_id = strtoul(argv[3], NULL, 10);
+		if (errno == ERANGE) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+
+		ret = ovpn_parse_key_slot(argv[4], ovpn);
+		if (ret)
+			return -1;
+
+		/* usage() documents key IDs from 0 to 7 */
+		errno = 0;
+		ovpn->key_id = strtoul(argv[5], NULL, 10);
+		if (errno == ERANGE || ovpn->key_id > 7) {
+			fprintf(stderr, "key ID out of range\n");
+			return -1;
+		}
+
+		ret = ovpn_parse_cipher(argv[6], ovpn);
+		if (ret < 0)
+			return -1;
+
+		ret = ovpn_parse_key_direction(argv[7], ovpn);
+		if (ret < 0)
+			return -1;
+
+		ret = ovpn_parse_key(argv[8], ovpn);
+		if (ret)
+			return -1;
+		break;
+	case CMD_DEL_KEY:
+		if (argc < 4)
+			return -EINVAL;
+
+		errno = 0;
+		ovpn->peer_id = strtoul(argv[3], NULL, 10);
+		if (errno == ERANGE) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+
+		/* the slot is optional and defaults to PRIMARY, as stated
+		 * by usage(); argv[4] is NULL when it is omitted
+		 */
+		ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+		if (argc > 4) {
+			ret = ovpn_parse_key_slot(argv[4], ovpn);
+			if (ret)
+				return ret;
+		}
+		break;
+	case CMD_GET_KEY:
+		if (argc < 5)
+			return -EINVAL;
+
+		errno = 0;
+		ovpn->peer_id = strtoul(argv[3], NULL, 10);
+		if (errno == ERANGE) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+
+		ret = ovpn_parse_key_slot(argv[4], ovpn);
+		if (ret)
+			return ret;
+		break;
+	case CMD_SWAP_KEYS:
+		if (argc < 4)
+			return -EINVAL;
+
+		errno = 0;
+		ovpn->peer_id = strtoul(argv[3], NULL, 10);
+		if (errno == ERANGE) {
+			fprintf(stderr, "peer ID value out of range\n");
+			return -1;
+		}
+		break;
+	case CMD_LISTEN_MCAST:
+		break;
+	case CMD_INVALID:
+		break;
+	}
+
+	return 0;
+}
+
+/* Entry point: identify the command in argv[1], parse its arguments
+ * and run it.
+ *
+ * Return: 0 on success, otherwise the error of the step that failed;
+ * the usage text is printed for an unknown command or when arguments
+ * are missing.
+ */
+int main(int argc, char *argv[])
+{
+	struct ovpn_ctx ctx;
+	int err;
+
+	if (argc < 2) {
+		usage(argv[0]);
+		return -1;
+	}
+
+	/* start from an empty context: no address family, no cipher */
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.sa_family = AF_UNSPEC;
+	ctx.cipher = OVPN_CIPHER_ALG_NONE;
+	ctx.cmd = ovpn_parse_cmd(argv[1]);
+
+	if (ctx.cmd == CMD_INVALID) {
+		fprintf(stderr, "Error: unknown command.\n\n");
+		usage(argv[0]);
+		return -1;
+	}
+
+	err = ovpn_parse_cmd_args(&ctx, argc, argv);
+	if (err < 0) {
+		fprintf(stderr, "Error: invalid arguments.\n\n");
+		/* -EINVAL is what the parser returns for missing arguments */
+		if (err == -EINVAL)
+			usage(argv[0]);
+		return err;
+	}
+
+	err = ovpn_run_cmd(&ctx);
+	if (err != 0)
+		fprintf(stderr, "Cannot execute command: %s (%d)\n",
+			strerror(-err), err);
+
+	return err;
+}
diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt
new file mode 100644
index 000000000000..3cb67b560705
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt
@@ -0,0 +1,6 @@
+1 10 5.5.5.2
+2 11 5.5.5.3
+3 12 5.5.5.4
+4 13 5.5.5.5
+5 14 5.5.5.6
+6 15 5.5.5.7
diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
new file mode 100755
index 000000000000..32504079a2b8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+# Run the generic test (test.sh) with ALG preset to "chachapoly".
+# NOTE(review): ALG is presumably handed to ovpn-cli as the cipher name
+# (its help lists "chachapoly" as CHACHA20POLY1305) - confirm in common.sh.
+ALG="chachapoly"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
new file mode 100755
index 000000000000..093d44772ffd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+# Run test-close-socket.sh with PROTO preset to "TCP".
+# NOTE(review): PROTO is presumably evaluated by the helpers in common.sh
+# to pick the transport - confirm.
+PROTO="TCP"
+
+source test-close-socket.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh
new file mode 100755
index 000000000000..0d09df14fe8e
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+# abort on the first failing command
+set -e
+
+# cleanup, create_ns, setup_ns, add_peer, NUM_PEERS and OVPN_CLI are not
+# defined in this script; presumably they all come from common.sh
+source ./common.sh
+
+# start from a clean state
+cleanup
+
+# best effort: a failing modprobe does not abort the test
+modprobe -q ovpn || true
+
+# indexes run from 0 to NUM_PEERS inclusive; index 0 (peer0) is the one
+# all the other peers are tested against below
+for p in $(seq 0 ${NUM_PEERS}); do
+	create_ns ${p}
+done
+
+# index p is set up with 5.5.5.(p + 1)/24
+for p in $(seq 0 ${NUM_PEERS}); do
+	setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	add_peer ${p}
+done
+
+# set keepalive interval (60) and timeout (120) on both ends: peer0 knows
+# the remote peer as ID p, peer p addresses its peer as ID p + 9
+for p in $(seq 1 ${NUM_PEERS}); do
+	ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+	ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} $((${p}+9)) 60 120
+done
+
+sleep 1
+
+# flood ping every peer from peer0 (500 packets, 3 seconds deadline)
+for p in $(seq 1 ${NUM_PEERS}); do
+	ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+# one-shot iperf3 server in peer0, 3 seconds client run from peer1
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+cleanup
+
+# best effort: the module may still be in use
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh
new file mode 100755
index 000000000000..ba5d725e18b0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-float.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+# Run the generic test (test.sh) with FLOAT preset to "1".
+# NOTE(review): presumably enables the peer float part of test.sh - confirm.
+FLOAT="1"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-large-mtu.sh b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
new file mode 100755
index 000000000000..ce2a2cb64f72
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+# Run the generic test (test.sh) with MTU preset to "1500"; test.sh passes
+# ${MTU} as third argument to setup_ns.
+MTU="1500"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-mark.sh b/tools/testing/selftests/net/ovpn/test-mark.sh
new file mode 100755
index 000000000000..8534428ed3eb
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-mark.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+#	Author:	Ralf Lici <ralf@mandelbit.com>
+#		Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+# abort on the first failing command
+set -e
+
+# FW mark passed to new_multi_peer and matched by the nftables rule below
+MARK=1056
+
+# cleanup, create_ns, setup_ns, add_peer, NUM_PEERS, OVPN_CLI, ALG and
+# UDP_PEERS_FILE are not defined in this script; presumably they all come
+# from common.sh
+source ./common.sh
+
+# start from a clean state
+cleanup
+
+# best effort: a failing modprobe does not abort the test
+modprobe -q ovpn || true
+
+for p in $(seq 0 "${NUM_PEERS}"); do
+	create_ns "${p}"
+done
+
+# only indexes 0..3 are configured and used from here on
+for p in $(seq 0 3); do
+	setup_ns "${p}" 5.5.5.$((p + 1))/24
+done
+
+# add peer0 with mark
+ip netns exec peer0 "${OVPN_CLI}" new_multi_peer tun0 1 ASYMM \
+	"${UDP_PEERS_FILE}" \
+	${MARK}
+# new_key arguments: peer ID, slot 1 (primary), key ID 0, cipher,
+# key direction 0, key file
+for p in $(seq 1 3); do
+	ip netns exec peer0 "${OVPN_CLI}" new_key tun0 "${p}" 1 0 "${ALG}" 0 \
+		data64.key
+done
+
+for p in $(seq 1 3); do
+	add_peer "${p}"
+done
+
+# set keepalive interval (60) and timeout (120) on both ends
+for p in $(seq 1 3); do
+	ip netns exec peer0 "${OVPN_CLI}" set_peer tun0 "${p}" 60 120
+	ip netns exec peer"${p}" "${OVPN_CLI}" set_peer tun"${p}" \
+		$((p + 9)) 60 120
+done
+
+sleep 1
+
+# without any filter in place every peer must be reachable
+for p in $(seq 1 3); do
+	ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((p + 1))
+done
+
+echo "Adding an nftables drop rule based on mark value ${MARK}"
+ip netns exec peer0 nft flush ruleset
+ip netns exec peer0 nft 'add table inet filter'
+ip netns exec peer0 nft 'add chain inet filter output {
+	type filter hook output priority 0;
+	policy accept;
+}'
+ip netns exec peer0 nft add rule inet filter output \
+	meta mark == ${MARK} \
+	counter drop
+
+# initial value of the rule counter: the number following "packets " in
+# the chain listing
+DROP_COUNTER=$(ip netns exec peer0 nft list chain inet filter output \
+	| sed -n 's/.*packets \([0-9]*\).*/\1/p')
+sleep 1
+
+# ping should fail
+for p in $(seq 1 3); do
+	# a successful ping means the rule did not drop the traffic: abort
+	PING_OUTPUT=$(ip netns exec peer0 ping \
+		-qfc 500 -w 1 5.5.5.$((p + 1)) 2>&1) && exit 1
+	echo "${PING_OUTPUT}"
+	# first field of ping's "packets transmitted" summary line
+	LOST_PACKETS=$(echo "$PING_OUTPUT" \
+		| awk '/packets transmitted/ { print $1 }')
+	# increment the drop counter by the amount of lost packets
+	DROP_COUNTER=$((DROP_COUNTER + LOST_PACKETS))
+done
+
+# check if the final nft counter matches our counter
+TOTAL_COUNT=$(ip netns exec peer0 nft list chain inet filter output \
+	| sed -n 's/.*packets \([0-9]*\).*/\1/p')
+if [ "${DROP_COUNTER}" -ne "${TOTAL_COUNT}" ]; then
+	echo "Expected ${TOTAL_COUNT} drops, got ${DROP_COUNTER}"
+	exit 1
+fi
+
+echo "Removing the drop rule"
+ip netns exec peer0 nft flush ruleset
+sleep 1
+
+# with the rule gone every peer must be reachable again
+for p in $(seq 1 3); do
+	ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((p + 1))
+done
+
+cleanup
+
+# best effort: the module may still be in use
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh b/tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh
new file mode 100755
index 000000000000..b3711a81b463
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-symmetric-id-float.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#	Author:	Ralf Lici <ralf@mandelbit.com>
+#		Antonio Quartulli <antonio@openvpn.net>
+
+# Run the generic test (test.sh) with SYMMETRIC_ID and FLOAT preset to "1".
+# NOTE(review): presumably selects symmetric peer IDs plus the peer float
+# part of the test - confirm in common.sh/test.sh.
+SYMMETRIC_ID="1"
+FLOAT="1"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh b/tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh
new file mode 100755
index 000000000000..188cafb67b2f
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-symmetric-id-tcp.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#	Author:	Ralf Lici <ralf@mandelbit.com>
+#		Antonio Quartulli <antonio@openvpn.net>
+
+# Run the generic test (test.sh) with PROTO preset to "TCP" and
+# SYMMETRIC_ID preset to 1.
+# NOTE(review): SYMMETRIC_ID presumably selects symmetric peer IDs -
+# confirm in common.sh.
+PROTO="TCP"
+SYMMETRIC_ID=1
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-symmetric-id.sh b/tools/testing/selftests/net/ovpn/test-symmetric-id.sh
new file mode 100755
index 000000000000..35b119c72e4f
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-symmetric-id.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#	Author:	Ralf Lici <ralf@mandelbit.com>
+#		Antonio Quartulli <antonio@openvpn.net>
+
+# Run the generic test (test.sh) with SYMMETRIC_ID preset to "1".
+# NOTE(review): presumably selects symmetric peer IDs - confirm in common.sh.
+SYMMETRIC_ID="1"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh
new file mode 100755
index 000000000000..ba3f1f315a34
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+# Run the generic test (test.sh) with PROTO preset to "TCP"; test.sh only
+# looks up the remote address for its capture filter when PROTO is "UDP".
+PROTO="TCP"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
new file mode 100755
index 000000000000..b60e94a4094e
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -0,0 +1,165 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+#  Author:	Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	setup_listener ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+	ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+	ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \
+		$((${p}+ID_OFFSET)) 60 120
+done
+
+sleep 1
+
+TCPDUMP_TIMEOUT="1.5s"
+for p in $(seq 1 ${NUM_PEERS}); do
+	# The first part of the data packet header consists of:
+	# - TCP only: 2 bytes for the packet length
+	# - 5 bits for opcode ("9" for DATA_V2)
+	# - 3 bits for key-id ("0" at this point)
+	# - 24 bits for peer-id:
+	#     - with asymmetric ID: "${p}" one way and "${p} + 9" the other way
+	#     - with symmetric ID: "${p}" both ways
+	HEADER1=$(printf "0x4800000%x" ${p})
+	HEADER2=$(printf "0x4800000%x" $((${p} + ID_OFFSET)))
+	RADDR=""
+	if [ "${PROTO}" == "UDP" ]; then
+		RADDR=$(awk "NR == ${p} {print \$3}" ${UDP_PEERS_FILE})
+	fi
+
+	timeout ${TCPDUMP_TIMEOUT} ip netns exec peer${p} \
+		tcpdump --immediate-mode -p -ni veth${p} -c 1 \
+		"$(build_capture_filter "${HEADER1}" "${RADDR}")" \
+		>/dev/null 2>&1 &
+	TCPDUMP_PID1=$!
+	timeout ${TCPDUMP_TIMEOUT} ip netns exec peer${p} \
+		tcpdump --immediate-mode -p -ni veth${p} -c 1 \
+		"$(build_capture_filter "${HEADER2}" "${RADDR}")" \
+		>/dev/null 2>&1 &
+	TCPDUMP_PID2=$!
+
+	sleep 0.3
+	ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+	ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1))
+
+	wait ${TCPDUMP_PID1}
+	wait ${TCPDUMP_PID2}
+done
+
+# ping LAN behind client 1
+ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP}
+
+if [ "$FLOAT" == "1" ]; then
+	# make clients float..
+	for p in $(seq 1 ${NUM_PEERS}); do
+		ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p}
+		ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p}
+	done
+	for p in $(seq 1 ${NUM_PEERS}); do
+		ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1
+	done
+fi
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+echo "Adding secondary key and then swap:"
+for p in $(seq 1 ${NUM_PEERS}); do
+	ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 \
+		data64.key
+	ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} \
+		$((${p} + ID_OFFSET)) 2 1 ${ALG} 1 data64.key
+	ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} \
+		$((${p} + ID_OFFSET))
+done
+
+sleep 1
+
+echo "Querying all peers:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0
+ip netns exec peer1 ${OVPN_CLI} get_peer tun1
+
+echo "Querying peer 1:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1
+
+echo "Querying non-existent peer 20:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 20 || true
+
+echo "Deleting peer 1:"
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1
+ip netns exec peer1 ${OVPN_CLI} del_peer tun1 $((1 + ID_OFFSET))
+
+echo "Querying keys:"
+for p in $(seq 2 ${NUM_PEERS}); do
+	ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} \
+		$((${p} + ID_OFFSET)) 1
+	ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} \
+		$((${p} + ID_OFFSET)) 2
+done
+
+echo "Deleting peer while sending traffic:"
+(ip netns exec peer2 ping -qf -w 4 5.5.5.1)&
+sleep 2
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2
+# following command fails in TCP mode
+# (both ends get conn reset when one peer disconnects)
+ip netns exec peer2 ${OVPN_CLI} del_peer tun2 $((2 + ID_OFFSET)) || true
+
+echo "Deleting keys:"
+for p in $(seq 3 ${NUM_PEERS}); do
+	ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} \
+		$((${p} + ID_OFFSET)) 1
+	ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} \
+		$((${p} + ID_OFFSET)) 2
+done
+
+echo "Setting timeout to 3s MP:"
+for p in $(seq 3 ${NUM_PEERS}); do
+	ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true
+	ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \
+		$((${p} + ID_OFFSET)) 0 0
+done
+# wait for peers to time out
+sleep 5
+
+echo "Setting timeout to 3s P2P:"
+for p in $(seq 3 ${NUM_PEERS}); do
+	ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} \
+		$((${p} + ID_OFFSET)) 3 3
+done
+sleep 5
+
+for p in $(seq 0 ${NUM_PEERS}); do
+	compare_ntfs ${p}
+done
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt
new file mode 100644
index 000000000000..93de6465353c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/udp_peers.txt
@@ -0,0 +1,6 @@
+1 10 10.10.1.1 1 10.10.1.2 1 5.5.5.2
+2 11 10.10.2.1 1 10.10.2.2 1 5.5.5.3
+3 12 10.10.3.1 1 10.10.3.2 1 5.5.5.4
+4 13 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5
+5 14 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6
+6 15 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7
diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile
index 31cfb666ba8b..ff54641493e9 100644
--- a/tools/testing/selftests/net/packetdrill/Makefile
+++ b/tools/testing/selftests/net/packetdrill/Makefile
@@ -1,9 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
 
-TEST_INCLUDES := ksft_runner.sh \
-		 defaults.sh \
-		 set_sysctls.py \
-		 ../../kselftest/ktap_helpers.sh
+TEST_INCLUDES := \
+	defaults.sh \
+	ksft_runner.sh \
+	set_sysctls.py \
+	../../kselftest/ktap_helpers.sh \
+# end of TEST_INCLUDES
 
 TEST_PROGS := $(wildcard *.pkt)
 
diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config
index 0237ed98f3c0..c4a19a785521 100644
--- a/tools/testing/selftests/net/packetdrill/config
+++ b/tools/testing/selftests/net/packetdrill/config
@@ -1,6 +1,6 @@
-CONFIG_IPV6=y
-CONFIG_HZ_1000=y
 CONFIG_HZ=1000
+CONFIG_HZ_1000=y
+CONFIG_IPV6=y
 CONFIG_NET_NS=y
 CONFIG_NET_SCH_FIFO=y
 CONFIG_NET_SCH_FQ=y
diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh
index 1095a7b22f44..37edd3dc3b07 100755
--- a/tools/testing/selftests/net/packetdrill/defaults.sh
+++ b/tools/testing/selftests/net/packetdrill/defaults.sh
@@ -51,7 +51,8 @@ sysctl -q net.ipv4.tcp_pacing_ss_ratio=200
 sysctl -q net.ipv4.tcp_pacing_ca_ratio=120
 sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
 
-sysctl -q net.ipv4.tcp_fastopen=0x70403
+sysctl -q net.ipv4.tcp_fastopen=0x3
+# Use TFO_COOKIE in ksft_runner.sh for this key.
 sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4
 
 sysctl -q net.ipv4.tcp_syncookies=1
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index ef8b25a606d8..0a97d5ae3469 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -3,21 +3,35 @@
 
 source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh"
 
-readonly ipv4_args=('--ip_version=ipv4 '
-		    '--local_ip=192.168.0.1 '
-		    '--gateway_ip=192.168.0.1 '
-		    '--netmask_ip=255.255.0.0 '
-		    '--remote_ip=192.0.2.1 '
-		    '-D CMSG_LEVEL_IP=SOL_IP '
-		    '-D CMSG_TYPE_RECVERR=IP_RECVERR ')
-
-readonly ipv6_args=('--ip_version=ipv6 '
-		    '--mtu=1520 '
-		    '--local_ip=fd3d:0a0b:17d6::1 '
-		    '--gateway_ip=fd3d:0a0b:17d6:8888::1 '
-		    '--remote_ip=fd3d:fa7b:d17d::1 '
-		    '-D CMSG_LEVEL_IP=SOL_IPV6 '
-		    '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ')
+declare -A ip_args=(
+	[ipv4]="--ip_version=ipv4
+		--local_ip=192.168.0.1
+		--gateway_ip=192.168.0.1
+		--netmask_ip=255.255.0.0
+		--remote_ip=192.0.2.1
+		-D TFO_COOKIE=3021b9d889017eeb
+		-D TFO_COOKIE_ZERO=b7c12350a90dc8f5
+		-D CMSG_LEVEL_IP=SOL_IP
+		-D CMSG_TYPE_RECVERR=IP_RECVERR"
+	[ipv4-mapped-ipv6]="--ip_version=ipv4-mapped-ipv6
+		--local_ip=192.168.0.1
+		--gateway_ip=192.168.0.1
+		--netmask_ip=255.255.0.0
+		--remote_ip=192.0.2.1
+		-D TFO_COOKIE=3021b9d889017eeb
+		-D TFO_COOKIE_ZERO=b7c12350a90dc8f5
+		-D CMSG_LEVEL_IP=SOL_IPV6
+		-D CMSG_TYPE_RECVERR=IPV6_RECVERR"
+	[ipv6]="--ip_version=ipv6
+		--mtu=1520
+		--local_ip=fd3d:0a0b:17d6::1
+		--gateway_ip=fd3d:0a0b:17d6:8888::1
+		--remote_ip=fd3d:fa7b:d17d::1
+		-D TFO_COOKIE=c1d1e9742a47a9bc
+		-D TFO_COOKIE_ZERO=82af1a8f9a205c34
+		-D CMSG_LEVEL_IP=SOL_IPV6
+		-D CMSG_TYPE_RECVERR=IPV6_RECVERR"
+)
 
 if [ $# -ne 1 ]; then
 	ktap_exit_fail_msg "usage: $0 <script>"
@@ -35,28 +49,23 @@ failfunc=ktap_test_fail
 
 if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
 	optargs+=('--tolerance_usecs=14000')
+	failfunc=ktap_test_xfail
+fi
 
-	# xfail tests that are known flaky with dbg config, not fixable.
-	# still run them for coverage (and expect 100% pass without dbg).
-	declare -ar xfail_list=(
-		"tcp_eor_no-coalesce-retrans.pkt"
-		"tcp_fast_recovery_prr-ss.*.pkt"
-		"tcp_slow_start_slow-start-after-win-update.pkt"
-		"tcp_timestamping.*.pkt"
-		"tcp_user_timeout_user-timeout-probe.pkt"
-		"tcp_zerocopy_epoll_.*.pkt"
-		"tcp_tcp_info_tcp-info-.*-limited.pkt"
-	)
-	readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$"
-	[[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail
+ip_versions=$(grep -E '^--ip_version=' "$script" | cut -d '=' -f 2)
+if [[ -z $ip_versions ]]; then
+	ip_versions="ipv4 ipv6 ipv4-mapped-ipv6"	# script pins no version: run every variant
+elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then
+	ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions"
+	exit "$KSFT_FAIL"
 fi
 
 ktap_print_header
-ktap_set_plan 2
+ktap_set_plan $(echo $ip_versions | wc -w)
 
-unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \
-	&& ktap_test_pass "ipv4" || $failfunc "ipv4"
-unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \
-	&& ktap_test_pass "ipv6" || $failfunc "ipv6"
+for ip_version in $ip_versions; do	# ip_args entry stays unquoted so it splits into options
+	unshare -n packetdrill ${ip_args[$ip_version]} "${optargs[@]}" "$script" > /dev/null \
+		&& ktap_test_pass "$ip_version" || $failfunc "$ip_version"
+done
 
 ktap_finished
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt
new file mode 100644
index 000000000000..07e9936e70e6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first.pkt
@@ -0,0 +1,24 @@
+// 3rd ACK + 1st data segment lost, data segments with ce
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
+// 1st data segment lost
++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 1000 e0b 1,nop,nop,nop,sack 1001:2001>
++.002 accept(3, ..., ...) = 4
+
++0.2 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1,nop>
+
++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 264
++.001 > [ect0] . 1:1(0) ack 3001 <ECN e1b 1 ceb 3000 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt
new file mode 100644
index 000000000000..76b8422b34dc
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_2nd_data_as_first_connect.pkt
@@ -0,0 +1,30 @@
+// 3rd ACK + 1st data segment lost, 2nd data segments with ce
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 2000) = 2000
+// 1st data segment lost + 2nd gets CE
++.002 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.000 > [ect0] P.5 1005:2001(996) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] .6 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 996 e1b 1,nop,nop,nop,sack 1005:2001>
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
+
++0.002~+0.1 > [ect0] .5 1:1005(1004) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.05 < [ect0] .6 1:1(0) ack 2001 win 264 <ECN e0b 1005 ceb 996 e1b 1,nop>
+
++0.01 write(4, ..., 1000) = 1000
++0~+0.002 > [ect0] P.5 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.1 < [ect0] .5 1:1001(1000) ack 3001 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++0~+0.01 > [ect0] .5 3001:3001(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt
new file mode 100644
index 000000000000..84060e490589
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_after_synack_rxmt.pkt
@@ -0,0 +1,19 @@
+// Test 3rd ACK flags when SYN-ACK is rexmitted
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.1 < [ect0] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Our code currently sends a challenge ACK
+// when it receives a SYN in ESTABLISHED state
+// based on the latest SYN
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt
new file mode 100644
index 000000000000..d3fe09d0606f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_ce_updates_received_ce.pkt
@@ -0,0 +1,18 @@
+// Third ACK CE increases r.cep
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ce] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] WAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt
new file mode 100644
index 000000000000..d28722db42b1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_ack_lost_data_ce.pkt
@@ -0,0 +1,22 @@
+// 3rd ACK lost, CE for the first data segment
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// 3rd ACK lost
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.05 < [ce] EAP. 1001:2001(1000) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.001 > [ect0] EWA. 1:1(0) ack 2001 <ECN e1b 1 ceb 2000 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt
new file mode 100644
index 000000000000..a4d808116e34
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_3rd_dups.pkt
@@ -0,0 +1,26 @@
+// Test SYN/ACK rexmit triggered 3rd ACK duplicate + CE on first data seg
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// SYN/ACK rexmitted => two 3rd ACKs in-flight
++1.0~+1.1 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// Delivered 1st 3rd ACK
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// Duplicate 3rd ACK delivered
++1.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
+
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
+   +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt
new file mode 100644
index 000000000000..410a303c6d49
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_acc_ecn_disabled.pkt
@@ -0,0 +1,13 @@
+// Test that when accurate ECN is disabled,
+// client uses RFC3168 ECN for SYN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt
new file mode 100644
index 000000000000..10728114b11b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_then_notecn_syn.pkt
@@ -0,0 +1,28 @@
+// Test that SYN-ACK with ACE flags and without
+// ACE flags got dropped. Although we disable ECN,
+// we shouldn't consider this as blackholed as
+// these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN
++0.1 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// Write with AccECN option but with ip-noecn since we received one SYN with ACE=0
++0.01 write(4, ..., 100) = 100
++.002 > [noecn] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt
new file mode 100644
index 000000000000..04d928f0d44d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_accecn_to_rfc3168.pkt
@@ -0,0 +1,18 @@
+// Test AccECN -> RFC3168 fallback when sysctl asks for RFC3168 ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt
new file mode 100644
index 000000000000..788af6bea69c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_drop.pkt
@@ -0,0 +1,34 @@
+// Client negotiates AccECN and starts sending
+// AccECN option in last ACK and data segments
+// Middlebox drops AccECN option and client
+// reverts to ACE flags only
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+sysctl -q net.ipv4.tcp_ecn_option_beacon=1
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 1001,nop>
+   +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <ECN e1b 1 ceb 0 e0b 2001,nop,nop,nop,sack 1:1001>
+
++0.05 < [ect0] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 1001 <nop,nop,sack 1:1001>
+
++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EA. 1:1(0) ack 2001
+   +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt
new file mode 100644
index 000000000000..f5839c2e682d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_client_accecn_options_lost.pkt
@@ -0,0 +1,38 @@
+// Client negotiates AccECN and starts sending
+// AccECN option in last ACK and data segments
+// Middlebox accepts AccECN option but some packets
+// are lost due to congestion. Client should
+// continue to send AccECN option
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.102 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1  < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1024,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// Send
++0.01 write(4, ..., 3000) = 3000
++.002 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 1013:2025(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 2025:3001(976) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// First two segments were lost due to congestion as SACK was
+// received acknowledging 3rd segment
++0.1 < [ect0] .5 1:1(0) ack 1 win 264 <ECN e1b 1 ceb 0 e0b 977,nop,nop,nop,sack 2025:3001>
+
+// Since data with option was SACKed, we can
+// continue to use AccECN option for the rest of
+// the connection. This one is a rexmt
++.02~+0.5 > [ect0] .5 1:1013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.1 < [ect0] .5 1:1(0) ack 3001 win 264 <ECN e1b 1 ceb 0 e0b 3000,nop>
+
+// Send new data, it should contain AccECN option
++0.01 write(4, ..., 2000) = 2000
++.002 > [ect0] .5 3001:4013(1012) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.002 > [ect0] P.5 4013:5001(988) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt
new file mode 100644
index 000000000000..c00b36d6a833
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_clientside_disabled.pkt
@@ -0,0 +1,12 @@
+// AccECN sysctl server-side only, no ECN/AccECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=5
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,nop,nop,nop,wscale 8>
++.002 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt
new file mode 100644
index 000000000000..f9c27f39f354
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_close_local_close_then_remote_fin.pkt
@@ -0,0 +1,25 @@
+// Test basic connection teardown where local process closes first:
+// the local process calls close() first, so we send a FIN, and receive an ACK.
+// Then we receive a FIN and ACK it.
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.01...0.011 connect(3, ..., ...) = 0
+   +0 > [noecn] SEWA 0:0(0) <...>
+   +0 < [ect1] SW. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+   +0 > [ect0] EW. 1:1(0) ack 1
+
+   +0 write(3, ..., 1000) = 1000
+   +0 > [ect0] P5. 1:1001(1000) ack 1
+   +0 < [ect0] .5 1:1(0) ack 1001 win 257
+
+   +0 close(3) = 0
+   +0 > [ect0] F5. 1001:1001(0) ack 1
+   +0 < [ect0] .5 1:1(0) ack 1002 win 257
+
+   +0 < [ect0] F5. 1:1(0) ack 1002 win 257
+   +0 > [ect0] . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt
new file mode 100644
index 000000000000..6d771234124a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_2ndlargeack.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
++0.05 < [ect0] .5 1:1(0) ack 1461 win 264
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 8, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt
new file mode 100644
index 000000000000..76384f52b021
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_falseoverflow_detect.pkt
@@ -0,0 +1,31 @@
+// Test false overflow detection with option used to rule out overflow
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
+// Stop sending option to allow easier testing
++0 `sysctl -q net.ipv4.tcp_ecn_option=0`
+
++0.002 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+
++0.05 < [ect0] .5 1:1(0) ack 1460 win 264 <ECN e0b 1461 ceb 0 e1b 1,nop>
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264 <ECN e0b 14601 ceb 0 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_e0_bytes == 14600, tcpi_delivered_e0_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt
new file mode 100644
index 000000000000..8bce5dce35a2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack.pkt
@@ -0,0 +1,24 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
++0.05 < [ect0] .5 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt
new file mode 100644
index 000000000000..5f2b147214f4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_largeack2.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (> ACE field max)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+  // Fake CE
++0.05 < [ect0] .6 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt
new file mode 100644
index 000000000000..fd07bdc14f37
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_maxack.pkt
@@ -0,0 +1,25 @@
+// Test a large ACK (at ACE field max delta)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 14600) = 14600
++.002 > [ect0] P.5 1:14601(14600) ack 1
+  // Fake CE
++0.05 < [ect0] .4 1:1(0) ack 14601 win 264
+
++0.01 %{ assert tcpi_delivered_ce == 7, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt
new file mode 100644
index 000000000000..cb1e70ff2d26
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_delivered_updates.pkt
@@ -0,0 +1,70 @@
+// Test basic AccECN CEP/CEB/E0B/E1B functionality & CEP wrapping
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+  // Fake CE
++0.05 < [ect0] WA. 1:1(0) ack 1001 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 1, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 1000, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+  // Fake ect0
++0.05 < [ect0] WA. 1:1(0) ack 2001 win 264 <ECN e0b 1001 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 1, tcpi_delivered_ce
+assert tcpi_delivered_e0_bytes == 1000, tcpi_delivered_e0_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 2001:3001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+  // Fake ce
++0.05 < [ect0] EWA. 1:1(0) ack 3001 win 264 <ECN e0b 1001 ceb 2000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 2, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 2000, tcpi_delivered_ce_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 3001:4001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+  // Fake ect1
++0.05 < [ect0] EWA. 1:1(0) ack 4001 win 264 <ECN e0b 1001 ceb 2000 e1b 1001,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 2, tcpi_delivered_ce
+assert tcpi_delivered_e1_bytes == 1000, tcpi_delivered_e1_bytes
+}%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 4001:5001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+  // Fake ce
++0.05 < [ect0] . 1:1(0) ack 5001 win 264 <ECN e0b 1001 ceb 3000 e1b 1001,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 3, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 3000, tcpi_delivered_ce_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt
new file mode 100644
index 000000000000..6627c7bb2d26
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn3.pkt
@@ -0,0 +1,12 @@
+// Test that tcp_ecn=4 uses RFC3168 ECN for SYN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=4
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.05 connect(4, ..., ...) = 0
+
++.002 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt
new file mode 100644
index 000000000000..51879477bb50
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ecn_field_updates_opt.pkt
@@ -0,0 +1,35 @@
+// Test receiver-side AccECN CEP/CEB/E0B/E1B feedback updates & CEP wrapping
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.05 < [ce] EAP. 1:1001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 1001 <ECN e1b 1 ceb 1000 e0b 1,nop>
+   +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect0] EAP. 1001:2001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] WA. 1:1(0) ack 2001 <ECN e1b 1 ceb 1000 e0b 1001,nop>
+   +0 read(4, ..., 1000) = 1000
+
++0.05 < [ce] EAP. 2001:3001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EWA. 1:1(0) ack 3001 <ECN e1b 1 ceb 2000 e0b 1001,nop>
+   +0 read(4, ..., 1000) = 1000
+
++0.05 < [ect1] EAP. 3001:4001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] EWA. 1:1(0) ack 4001 <ECN e1b 1001 ceb 2000 e0b 1001,nop>
+   +0 read(4, ..., 1000) = 1000
+
++0.05 < [ce] EAP. 4001:5001(1000) ack 1 win 257 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] . 1:1(0) ack 5001 <ECN e1b 1001 ceb 3000 e0b 1001,nop>
+   +0 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt
new file mode 100644
index 000000000000..0c72fa4a1251
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_ipflags_drop.pkt
@@ -0,0 +1,14 @@
+// Test IP flags drop
+--tolerance_usecs=50000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 1.1 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02 ~ +1.1 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt
new file mode 100644
index 000000000000..171f9433e55f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_listen_opt_drop.pkt
@@ -0,0 +1,16 @@
+// SYN/ACK option drop test
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.02 ~+2 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.02 ~+5 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.02 ~+8 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt
new file mode 100644
index 000000000000..0f65cf56cd2b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_ack_drop.pkt
@@ -0,0 +1,28 @@
+// Test that SYN-ACK with ACE flags and without
+// ACE flags got dropped. Although we disable ECN,
+// we shouldn't consider this as blackholed as
+// these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK may be getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++4~+4.4 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK after sending 3rd retransmission, not a blackhole
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt
new file mode 100644
index 000000000000..343181633980
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_multiple_syn_drop.pkt
@@ -0,0 +1,18 @@
+// Test that SYN with ACE flags and without
+// ACE flags got dropped. Although we disable
+// ECN, we shouldn't consider this as blackholed
+// as these are dropped due to congestion
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 3.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.02~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt
new file mode 100644
index 000000000000..37dabc4603c8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_bleach.pkt
@@ -0,0 +1,23 @@
+// Test AccECN flags bleach
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] . 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [noecn] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt
new file mode 100644
index 000000000000..5b14892fda51
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_connect.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 0,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt
new file mode 100644
index 000000000000..25f7cb2feb25
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_listen.pkt
@@ -0,0 +1,26 @@
+// Test basic AccECN negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt
new file mode 100644
index 000000000000..50e08c492a69
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_noopt_connect.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation without option
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt
new file mode 100644
index 000000000000..2904f1ba9975
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_negotiation_optenable.pkt
@@ -0,0 +1,23 @@
+// Test basic AccECN negotiation, late option enable
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++.05 < [ect0] EAP. 1:1(0) ack 1001 win 256 <ECN e0b 1001 ceb 0 e1b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt
new file mode 100644
index 000000000000..64e0fc1c1f14
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_no_ecn_after_accecn.pkt
@@ -0,0 +1,20 @@
+// Test client behavior on receiving a non-ECN SYN-ACK
+// after receiving an AccECN SYN-ACK and moving to
+// ESTABLISHED state
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+// Receive an AccECN SYN-ACK and move to ESTABLISHED
++0.05 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
+// Receive a non-ECN SYN-ACK and send a challenge ACK with ACE feedback
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt
new file mode 100644
index 000000000000..f407c629a3f7
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noopt.pkt
@@ -0,0 +1,27 @@
+// Test basic AccECN negotiation with option off using sysctl
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=0
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1
++0.05 < [ect0] EAP. 1:1(0) ack 1001 win 320
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1001:2001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt
new file mode 100644
index 000000000000..32454e7187f9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_noprogress.pkt
@@ -0,0 +1,27 @@
+// Test no progress filtering
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+  // Fake CE and claim no progress
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 1000 e1b 1,nop>
+
++0.01 %{
+assert tcpi_delivered_ce == 0, tcpi_delivered_ce
+assert tcpi_delivered_ce_bytes == 0, tcpi_delivered_ce_bytes
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt
new file mode 100644
index 000000000000..6597d5f2d778
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_notecn_then_accecn_syn.pkt
@@ -0,0 +1,28 @@
+// Test that a non-ECN SYN followed by a retransmitted
+// SYN carrying ACE flags does not negotiate AccECN:
+// the SYN-ACKs and later data stay non-ECN since
+// one SYN with ACE=0 was received
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN
++0.1 < [ect0] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
+// Write without AccECN option and with ip-noecn since we received one SYN with ACE=0
++0.01 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt
new file mode 100644
index 000000000000..0f97dfcfa82d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_fallback.pkt
@@ -0,0 +1,18 @@
+// Test RFC3168 fallback when sysctl asks for AccECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt
new file mode 100644
index 000000000000..9baffdd66fe5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_rfc3168_to_rfc3168.pkt
@@ -0,0 +1,18 @@
+// Test RFC3168 ECN when sysctl asks for RFC3168 ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEW 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SE. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.05 < . 1:1(0) ack 1 win 320
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt
new file mode 100644
index 000000000000..3fc56f9c6a6f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab.pkt
@@ -0,0 +1,28 @@
+// Test SACK space grab to fit AccECN option
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264
++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001>
++.01 < [ect0] EAP. 3001:4001(1000) ack 1 win 264
++0.002 > [ect0] EA. 1:1(0) ack 1 <ECN e1b 1001 ceb 0 e0b 1001,nop,nop,nop,sack 3001:4001 1001:2001>
++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 1001,nop,nop,nop,sack 5001:6001 3001:4001 1001:2001>
+// DSACK works?
++.01 < [ect0] EAP. 5001:6001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:6001 5001:6001 3001:4001>
++.01 < [ect1] EAP. 6001:7001(1000) ack 1 win 264
++0.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 2001 ceb 1000 e0b 2001,nop,nop,nop,sack 5001:7001 3001:4001 1001:2001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt
new file mode 100644
index 000000000000..1c075b5d81ae
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_sack_space_grab_with_ts.pkt
@@ -0,0 +1,39 @@
+// Test SACK space grab to fit AccECN option when TCP timestamps are in use
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 100,ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// One SACK block should allow all 3 AccECN fields:
++.01 < [ect1] EAP. 1001:2001(1000) ack 1 win 264 <nop,nop,TS val 3 ecr 100>
++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 160 ecr 2,ECN e1b 1001 ceb 0 e0b 1,nop,nop,nop,sack 1001:2001>
+
+// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check ect1 arriving.
++.01 < [ect1] EAP. 3001:4001(1000) ack 1 win 264 <nop,nop,TS val 4 ecr 100>
++0.002 > [ect0] EA. 1:1(0) ack 1 <nop,nop,TS val 172 ecr 2,ECN e1b 2001 ceb 0,nop,nop,sack 3001:4001 1001:2001>
+
+// Two SACK blocks should fit w/ AccECN if we only need to use 2 AccECN fields: check CE arriving.
++.01 < [ce] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 184 ecr 2,ECN e1b 2001 ceb 1000,nop,nop,sack 5001:6001 3001:4001>
+
+// Check that DSACK works, using 2 SACK blocks in total, if we only need to use 2 AccECN fields: check ect1 arriving.
++.01 < [ect1] EAP. 5001:6001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 196 ecr 2,ECN e1b 3001 ceb 1000,nop,nop,sack 5001:6001 5001:6001>
+
+// Check the case where the AccECN option doesn't fit, because sending ect0
+// with order 1 would require 3 AccECN fields,
+// and TS (12 bytes) + 2 SACK blocks (20 bytes) + 3 AccECN fields (2 + 3*3 bytes) > 40 bytes.
+// That's OK; Linux TCP AccECN is optimized for the ECT1 case, not ECT0.
++.01 < [ect0] EAP. 6001:7001(1000) ack 1 win 264 <nop,nop,TS val 5 ecr 100>
++0.002 > [ect0] WA. 1:1(0) ack 1 <nop,nop,TS val 204 ecr 2,nop,nop,sack 5001:7001 3001:4001 1001:2001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt
new file mode 100644
index 000000000000..6b88ab78bfce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled1.pkt
@@ -0,0 +1,20 @@
+// Test against classic ECN server
+// Not-ECT on SYN and server sets 1|0|1 (AE is unused for classic ECN)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SEA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [ect0] F.5 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt
new file mode 100644
index 000000000000..d24ada008ece
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_accecn_disabled2.pkt
@@ -0,0 +1,20 @@
+// Test against classic ECN server
+// Not-ECT on SYN and server sets 0|0|1
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SE. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [ect0] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++0 > [noecn] F. 101:101(0) ack 1 <...>
++0.1 < R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt
new file mode 100644
index 000000000000..a20d7e890ee1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_broken.pkt
@@ -0,0 +1,19 @@
+// Test against broken server (1|1|1)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] SEWA. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [noecn] F. 101:101(0) ack 1 <...>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt
new file mode 100644
index 000000000000..428255bedab7
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_ecn_disabled.pkt
@@ -0,0 +1,19 @@
+// Test against non-ECN server (0|0|0)
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 8>
++.002 > [noecn] . 1:1(0) ack 1 <nop, nop, TS val 200 ecr 700>
+
++0 write(4, ..., 100) = 100
++.002 > [noecn] P. 1:101(100) ack 1 <nop,nop,TS val 300 ecr 700>
++0 close(4) = 0
+
++.002 > [noecn] F. 101:101(0) ack 1 <nop,nop,TS val 400 ecr 700>
++0.1 < [noecn] R. 1:1(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt
new file mode 100644
index 000000000000..e9a5a0d3677c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_serverside_only.pkt
@@ -0,0 +1,18 @@
+// Test AccECN with sysctl set to server-side only
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=5
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt
new file mode 100644
index 000000000000..412fa903105c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_acked_after_retransmit.pkt
@@ -0,0 +1,18 @@
+// Test that SYN with ACE flags was Acked
+// after 2nd retransmission. In this case,
+// since we got SYN-ACK that supports Accurate
+// ECN, we consider this as successful negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 2.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+
++0.1 < [noecn] SW. 0:0(0) ack 1 win 32767 <mss 1016,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt
new file mode 100644
index 000000000000..4622754a2270
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ace_flags_drop.pkt
@@ -0,0 +1,16 @@
+// Test that SYN with ACE flags got dropped
+// We retry one more time with ACE and then
+// fall back to disabled ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 2.1 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++1~+1.1 > [noecn] S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.1 < [noecn] S. 0:0(0) ack 1 win 32767 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0~+0.01 > [noecn] . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt
new file mode 100644
index 000000000000..ee15f108cafe
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_acked_after_retransmit.pkt
@@ -0,0 +1,27 @@
+// Test that SYN-ACK with ACE flags was Acked
+// after 2nd retransmission. In this case,
+// since we got the last ACK that supports Accurate
+// ECN, we consider this as successful negotiation
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK may be getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK with ACE flags, state should be set to negotiation succeeded
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt
new file mode 100644
index 000000000000..ccfe353a8ee4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ack_ace_flags_drop.pkt
@@ -0,0 +1,26 @@
+// Test that SYN-ACK with ACE flags got dropped
+// We retry one more time with ACE and then
+// fall back to disabled ECN
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// SYN-ACK may be getting blackholed, disable ECN
++2~+2.2 > [noecn] S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Received an ACK with no ACE flags, state should be set to blackholed
++0.1 < [noecn] . 1:1(0) ack 1 win 320
++0 accept(3, ..., ...) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt
new file mode 100644
index 000000000000..dc83f7a18180
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ce.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < [ce] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SWA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt
new file mode 100644
index 000000000000..e63a8d018c37
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect0.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < [ect0] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SA. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt
new file mode 100644
index 000000000000..23c0e43b3dbe
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_syn_ect1.pkt
@@ -0,0 +1,13 @@
+// Test AccECN ECN field reflector in SYNACK
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < [ect1] SEWA 0:0(0) win 32792 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SEW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt
new file mode 100644
index 000000000000..c3497738f680
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce.pkt
@@ -0,0 +1,27 @@
+// Test SYNACK CE & received_ce update
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > [noecn] SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ce] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] WA. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.6 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .6  101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.6 101:201(100) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
+
++0.1  < [ect1] P.5 201:301(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .6 201:201(0) ack 101 <ECN e1b 101 ceb 0 e0b 101,nop,nop,nop,sack 201:301>
+
++0.01 < [ce] .6 401:501(100) ack 201 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
++.002 > [ect0] .7 201:201(0) ack 101 <ECN e1b 101 ceb 100 e0b 101,nop,nop,nop,sack 401:501 201:301>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt
new file mode 100644
index 000000000000..5fd77f466572
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ce_updates_delivered_ce.pkt
@@ -0,0 +1,22 @@
+// Reflected SYNACK CE mark increases delivered_ce
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_fallback=0
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
++0.05 < SEWA 0:0(0) win 32767 <mss 1050,nop,nop,sackOK,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+// Fake ce for prev, ECT validator must be disabled for this to work
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt
new file mode 100644
index 000000000000..f6ad1ea5c0c4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect0.pkt
@@ -0,0 +1,24 @@
+// Test SYN=0 reflector
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ect0] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] A. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect0] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
+
++0.01 < [ect0] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 1 ceb 0 e0b 101,nop>
++0 read(4, ..., 100) = 100
+
++0 close(4) = 0
++0 > F.5 101:101(0) ack 101 <...>
++0.1 < R. 101:101(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt
new file mode 100644
index 000000000000..7ecfc5fb9dbb
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_ect1.pkt
@@ -0,0 +1,24 @@
+// Test SYN=0 reflector
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < [ect1] SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] EW. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P.5 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
++0.05 < [ect1] P.5 1:1(0) ack 101 win 256 <ECN e0b 101 ceb 0 e1b 1,nop>
+
++0.01 < [ect1] P.5 1:101(100) ack 101 win 256 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 > [ect0] .5 101:101(0) ack 101 <ECN e1b 101 ceb 0 e0b 1,nop>
++0 read(4, ..., 100) = 100
+
++0 close(4) = 0
++0 > F5. 101:101(0) ack 101 <...>
++0.1 < R. 101:101(0) ack 102 win 4242
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt
new file mode 100644
index 000000000000..9e0959782ef5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rexmit.pkt
@@ -0,0 +1,15 @@
+// Test 3rd ACK flags when SYN-ACK is rexmitted
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++.002 ... 0.052 connect(4, ..., ...) = 0
+
++.002 > SEWA 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
+
++0.05 < SW. 0:0(0) ack 1 win 32767 <mss 1460,ECN e0b 1 ceb 0 e1b 1,nop,nop,nop,sackOK,nop,wscale 8>
++.002 > [ect0] W. 1:1(0) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt
new file mode 100644
index 000000000000..a5a41633af07
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_synack_rxmt.pkt
@@ -0,0 +1,25 @@
+// Test that we retransmit SYN-ACK with ACE and without
+// AccECN options after
+// SYN-ACK was lost and TCP moved to TCPS_SYN_RECEIVED
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+sysctl -q net.ipv4.tcp_ecn_option=2
+`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 1) = 0
+
++0 < [noecn] SEWA 0:0(0) win 32792 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++.002 > [noecn] SW. 0:0(0) ack 1 <mss 1460,ECN e1b 1 ceb 0 e0b 1,nop,nop,nop,sackOK,nop,wscale 8>
+
+// Retransmit SYN-ACK without option
++1~+1.1 > [noecn] SW. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
++0.1 < [noecn] W. 1:1(0) ack 1 win 320 <ECN e0b 1 ceb 0 e1b 1,nop>
++.002 accept(3, ..., ...) = 4
+
+// We try to write with AccECN option
++0.01 write(4, ..., 100) = 100
++.002 > [ect0] P5. 1:101(100) ack 1 <ECN e1b 1 ceb 0 e0b 1,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt
new file mode 100644
index 000000000000..f3fe2f098966
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsnoprogress.pkt
@@ -0,0 +1,26 @@
+// Test TS progress filtering
+--tcp_ts_tick_usecs=1000
+--tolerance_usecs=7000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2>
+  // Fake CE and claim no progress
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 83>
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt
new file mode 100644
index 000000000000..1446799d2481
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_accecn_tsprogress.pkt
@@ -0,0 +1,25 @@
+// Test TS progress filtering
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=3
+`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < SEWA 0:0(0) win 32792 <mss 1050,sackOK,TS val 1 ecr 0,nop,wscale 8>
++.002 > SW. 0:0(0) ack 1 <mss 1460,sackOK,TS val 10 ecr 1,ECN e1b 1 ceb 0 e0b 1,nop,nop,wscale 8>
++0.05 < [ect0] W. 1:1(0) ack 1 win 264 <nop,nop,TS val 2 ecr 10>
++.002 accept(3, ..., ...) = 4
+
++0.01 %{ assert tcpi_delivered_ce == 0, tcpi_delivered_ce }%
+
++0.01 write(4, ..., 1000) = 1000
++.002 > [ect0] EAP. 1:1001(1000) ack 1 <nop,nop,TS val 83 ecr 2>
+  // Fake CE and claim progress (TS val advances from 2 to 3)
++0.05 < [ect0] WA. 1:1(0) ack 1 win 264 <nop,nop,TS val 3 ecr 83>
+
++0.01 %{ assert tcpi_delivered_ce == 1, tcpi_delivered_ce }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt
new file mode 100644
index 000000000000..319f81dd717d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_basic_client.pkt
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Minimal active open.
+// First to close connection.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+
+   // Connect to server: active open: three-way handshake
+   +0...0 connect(4, ..., ...) = 0
+   +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+   +0 < S. 0:0(0) ack 1 win 65535 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+   +0 > . 1:1(0) ack 1
+
+   // Send data
+   +0 send(4, ..., 1000, 0) = 1000
+   +0 > P. 1:1001(1000) ack 1
+   +0 < . 1:1(0) ack 1001 win 257
+
+   +0 close(4) = 0
+   +0 > F. 1001:1001(0) ack 1
+   +0 < F. 1:1(0) ack 1002 win 257
+   +0 > . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt
new file mode 100644
index 000000000000..e72a291b666e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_basic_server.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Minimal passive open.
+// Peer is first to close.
+
+`./defaults.sh`
+
+   // Open listener socket
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   // Incoming connection: passive open: three-way handshake
+   +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 257
+
+   // Open connection socket and close listener socket
+   +0 accept(3, ..., ...) = 4
+   +0 close(3) = 0
+
+   // Peer sends data: acknowledge and receive
+   +0 < P. 1:1001(1000) ack 1 win 257
+   +0 > . 1:1(0) ack 1001
+   +0 recv(4, ..., 1000, 0) = 1000
+
+   // Peer initiates connection close
+   +0 < F. 1001:1001(0) ack 1 win 257
+ +.04 > . 1:1(0) ack 1002
+
+   // Local socket also closes its side
+   +0 close(4) = 0
+   +0 > F. 1:1(0) ack 1002
+   +0 < . 1002:1002(0) ack 2 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
index 914eabab367a..657e42ca65b5 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 // Test for blocking read.
+
 --tolerance_usecs=10000
+--mss=1000
 
 `./defaults.sh`
 
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt
new file mode 100644
index 000000000000..eef01d5f1118
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+  +.1 < . 1:1(0) ack 1 win 32792
+
+
+   +0 accept(3, ..., ...) = 4
+   +0 < . 1:1001(1000) ack 1 win 32792
+   +0 > . 1:1(0) ack 1001
+   +0 read(4, ..., 1000) = 1000
+
+// resend the payload + a FIN
+   +0 < F. 1:1001(1000) ack 1 win 32792
+// Why do we have a delay and no dsack ?
+   +0~+.04 > . 1:1(0) ack 1002
+
+   +0 close(4) = 0
+
+// According to RFC 2525, section 2.17
+// we should _not_ send an RST here, because there was no data to consume.
+   +0 > F. 1:1(0) ack 1002
diff --git a/tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt b/tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt
new file mode 100644
index 000000000000..336cbf7815c8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_disorder_fin_in_FIN_WAIT.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check fix in 795a7dfbc3d9 ("net: tcp: accept old ack during closing")
+
+// Set up config.
+`./defaults.sh`
+
+// Initialize a server socket.
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S  0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   *  > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 < .  1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 shutdown(4, SHUT_WR) = 0
+   *  > F. 1:1(0) ack 1
+
+// We expect to receive one ACK.
+// But what happens if a FIN was already in transit and received out-of-order?
+
+   +0 < .  2:2(0) ack 2 win 257
+
+// This FIN packet was sent before the prior ACK (see ack 1).
+   +0 < F. 1:1(0) ack 1 win 257
+
+// Even if the FIN is received out-of-order, we should ACK it.
+
+   *  > .  2:2(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
new file mode 100644
index 000000000000..c790d0af635e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test various DSACK (RFC 2883) behaviors.
+
+--mss=1000
+
+`./defaults.sh`
+
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+  +.1 < . 1:1(0) ack 1 win 1024
+   +0 accept(3, ..., ...) = 4
+
+// First SACK range.
+   +0 < P. 1001:2001(1000) ack 1 win 1024
+   +0 > . 1:1(0) ack 1 <nop, nop, sack 1001:2001>
+
+// Check SACK coalescing (contiguous sequence).
+   +0 < P. 2001:3001(1000) ack 1 win 1024
+   +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001>
+
+// Check we have two SACK ranges for non contiguous sequences.
+   +0 < P. 4001:5001(1000) ack 1 win 1024
+   +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001>
+
+// Three ranges.
+   +0 < P. 7001:8001(1000) ack 1 win 1024
+   +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001>
+
+// DSACK (1001:3001) + SACK (7001:8001)
+   +0 < P. 1:6001(6000) ack 1 win 1024
+   +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001>
+
+// DSACK (7001:8001)
+   +0 < P. 6001:8001(2000) ack 1 win 1024
+   +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001>
+
+// DSACK for an older segment.
+   +0 < P. 1:1001(1000) ack 1 win 1024
+   +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt
new file mode 100644
index 000000000000..32aff9bc4052
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test TFO_SERVER_COOKIE_NOT_REQD flag on receiving
+// SYN with data but without Fast Open cookie option.
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x202`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Since TFO_SERVER_COOKIE_NOT_REQD, a TFO socket will be created with
+// the data accepted.
+   +0 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop>
+   +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK>
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+   +0 read(4, ..., 1024) = 1000
+
+// Data After SYN will be accepted too.
+   +0 < . 1001:2001(1000) ack 1 win 5840
+   +0 > . 1:1(0) ack 2001
+
+// Should change the implementation later to set the SYN flag as well.
+   +0 read(4, ..., 1024) = 1000
+   +0 write(4, ..., 1000) = 1000
+   +0 > P. 1:1001(1000) ack 2001
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt
new file mode 100644
index 000000000000..649997a58099
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test TFO_SERVER_WO_SOCKOPT1 without setsockopt(TCP_FASTOPEN)
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x402`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+   +0 read(4, ..., 512) = 10
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt
new file mode 100644
index 000000000000..4a00e0d994f2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Server w/o TCP_FASTOPEN socket option
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,FO TFO_COOKIE>
+
+// Data is ignored since TCP_FASTOPEN is not set on the listener
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+   +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable)
+
+// The non-blocking accept() above cannot succeed until the ACK comes in below.
+   +0 < . 1:31(30) ack 1 win 5840
+   +0 accept(3, ..., ...) = 4
+
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+   +0 read(4, ..., 512) = 30
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt
new file mode 100644
index 000000000000..345ed26ff7f8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test that a TFO-enabled server does not respond with any TFO option in its
+// SYN-ACK when receiving a pure SYN-data. It should respond with a pure SYN-ACK.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6>
+   +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 100
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+   +0 close(3) = 0
+
+// Test ECN-setup SYN with ECN disabled because this has happened in reality
+   +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6>
+   +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 100
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+   +0 close(3) = 0
+
+// Test ECN-setup SYN w/ ECN enabled
+   +0 `sysctl -q net.ipv4.tcp_ecn=2`
+   +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6>
+   +0 > SE. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 100
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+   +0 close(3) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt
new file mode 100644
index 000000000000..98e6f84497cd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test TFO server with SYN that has TFO cookie and data.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+   +0 read(4, ..., 512) = 10
+   +0 write(4, ..., 100) = 100
+   +0 > P. 1:101(100) ack 11
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt
new file mode 100644
index 000000000000..95b1047ffdd5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test zero-payload packet w/ valid TFO cookie - a TFO socket will
+// still be created and accepted but read() will not return until a
+// later pkt with 10 bytes.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+// A TFO socket is created and is writable.
+   +0 write(4, ..., 100) = 100
+   +0 > P. 1:101(100) ack 1
+   +0...0.300 read(4, ..., 512) = 10
+  +.3 < P. 1:11(10) ack 1 win 5840
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt
new file mode 100644
index 000000000000..f75efd51ed0c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// A reproducer case for a TFO SYNACK RTO undo bug in:
+//   794200d66273 ("tcp: undo cwnd on Fast Open spurious SYNACK retransmit")
+// The sequence that triggers this bug is:
+//  - Fast Open server receives TFO SYN with data, sends SYNACK
+//  - (client receives SYNACK and sends ACK, but ACK is lost)
+//  - server app sends some data packets
+//  - (N of the first data packets are lost)
+//  - server receives client ACK that has a TS ECR matching first SYNACK,
+//    and also SACKs suggesting the first N data packets were lost
+//     - server performs undo of SYNACK RTO, then immediately enters recovery
+//     - buggy behavior in 794200d66273 then performed an undo that caused
+//       the connection to be in a bad state, in CA_Open with retrans_out != 0
+
+// Check that outbound TS Val ticks are as we would expect with 1000 usec per
+// timestamp tick:
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh`
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:1000(1000) win 65535 <mss 1012,sackOK,TS val 1000 ecr 0,wscale 7,nop,nop,nop,FO TFO_COOKIE>
+   +0 > S. 0:0(0) ack 1001 <mss 1460,sackOK,TS val 2000 ecr 1000,nop,wscale 8>
+   +0 accept(3, ..., ...) = 4
+
+// Application writes more data
+   +.010 write(4, ..., 10000) = 10000
+   +0 > P. 1:5001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000>
+   +0 > P. 5001:10001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000>
+   +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+   +0 < . 1001:1001(0) ack 1 win 257 <TS val 1010 ecr 2000,sack 2001:5001>
+   +0 > P. 1:2001(2000) ack 1001 <nop,nop,TS val 2010 ecr 1010>
+   +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+   +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }%
+
+   +0 < . 1001:1001(0) ack 1 win 257 <TS val 1011 ecr 2000,sack 2001:6001>
+   +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+   +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt
new file mode 100644
index 000000000000..c3cb0e8bdcf8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test the Experimental Option
+//
+// SYN w/ FOEXP w/o cookie must generate SYN+ACK w/ FOEXP
+// w/ a valid cookie, and the cookie must be the same as the
+// one generated by IANA FO
+
+`./defaults.sh`
+
+// Request a TFO cookie by Experimental Option
+// This must generate the same TFO_COOKIE
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,FOEXP TFO_COOKIE>
+
+   +0 close(3) = 0
+
+// Test if FOEXP with a valid cookie creates a TFO socket
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP TFO_COOKIE>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+   +0 read(4, ..., 512) = 10
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt
new file mode 100644
index 000000000000..dc09f8d9a381
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a FIN pkt with the ACK bit to a TFO socket.
+// The socket will go to TCP_CLOSE_WAIT state and data can be
+// read until the socket is closed, at which time a FIN will be sent.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// FIN is acked and the socket goes to TCP_CLOSE_WAIT state
+// in tcp_fin() called from tcp_data_queue().
+   +0 < F. 11:11(0) ack 1 win 32792
+   +0 > . 1:1(0) ack 12
+
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+   +0 %{ assert tcpi_state == TCP_CLOSE_WAIT, tcpi_state }%
+
+   +0 read(4, ..., 512) = 10
+   +0 close(4) = 0
+   +0 > F. 1:1(0) ack 12
+    * > F. 1:1(0) ack 12
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt
new file mode 100644
index 000000000000..d5543672e2bd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send an ICMP host_unreachable pkt to a pending SYN_RECV req.
+//
+// If it's a TFO req, the ICMP error will cause it to switch
+// to TCP_CLOSE state, but it remains in the accept queue.
+
+--ip_version=ipv4
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// Out-of-window icmp is ignored but accounted.
+   +0 `nstat > /dev/null`
+   +0 < icmp unreachable [5000:6000(1000)]
+   +0 `nstat | grep TcpExtOutOfWindowIcmps > /dev/null`
+
+// Valid ICMP unreach.
+   +0 < icmp unreachable host_unreachable [0:10(10)]
+
+// Unlike the non-TFO case, the req is still there to be accepted.
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+// tcp_done_with_error() in tcp_v4_err() sets sk->sk_state
+// to TCP_CLOSE
+   +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+// The 1st read will succeed and return the data in SYN
+   +0 read(4, ..., 512) = 10
+
+// The 2nd read will fail.
+   +0 read(4, ..., 512) = -1 EHOSTUNREACH (No route to host)
+
+// The socket is no longer writable because it's in TCP_CLOSE state.
+   +0 write(4, ..., 100) = -1 EPIPE (Broken Pipe)
+
+// inbound pkt will trigger RST because the socket has been moved
+// off the TCP hash tables.
+   +0 < . 1:1(0) ack 1 win 32792
+   +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt
new file mode 100644
index 000000000000..040d5547ed80
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a TFO socket after it has been accepted.
+//
+// First read() will return all the data and this is consistent
+// with the non-TFO case. Second read will return -1
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+   +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// Inject a RST. The data from the SYN stays queued for the 1st read below.
+// tcp_reset() sets sk->sk_err to ECONNRESET for SYN_RECV.
+   +0 < R. 11:11(0) win 32792
+   +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+// This one w/o ACK bit will cause the same effect.
+// +0 < R 11:11(0) win 32792
+// See Step 2 in tcp_validate_incoming().
+
+// found_ok_skb in tcp_recvmsg_locked()
+   +0 read(4, ..., 512) = 10
+
+// !copied && sk->sk_err -> sock_error(sk)
+   +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer)
+   +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt
new file mode 100644
index 000000000000..7f9de6c66cbd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a TFO socket before it is accepted.
+//
+// The socket won't go away and after it's accepted the data
+// in the SYN pkt can still be read. But that's about all that
+// the acceptor can do with the socket.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// Inject a RST before accept(). The data from the SYN can still be read later.
+   +0 < R. 11:11(0) win 257
+
+// This one w/o ACK bit will cause the same effect.
+// +0 < R 11:11(0) win 257
+
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+   +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+   +0 read(4, ..., 512) = 10
+   +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer)
+   +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt
new file mode 100644
index 000000000000..548a87701b5d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a TFO socket after it is accepted.
+//
+// The socket will change to TCP_CLOSE state with pending data so
+// write() will fail. Pending data can still be read and close()
+// won't trigger RST if data is not read
+//
+// 565b7b2d2e63 ("tcp: do not send reset to already closed sockets")
+// https://lore.kernel.org/netdev/4C1A2502.1030502@openvz.org/
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop, FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+   +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// tcp_done() sets sk->sk_state to TCP_CLOSE and clears tp->fastopen_rsk
+   +0 < R. 11:11(0) win 32792
+   +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+   +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer)
+   +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt
new file mode 100644
index 000000000000..20090bf77655
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a fully established socket with pending data before
+// it is accepted.
+//
+// The socket with pending data won't go away and can still be accepted
+// with data read. But it will be in TCP_CLOSE state.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Invalid cookie, so accept() fails.
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO aaaaaaaaaaaaaaaa,nop,nop>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK, FO TFO_COOKIE,nop,nop>
+
+   +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable)
+
+// Complete 3WHS and send data and RST
+   +0 < . 1:1(0) ack 1 win 32792
+   +0 < . 1:11(10) ack 1 win 32792
+   +0 < R. 11:11(0) win 32792
+
+// A valid reset won't make the fully-established socket go away.
+// It's just that the acceptor will get a dead, unusable socket
+// in TCP_CLOSE state.
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+   +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+   +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer)
+   +0 read(4, ..., 512) = 10
+   +0 read(4, ..., 512) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt
new file mode 100644
index 000000000000..9f52d7de3436
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test the server cookie is generated by aes64 encoding of remote and local
+// IP addresses with a master key specified via sockopt TCP_FASTOPEN_KEY
+//
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen_key=00000000-00000000-00000000-00000000`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+// Set a key of a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 (big endian).
+// This would produce a cookie of TFO_COOKIE like many other
+// tests (which use the same key, but set via sysctl).
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY,
+                 "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0
+
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Request a valid cookie TFO_COOKIE
+   +0 < S 1428932:1428942(10) win 10000 <mss 1012,nop,nop,FO,sackOK,TS val 1 ecr 0,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1428933 <mss 1460,sackOK,TS val 10000 ecr 1,nop,wscale 8,FO TFO_COOKIE,nop,nop>
+   +0 < . 1:1(0) ack 1 win 257 <nop,nop,TS val 2 ecr 10000>
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+   +0 close(4) = 0
+   +0 > F. 1:1(0) ack 1 <nop,nop,TS val 10001 ecr 2>
+   +0 < F. 1:1(0) ack 2 win 257 <nop,nop,TS val 3 ecr 10001>
+   +0 > . 2:2(0) ack 2 <nop,nop,TS val 10002 ecr 3>
+
+   +0 close(3) = 0
+
+// Restart the listener
+   +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Test that setting the key in the listen state produces an identical cookie
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY,
+                 "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0
+
+   +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7>
+   +0 > S. 0:0(0) ack 6815001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8>
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+   +0 < . 1001:1001(0) ack 1 win 257 <nop,nop,TS val 12 ecr 10000>
+   +0 read(4, ..., 8192) = 1000
+
+   +0 close(4) = 0
+   +0 > F. 1:1(0) ack 1001 <nop,nop,TS val 10101 ecr 12>
+   +0 < F. 1001:1001(0) ack 2 win 257 <nop,nop,TS val 112 ecr 10101>
+   +0 > . 2:2(0) ack 1002 <nop,nop,TS val 10102 ecr 112>
+
+   +0 close(3) = 0
+
+// Restart the listener
+   +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Test invalid key length (0 and 3 bytes here): setsockopt() must fail with EINVAL
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 0) = -1 EINVAL (Invalid argument)
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 3) = -1 EINVAL (Invalid argument)
+
+// Previous cookie won't be accepted b/c this listener uses the global key (0-0-0-0)
+   +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7>
+   +0 > S. 0:0(0) ack 6814001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8,FO TFO_COOKIE_ZERO,nop,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt
new file mode 100644
index 000000000000..e82e06da44c9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Close a listener socket with pending TFO child.
+// This will trigger RST pkt to go out.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// RST pkt is generated for each not-yet-accepted TFO child.
+// inet_csk_listen_stop() -> inet_child_forget() -> tcp_disconnect()
+// -> tcp_need_reset() is true for SYN_RECV
+   +0 close(3) = 0
+   +0 > R. 1:1(0) ack 11
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt
new file mode 100644
index 000000000000..2a148bb14cbf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK,nop,nop,FO TFO_COOKIE>
+   +0 > S. 0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK>
+
+// sk->sk_state is TCP_SYN_RECV
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// tcp_disconnect() sets sk->sk_state to TCP_CLOSE
+   +0 connect(4, AF_UNSPEC, ...) = 0
+   +0 > R. 1:1(0) ack 11 win 65535
+   +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+// connect() sets sk->sk_state to TCP_SYN_SENT
+   +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+   +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
+   +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+   +0 %{ assert tcpi_state == TCP_SYN_SENT, tcpi_state }%
+
+// tp->fastopen_rsk must be NULL
+   +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt
new file mode 100644
index 000000000000..09fb63f78a0e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Close a TFO socket with unread data.
+// This will trigger a RST pkt.
+
+`./defaults.sh`
+
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+   +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// data_was_unread == true in __tcp_close()
+   +0 close(4) = 0
+   +0 > R. 1:1(0) ack 11
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
index df49c67645ac..e13f0eee9795 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -1,5 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 // Test TCP_INQ and TCP_CM_INQ on the client side.
+
+--mss=1000
+
 `./defaults.sh
 `
 
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
index 04a5e2590c62..14dd5f813d50 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -1,5 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 // Test TCP_INQ and TCP_CM_INQ on the server side.
+
+--mss=1000
+
 `./defaults.sh
 `
 
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
new file mode 100644
index 000000000000..09aabc775e80
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+// Test that a not-yet-accepted socket does not change
+// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets.
+
+   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+  +.1 < . 1:1(0) ack 1 win 257
+  +0  < . 2001:41001(39000) ack 1 win 257
+  +0  > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+  +0  < . 41001:101001(60000) ack 1 win 257
+  +0  > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+  +0  < . 1:1001(1000) ack 1 win 257
+  +0  > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001>
+  +0  < . 1001:2001(1000) ack 1 win 257
+  +0  > . 1:1(0) ack 101001
+
+  +0 accept(3, ..., ...) = 4
+
+  +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }%
+
+  +0 close(4) = 0
+  +0 close(3) = 0
+
+// Test that ooo packets for accepted sockets do increase sk_rcvbuf
+   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+  +.1 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+  +0  < . 2001:41001(39000) ack 1 win 257
+  +0  > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+  +0  < . 41001:101001(60000) ack 1 win 257
+  +0  > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+
+  +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }%
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
new file mode 100644
index 000000000000..7e6bc5fb0c8d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+  +.1 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 < . 2001:11001(9000) ack 1 win 257
+   +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001>
+
+// check that ooo packet properly updates tcpi_rcv_mss
+   +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }%
+
+   +0 < . 11001:21001(10000) ack 1 win 257
+   +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001>
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
new file mode 100644
index 000000000000..12882be10f2e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+    0 `nstat -n`
+
+// Establish a connection.
+   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
+  +.1 < . 1:1(0) ack 1 win 257
+
+  +0 accept(3, ..., ...) = 4
+
+  +0 < P. 1:4001(4000) ack 1 win 257
+  +0 > .  1:1(0) ack 4001 win 5000
+
+// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+  +0 < P. 4001:54001(50000) ack 1 win 257
+  +0 > .  1:1(0) ack 4001 win 5000
+
+// ooo packet : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+  +1 < P. 5001:55001(50000) ack 1 win 257
+  +0 > .  1:1(0) ack 4001 win 5000
+
+// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+  +0 < P. 70001:80001(10000) ack 1 win 257
+  +0 > .  1:1(0) ack 4001 win 5000
+
+  +0 read(4, ..., 100000) = 4000
+
+// If queue is empty, accept a packet even if its end_seq is above rcv_mwnd_seq
+  +0 < P. 4001:54001(50000) ack 1 win 257
+   * > .  1:1(0) ack 54001 win 0
+
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt
new file mode 100644
index 000000000000..b9ab264b2a11
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_neg_window.pkt
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test maximum advertised window limit when rcv_nxt advances past
+// rcv_mwnd_seq. The "usable window" must be properly clamped to zero
+// rather than becoming negative.
+
+--mss=1000
+
+`./defaults.sh`
+
+// Establish a connection.
+   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
+  +.1 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+// A too big packet is accepted if the receive queue is empty. It
+// does not trigger an immediate ACK.
+   +0 < P. 1:20001(20000) ack 1 win 257
+   +0 %{ assert tcpi_bytes_received == 20000, tcpi_bytes_received; }%
+
+// Send a RST immediately so that there is no rcv_wup/rcv_mwnd_seq update yet
+   +0 < R. 20001:20001(0) ack 1 win 257
+
+// Verify that the RST was accepted. Indirectly this also verifies that no
+// immediate ACK was sent for the data packet above.
+   +0 < . 20001:20001(0) ack 1 win 257
+   +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt
new file mode 100644
index 000000000000..6af0e0eb183a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_allowed.pkt
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_shrink_window=1
+sysctl -q net.ipv4.tcp_rmem="4096 32768 $((32*1024*1024))"`
+
+   0 `nstat -n`
+
+// Establish a connection.
+  +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+  +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+  +0 bind(3, ..., ...) = 0
+  +0 listen(3, 1) = 0
+
+  +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+  +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 10>
+  +0 < . 1:1(0) ack 1 win 257
+
+  +0 accept(3, ..., ...) = 4
+
+  +0 < P. 1:10001(10000) ack 1 win 257
+   * > .  1:1(0) ack 10001 win 15
+
+  +0 < P. 10001:11024(1023) ack 1 win 257
+   * > .  1:1(0) ack 11024 win 13
+
+// Max window seq advertised 10001 + 15*1024 = 25361, last advertised: 11024 + 13*1024 = 24336
+
+// Segment beyond the max window is dropped
+  +0 < P. 11024:25362(14338) ack 1 win 257
+   * > .  1:1(0) ack 11024 win 13
+
+// Segment using the max window is accepted
+  +0 < P. 11024:25361(14337) ack 1 win 257
+   * > .  1:1(0) ack 25361 win 0
+
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented once
+  +0 `nstat | grep TcpExtBeyondWindow | grep -q " 1 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt
new file mode 100644
index 000000000000..a80eb55dc69a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_wnd_shrink_nomem.pkt
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+// When tcp_receive_window() < tcp_max_receive_window(), tcp_sequence() accepts
+// packets that would be dropped under normal conditions (i.e. tcp_receive_window()
+// equal to tcp_max_receive_window()).
+// Test that such packets are handled as expected for RWIN == 0 and for RWIN > 0.
+
+--mss=1000
+
+`./defaults.sh`
+
+    0 `nstat -n`
+
+// Establish a connection.
+   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1000000], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 4>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+// Put 1040000 bytes into the receive buffer
+   +0 < P. 1:65001(65000) ack 1 win 257
+    * > .  1:1(0) ack 65001
+   +0 < P. 65001:130001(65000) ack 1 win 257
+    * > .  1:1(0) ack 130001
+   +0 < P. 130001:195001(65000) ack 1 win 257
+    * > .  1:1(0) ack 195001
+   +0 < P. 195001:260001(65000) ack 1 win 257
+    * > .  1:1(0) ack 260001
+   +0 < P. 260001:325001(65000) ack 1 win 257
+    * > .  1:1(0) ack 325001
+   +0 < P. 325001:390001(65000) ack 1 win 257
+    * > .  1:1(0) ack 390001
+   +0 < P. 390001:455001(65000) ack 1 win 257
+    * > .  1:1(0) ack 455001
+   +0 < P. 455001:520001(65000) ack 1 win 257
+    * > .  1:1(0) ack 520001
+   +0 < P. 520001:585001(65000) ack 1 win 257
+    * > .  1:1(0) ack 585001
+   +0 < P. 585001:650001(65000) ack 1 win 257
+    * > .  1:1(0) ack 650001
+   +0 < P. 650001:715001(65000) ack 1 win 257
+    * > .  1:1(0) ack 715001
+   +0 < P. 715001:780001(65000) ack 1 win 257
+    * > .  1:1(0) ack 780001
+   +0 < P. 780001:845001(65000) ack 1 win 257
+    * > .  1:1(0) ack 845001
+   +0 < P. 845001:910001(65000) ack 1 win 257
+    * > .  1:1(0) ack 910001
+   +0 < P. 910001:975001(65000) ack 1 win 257
+    * > .  1:1(0) ack 975001
+   +0 < P. 975001:1040001(65000) ack 1 win 257
+    * > .  1:1(0) ack 1040001
+
+// Trigger an extreme memory squeeze by shrinking SO_RCVBUF
+   +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [16000], 4) = 0
+
+   +0 < P. 1040001:1105001(65000) ack 1 win 257
+    * > .  1:1(0) ack 1040001 win 0
+// Check LINUX_MIB_TCPRCVQDROP has been incremented
+   +0 `nstat -s | grep TcpExtTCPRcvQDrop| grep -q " 1 "`
+
+// RWIN == 0: rcv_wup = 1040001, rcv_wnd = 0, rcv_mwnd_seq > 1105001 (significantly larger, typically ~1970000)
+
+// Accept pure ack with seq in max adv. window
+   +0 write(4, ..., 1000) = 1000
+   +0 > P. 1:1001(1000) ack 1040001 win 0
+   +0 < .  1105001:1105001(0) ack 1001 win 257
+
+// In order segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW)
+   +0 < P. 1040001:1041001(1000) ack 1001 win 257
+   +0 > .  1001:1001(0) ack 1040001 win 0
+// Ooo partial segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW)
+   +0 < P. 1039001:1041001(2000) ack 1001 win 257
+   +0 > .  1001:1001(0) ack 1040001 win 0 <nop,nop,sack 1039001:1040001>
+// Check LINUX_MIB_TCPZEROWINDOWDROP has been incremented twice
+   +0 `nstat -s | grep TcpExtTCPZeroWindowDrop| grep -q " 2 "`
+
+// Ooo segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_OVERWINDOW)
+   +0 < P. 1105001:1106001(1000) ack 1001 win 257
+   +0 > .  1001:1001(0) ack 1040001 win 0
+// Ooo segment, beyond max adv. window -> drop (SKB_DROP_REASON_TCP_INVALID_SEQUENCE)
+   +0 < P. 2000001:2001001(1000) ack 1001 win 257
+   +0 > .  1001:1001(0) ack 1040001 win 0
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented twice
+   +0 `nstat -s | grep TcpExtBeyondWindow | grep -q " 2 "`
+
+// Read all data
+   +0 read(4, ..., 2000000) = 1040000
+    * > .  1001:1001(0) ack 1040001
+
+// RWIN > 0: rcv_wup = 1040001, 0 < rcv_wnd < 32000, rcv_mwnd_seq > 1105001 (significantly larger, typically ~1970000)
+
+// Accept pure ack with seq in max adv. window, beyond adv. window
+   +0 write(4, ..., 1000) = 1000
+   +0 > P.  1001:2001(1000) ack 1040001
+   +0 < . 1105001:1105001(0) ack 2001 win 257
+
+// In order segment, in max adv. window, in adv. window -> accept
+// Note: This also ensures that we cannot hit the empty queue exception in tcp_sequence() in the following tests
+   +0 < P. 1040001:1041001(1000) ack 2001 win 257
+    * > .  2001:2001(0) ack 1041001
+
+// Ooo partial segment, in adv. window -> accept
+   +0 < P. 1040001:1042001(2000) ack 2001 win 257
+   +0 > .  2001:2001(0) ack 1042001 <nop,nop,sack 1040001:1041001>
+
+// Ooo segment, in max adv. window, beyond adv. window -> drop (SKB_DROP_REASON_TCP_OVERWINDOW)
+   +0 < P. 1105001:1106001(1000) ack 2001 win 257
+   +0 > .  2001:2001(0) ack 1042001
+// Ooo segment, beyond max adv. window, beyond adv. window -> drop (SKB_DROP_REASON_TCP_INVALID_SEQUENCE)
+   +0 < P. 2000001:2001001(1000) ack 2001 win 257
+   +0 > .  2001:2001(0) ack 1042001
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented twice more (4 in total; nstat -s keeps the history)
+   +0 `nstat -s | grep TcpExtBeyondWindow | grep -q " 4 "`
+
+// We are allowed to go beyond the window and buffer with one packet
+   +0 < P. 1042001:1062001(20000) ack 2001 win 257
+    * > .  2001:2001(0) ack 1062001
+   +0 < P. 1062001:1082001(20000) ack 2001 win 257
+    * > .  2001:2001(0) ack 1082001 win 0
+
+// But not more: In order segment, in max adv. window -> drop (SKB_DROP_REASON_TCP_ZEROWINDOW)
+   +0 < P. 1082001:1083001(1000) ack 2001 win 257
+    * > .  2001:2001(0) ack 1082001
+// Check LINUX_MIB_TCPZEROWINDOWDROP has been incremented again
+   +0 `nstat -s | grep TcpExtTCPZeroWindowDrop| grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt
new file mode 100644
index 000000000000..e245359a1a91
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_zero_wnd_fin.pkt
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Some TCP stacks send FINs even though the window is closed. We break
+// a possible FIN/ACK loop by accepting the FIN.
+
+--mss=1000
+
+`./defaults.sh`
+
+// Establish a connection.
+   +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
+   +0 < . 1:1(0) ack 1 win 257
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 < P. 1:60001(60000) ack 1 win 257
+    * > .  1:1(0) ack 60001 win 0
+
+   +0 < F. 60001:60001(0) ack 1 win 257
+   +0 > . 1:1(0) ack 60002 win 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
new file mode 100644
index 000000000000..47550df124ce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test SYN+ACK RTX with 1s RTO.
+//
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000`
+
+//
+// Test 1: TFO SYN+ACK
+//
+    0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+   +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+   +0 < S 0:10(10) win 1000 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+   +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+   +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+   +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+   +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+   +0 < . 11:11(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+   +0 close(4) = 0
+   +0 close(3) = 0
+
+
+//
+// Test 2: non-TFO SYN+ACK
+//
+   +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 1000 <mss 1460,sackOK,nop,nop>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+   +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+   +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+   +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+   +0 < . 1:1(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+   +0 accept(3, ..., ...) = 4
+   +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+   +0 close(4) = 0
+   +0 close(3) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
index b2b2cdf27e20..454441e7ecff 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
@@ -1,6 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 // Test that we correctly skip zero-length IOVs.
+
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
+
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
    +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
    +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt
new file mode 100644
index 000000000000..95a1957a2cf9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_tcp_tx_timestamp_bug.pkt
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test after "tcp: tcp_tx_timestamp() must look at the rtx queue"
+
+// This test is about receiving the SCM_TSTAMP_ACK,
+// we do not care about its SCM_TIMESTAMPING precision.
+--tolerance_usecs=1000000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_min_tso_segs=70
+`
+
+// Create a socket and set it to non-blocking.
+    0	socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0	fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+   +0	fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+   +0	connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+   +0	> S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.010	< S. 0:0(0) ack 1 win 65535 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7>
+   +0	> . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+   +0	getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+   +0	setsockopt(3, SOL_SOCKET, SO_SNDBUF, [30000], 4) = 0
+
+   +0   write(3, ..., 9880) = 9880
+   +0   > P. 1:9881(9880) ack 1 <nop,nop,TS val 200 ecr 700>
++.010   < . 1:1(0) ack 9881 win 10000 <nop,nop,TS val 701 ecr 200>
+
+   +0   write(3, ..., 19760) = 19760
+   +0   > P. 9881:29641(19760) ack 1 <nop,nop,TS val 201 ecr 701>
++.010   < . 1:1(0) ack 29641 win 10000 <nop,nop,TS val 702 ecr 201>
+
+   +0   write(3, ..., 39520) = 39520
+   +0   > P. 29641:69161(39520) ack 1 <nop,nop,TS val 202 ecr 702>
++.010   < . 1:1(0) ack 69161 win 10000 <nop,nop,TS val 703 ecr 202>
+
+// One more write to increase cwnd
+   +0	write(3, ..., 79040) = 79040
+   +0	> P. 69161:108681(39520) ack 1 <nop,nop,TS val 203 ecr 703>
+   +0	> P. 108681:148201(39520) ack 1 <nop,nop,TS val 203 ecr 703>
++.010	< . 1:1(0) ack 148201 win 1000 <nop,nop,TS val 704 ecr 203>
+
+   +0	setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+		   [SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+		    SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// We issue one write that fills one skb; its last byte
+// cannot be stored because of our small SO_SNDBUF.
+   +0	write(3, ..., 65209) = 65208
+   +0	> P. 148201:213409(65208) ack 1 <nop,nop,TS val 204 ecr 704>
++.010	< . 1:1(0) ack 213409 win 1000 <nop,nop,TS val 705 ecr 204>
+
+// SCM_TSTAMP_ACK should be received after the last ack at
+// t=60ms.
+   +0	recvmsg(3, {msg_name(...)=...,
+		    msg_iov(1)=[{...,0}],
+                    msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+                    msg_control=[
+			{cmsg_level=SOL_SOCKET,
+			 cmsg_type=SCM_TIMESTAMPING,
+			 cmsg_data={scm_sec=0,scm_nsec=60000000}},
+			{cmsg_level=CMSG_LEVEL_IP,
+			 cmsg_type=CMSG_TYPE_RECVERR,
+			 cmsg_data={ee_errno=ENOMSG,
+				    ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+				    ee_type=0,
+				    ee_code=0,
+				    ee_info=SCM_TSTAMP_ACK,
+				    ee_data=65207}}
+		    ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
index 183051ba0cae..6882b8240a8a 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
@@ -23,14 +23,16 @@
 
 // install a qdisc dropping all packets
    +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0`
+
    +0 write(4, ..., 24) = 24
    // When qdisc is congested we retry every 500ms
    // (TCP_RESOURCE_PROBE_INTERVAL) and therefore
    // we retry 6 times before hitting 3s timeout.
    // First verify that the connection is alive:
-+3.250 write(4, ..., 24) = 24
++3 write(4, ..., 24) = 24
+
    // Now verify that shortly after that the socket is dead:
- +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
 
    +0 %{ assert tcpi_probes == 6, tcpi_probes; \
          assert tcpi_backoff == 0, tcpi_backoff }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
index a82c8899d36b..0a0700afdaa3 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
@@ -4,6 +4,8 @@
 // send a packet with MSG_ZEROCOPY and receive the notification ID
 // repeat and verify IDs are consecutive
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
index c01915e7f4a1..df91675d2991 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
@@ -3,6 +3,8 @@
 //
 // send multiple packets, then read one range of all notifications.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
index 6509882932e9..2963cfcb14df 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 // Minimal client-side zerocopy test
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
index 2cd78755cb2a..ea0c2fa73c2d 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
@@ -7,6 +7,8 @@
 // First send on a closed socket and wait for (absent) notification.
 // Then connect and send and verify that notification nr. is zero.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
index 7671c20e01cf..4df978a9b82e 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
@@ -7,6 +7,9 @@
 // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
 // is correctly fired only once, when EPOLLET is set. send another packet with
 // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
index fadc480fdb7f..36b6edc4858c 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
@@ -8,6 +8,9 @@
 // fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
 // is correctly fired only once, when EPOLLET is set. send another packet with
 // MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
index 5bfa0d1d2f4a..1bea6f3b4558 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
@@ -8,6 +8,9 @@
 // is correctly fired only once, when EPOLLONESHOT is set. send another packet
 // with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and
 // confirm that EPOLLERR is correctly set.
+
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
index 4a73bbf46961..e27c21ff5d18 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
@@ -8,6 +8,8 @@
 // one will have no data in the initial send. On return 0 the
 // zerocopy notification counter is not incremented. Verify this too.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
 // Send a FastOpen request, no cookie yet so no data in SYN
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
index 36086c5877ce..b1fa77c77dfa 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
@@ -4,6 +4,8 @@
 // send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
 // kernel returns the notification ID.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh
  ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207`
 
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
index 672f817faca0..2f5317d0a9fa 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
@@ -7,6 +7,8 @@
 //    because each iovec element becomes a frag
 // 3) the PSH bit is set on an skb when it runs out of fragments
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
index a9a1ac0aea4f..9d5272c6b207 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
@@ -4,6 +4,8 @@
 // verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy
 // packets of all sizes, including the smallest payload, 1B.
 
+--send_omit_free	// do not reuse send buffers with zerocopy
+
 `./defaults.sh`
 
     0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 66be7699c72c..a3323c21f001 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -205,7 +205,6 @@
 #	Check that PMTU exceptions are created for both paths.
 
 source lib.sh
-source net_helper.sh
 
 PAUSE_ON_FAIL=no
 VERBOSE=0
@@ -1090,10 +1089,11 @@ cleanup() {
 
 	cleanup_all_ns
 
-	ip link del veth_A-C		2>/dev/null
-	ip link del veth_A-R1		2>/dev/null
-	cleanup_del_ovs_internal
-	cleanup_del_ovs_vswitchd
+	[ -e "/sys/class/net/veth_A-C"  ] && ip link del veth_A-C
+	[ -e "/sys/class/net/veth_A-R1" ] && ip link del veth_A-R1
+	[ -e "/sys/class/net/ovs_br0"   ] && cleanup_del_ovs_internal
+	[ -e "/sys/class/net/ovs_br0"   ] && cleanup_del_ovs_vswitchd
+
 	rm -f "$tmpoutfile"
 }
 
diff --git a/tools/testing/selftests/net/ppp/Makefile b/tools/testing/selftests/net/ppp/Makefile
new file mode 100644
index 000000000000..b39b0abadde6
--- /dev/null
+++ b/tools/testing/selftests/net/ppp/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+TEST_PROGS := \
+	ppp_async.sh \
+	pppoe.sh \
+# end of TEST_PROGS
+
+TEST_FILES := \
+	ppp_common.sh \
+	pppoe-server-options \
+# end of TEST_FILES
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/ppp/config b/tools/testing/selftests/net/ppp/config
new file mode 100644
index 000000000000..b45d25c5b970
--- /dev/null
+++ b/tools/testing/selftests/net/ppp/config
@@ -0,0 +1,9 @@
+CONFIG_IPV6=y
+CONFIG_PACKET=y
+CONFIG_PPP=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPPOE_HASH_BITS_4=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/ppp/ppp_async.sh b/tools/testing/selftests/net/ppp/ppp_async.sh
new file mode 100755
index 000000000000..10f54c8dd0bc
--- /dev/null
+++ b/tools/testing/selftests/net/ppp/ppp_async.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ppp_common.sh
+
+# Temporary files for PTY symlinks
+TTY_DIR=$(mktemp -d /tmp/ppp.XXXXXX)
+TTY_SERVER="$TTY_DIR"/server
+TTY_CLIENT="$TTY_DIR"/client
+
+# shellcheck disable=SC2329
+cleanup() {
+	cleanup_all_ns
+	[ -n "$SOCAT_PID" ] && kill_process "$SOCAT_PID"
+	rm -fr "$TTY_DIR"
+}
+
+trap cleanup EXIT
+
+ppp_common_init
+modprobe -q ppp_async
+
+# Create the virtual serial device
+socat -d PTY,link="$TTY_SERVER",rawer PTY,link="$TTY_CLIENT",rawer &
+SOCAT_PID=$!
+
+# Wait for symlinks to be created
+slowwait 5 [ -L "$TTY_SERVER" ]
+
+# Start the PPP Server
+ip netns exec "$NS_SERVER" pppd "$TTY_SERVER" 115200 \
+	"$IP_SERVER":"$IP_CLIENT" \
+	local noauth nodefaultroute debug
+
+# Start the PPP Client
+ip netns exec "$NS_CLIENT" pppd "$TTY_CLIENT" 115200 \
+	local noauth updetach nodefaultroute debug
+
+ppp_test_connectivity
+
+log_test "PPP async"
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/ppp/ppp_common.sh b/tools/testing/selftests/net/ppp/ppp_common.sh
new file mode 100644
index 000000000000..40bbec317039
--- /dev/null
+++ b/tools/testing/selftests/net/ppp/ppp_common.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# shellcheck disable=SC2153
+
+source ../lib.sh
+
+IP_SERVER="192.168.200.1"
+IP_CLIENT="192.168.200.2"
+
+ppp_common_init() {
+	# External tools used by the PPP tests (checked via lib.sh's require_command)
+	require_command socat
+	require_command pppd
+	require_command iperf3
+
+	# Exit with the kselftest skip code unless running as root
+	if [ "$(id -u)" -ne 0 ];then
+		echo "SKIP: Need root privileges"
+		exit "$ksft_skip"
+	fi
+
+	# Set up the namespaces later referenced as $NS_SERVER and $NS_CLIENT
+	setup_ns NS_SERVER NS_CLIENT
+}
+
+ppp_check_addr() {
+	# Usage: ppp_check_addr DEV ADDR NS; succeeds iff DEV in NS has IPv4 ADDR
+	local dev=$1 addr=$2 ns=$3
+
+	# -F/-w: match ADDR literally and as a whole word, not as a regex
+	ip -netns "$ns" -4 addr show dev "$dev" 2>/dev/null | grep -qwF "$addr"
+}
+
+ppp_test_connectivity() {
+	slowwait 10 ppp_check_addr "ppp0" "$IP_CLIENT" "$NS_CLIENT"
+	check_err $? "ppp0 did not get address $IP_CLIENT"
+	# Basic reachability of the server address from the client namespace
+	ip netns exec "$NS_CLIENT" ping -c 3 "$IP_SERVER"
+	check_err $?
+	# TCP transfer against a one-shot (-1), daemonized (-D) iperf3 server
+	ip netns exec "$NS_SERVER" iperf3 -s -1 -D
+	wait_local_port_listen "$NS_SERVER" 5201 tcp
+	ip netns exec "$NS_CLIENT" iperf3 -c "$IP_SERVER" -Z -t 2
+	check_err $?
+}
diff --git a/tools/testing/selftests/net/ppp/pppoe-server-options b/tools/testing/selftests/net/ppp/pppoe-server-options
new file mode 100644
index 000000000000..66c8c9d319e9
--- /dev/null
+++ b/tools/testing/selftests/net/ppp/pppoe-server-options
@@ -0,0 +1,2 @@
+noauth
+noipdefault
diff --git a/tools/testing/selftests/net/ppp/pppoe.sh b/tools/testing/selftests/net/ppp/pppoe.sh
new file mode 100755
index 000000000000..f67b51df7490
--- /dev/null
+++ b/tools/testing/selftests/net/ppp/pppoe.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ppp_common.sh
+
+VETH_SERVER="veth-server"
+VETH_CLIENT="veth-client"
+PPPOE_LOG=$(mktemp /tmp/pppoe.XXXXXX)
+
+# shellcheck disable=SC2329
+cleanup() {
+	cleanup_all_ns
+	[ -n "$SOCAT_PID" ] && kill_process "$SOCAT_PID"
+	rm -f "$PPPOE_LOG"
+}
+
+trap cleanup EXIT
+
+require_command pppoe-server
+ppp_common_init
+modprobe -q pppoe
+
+# Try to locate pppoe.so plugin; candidate directories may not all exist
+PPPOE_PLUGIN=$(find /usr/{lib,lib64,lib32}/pppd/ -name pppoe.so -type f -print -quit 2>/dev/null)
+if [ -z "$PPPOE_PLUGIN" ]; then
+	log_test_skip "PPPoE: pppoe.so plugin not found"
+	exit "$EXIT_STATUS"
+fi
+
+# Create the veth pair
+ip link add "$VETH_SERVER" type veth peer name "$VETH_CLIENT"
+ip link set "$VETH_SERVER" netns "$NS_SERVER"
+ip link set "$VETH_CLIENT" netns "$NS_CLIENT"
+ip -netns "$NS_SERVER" link set "$VETH_SERVER" up
+ip -netns "$NS_CLIENT" link set "$VETH_CLIENT" up
+
+# Start socat as syslog listener
+socat -v -u UNIX-RECV:/dev/log OPEN:/dev/null > "$PPPOE_LOG" 2>&1 &
+SOCAT_PID=$!
+
+# Start the PPP Server. Note that versions before 4.0 ignore -g option and
+# instead use a hardcoded plugin path, so they may fail to find the plugin.
+ip netns exec "$NS_SERVER" pppoe-server -I "$VETH_SERVER" \
+	-L "$IP_SERVER" -R "$IP_CLIENT" -N 1 -q "$(command -v pppd)" \
+	-k -O "$(pwd)/pppoe-server-options" -g "$PPPOE_PLUGIN"
+
+# Start the PPP Client
+ip netns exec "$NS_CLIENT" pppd \
+	local debug updetach noipdefault noauth nodefaultroute \
+	plugin "$PPPOE_PLUGIN" nic-"$VETH_CLIENT"
+
+ppp_test_connectivity
+
+log_test "PPPoE"
+
+# Dump syslog messages if the test failed
+if [ "$RET" -ne 0 ]; then
+	while read -r _sign _date _time len _from _to
+	do      len=${len##*=}
+		read -n "$len" -r LINE
+		echo "$LINE"
+	done < "$PPPOE_LOG"
+fi
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c
new file mode 100644
index 000000000000..fab3b5c2e25d
--- /dev/null
+++ b/tools/testing/selftests/net/proc_net_pktgen.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * proc_net_pktgen: kselftest for /proc/net/pktgen interface
+ *
+ * Copyright (c) 2025 Peter Seiderer <ps.report@gmx.net>
+ *
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "kselftest_harness.h"
+
+static const char ctrl_cmd_stop[] = "stop";
+static const char ctrl_cmd_start[] = "start";
+static const char ctrl_cmd_reset[] = "reset";
+
+static const char wrong_ctrl_cmd[] = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789";
+
+static const char thr_cmd_add_loopback_0[] = "add_device lo@0";
+static const char thr_cmd_rm_loopback_0[] = "rem_device_all";
+
+static const char wrong_thr_cmd[] = "forsureawrongcommand";
+static const char legacy_thr_cmd[] = "max_before_softirq";
+
+static const char wrong_dev_cmd[] = "forsurewrongcommand";
+static const char dev_cmd_min_pkt_size_0[] = "min_pkt_size";
+static const char dev_cmd_min_pkt_size_1[] = "min_pkt_size ";
+static const char dev_cmd_min_pkt_size_2[] = "min_pkt_size 0";
+static const char dev_cmd_min_pkt_size_3[] = "min_pkt_size 1";
+static const char dev_cmd_min_pkt_size_4[] = "min_pkt_size 100";
+static const char dev_cmd_min_pkt_size_5[] = "min_pkt_size=1001";
+static const char dev_cmd_min_pkt_size_6[] = "min_pkt_size =2002";
+static const char dev_cmd_min_pkt_size_7[] = "min_pkt_size= 3003";
+static const char dev_cmd_min_pkt_size_8[] = "min_pkt_size = 4004";
+static const char dev_cmd_max_pkt_size_0[] = "max_pkt_size 200";
+static const char dev_cmd_pkt_size_0[] = "pkt_size 300";
+static const char dev_cmd_imix_weights_0[] = "imix_weights 0,7 576,4 1500,1";
+static const char dev_cmd_imix_weights_1[] = "imix_weights 101,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20";
+static const char dev_cmd_imix_weights_2[] = "imix_weights 100,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20 121,21";
+static const char dev_cmd_imix_weights_3[] = "imix_weights";
+static const char dev_cmd_imix_weights_4[] = "imix_weights ";
+static const char dev_cmd_imix_weights_5[] = "imix_weights 0";
+static const char dev_cmd_imix_weights_6[] = "imix_weights 0,";
+static const char dev_cmd_debug_0[] = "debug 1";
+static const char dev_cmd_debug_1[] = "debug 0";
+static const char dev_cmd_frags_0[] = "frags 100";
+static const char dev_cmd_delay_0[] = "delay 100";
+static const char dev_cmd_delay_1[] = "delay 2147483647";
+static const char dev_cmd_rate_0[] = "rate 0";
+static const char dev_cmd_rate_1[] = "rate 100";
+static const char dev_cmd_ratep_0[] = "ratep 0";
+static const char dev_cmd_ratep_1[] = "ratep 200";
+static const char dev_cmd_udp_src_min_0[] = "udp_src_min 1";
+static const char dev_cmd_udp_dst_min_0[] = "udp_dst_min 2";
+static const char dev_cmd_udp_src_max_0[] = "udp_src_max 3";
+static const char dev_cmd_udp_dst_max_0[] = "udp_dst_max 4";
+static const char dev_cmd_clone_skb_0[] = "clone_skb 1";
+static const char dev_cmd_clone_skb_1[] = "clone_skb 0";
+static const char dev_cmd_count_0[] = "count 100";
+static const char dev_cmd_src_mac_count_0[] = "src_mac_count 100";
+static const char dev_cmd_dst_mac_count_0[] = "dst_mac_count 100";
+static const char dev_cmd_burst_0[] = "burst 0";
+static const char dev_cmd_node_0[] = "node 100";
+static const char dev_cmd_xmit_mode_0[] = "xmit_mode start_xmit";
+static const char dev_cmd_xmit_mode_1[] = "xmit_mode netif_receive";
+static const char dev_cmd_xmit_mode_2[] = "xmit_mode queue_xmit";
+static const char dev_cmd_xmit_mode_3[] = "xmit_mode nonsense";
+static const char dev_cmd_flag_0[] = "flag UDPCSUM";
+static const char dev_cmd_flag_1[] = "flag !UDPCSUM";
+static const char dev_cmd_flag_2[] = "flag nonsense";
+static const char dev_cmd_dst_min_0[] = "dst_min 101.102.103.104";
+static const char dev_cmd_dst_0[] = "dst 101.102.103.104";
+static const char dev_cmd_dst_max_0[] = "dst_max 201.202.203.204";
+static const char dev_cmd_dst6_0[] = "dst6 2001:db38:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_dst6_min_0[] = "dst6_min 2001:db8:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_dst6_max_0[] = "dst6_max 2001:db8:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_src6_0[] = "src6 2001:db38:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_src_min_0[] = "src_min 101.102.103.104";
+static const char dev_cmd_src_max_0[] = "src_max 201.202.203.204";
+static const char dev_cmd_dst_mac_0[] = "dst_mac 01:02:03:04:05:06";
+static const char dev_cmd_src_mac_0[] = "src_mac 11:12:13:14:15:16";
+static const char dev_cmd_clear_counters_0[] = "clear_counters";
+static const char dev_cmd_flows_0[] = "flows 100";
+static const char dev_cmd_spi_0[] = "spi 100";
+static const char dev_cmd_flowlen_0[] = "flowlen 100";
+static const char dev_cmd_queue_map_min_0[] = "queue_map_min 1";
+static const char dev_cmd_queue_map_max_0[] = "queue_map_max 2";
+static const char dev_cmd_mpls_0[] = "mpls 00000001";
+static const char dev_cmd_mpls_1[] = "mpls 00000001,000000f2";
+static const char dev_cmd_mpls_2[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f";
+static const char dev_cmd_mpls_3[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f,00000f10";
+static const char dev_cmd_vlan_id_0[] = "vlan_id 1";
+static const char dev_cmd_vlan_p_0[] = "vlan_p 1";
+static const char dev_cmd_vlan_cfi_0[] = "vlan_cfi 1";
+static const char dev_cmd_vlan_id_1[] = "vlan_id 4096";
+static const char dev_cmd_svlan_id_0[] = "svlan_id 1";
+static const char dev_cmd_svlan_p_0[] = "svlan_p 1";
+static const char dev_cmd_svlan_cfi_0[] = "svlan_cfi 1";
+static const char dev_cmd_svlan_id_1[] = "svlan_id 4096";
+static const char dev_cmd_tos_0[] = "tos 0";
+static const char dev_cmd_tos_1[] = "tos 0f";
+static const char dev_cmd_tos_2[] = "tos 0ff";
+static const char dev_cmd_traffic_class_0[] = "traffic_class f0";
+static const char dev_cmd_skb_priority_0[] = "skb_priority 999";
+
+FIXTURE(proc_net_pktgen) {
+	int ctrl_fd;
+	int thr_fd;
+	int dev_fd;
+};
+
+FIXTURE_SETUP(proc_net_pktgen) {
+	int r;
+	ssize_t len;
+
+	r = system("modprobe pktgen");
+	ASSERT_EQ(r, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+	self->ctrl_fd = open("/proc/net/pktgen/pgctrl", O_RDWR);
+	ASSERT_GE(self->ctrl_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+	self->thr_fd = open("/proc/net/pktgen/kpktgend_0", O_RDWR);
+	ASSERT_GE(self->thr_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+	len = write(self->thr_fd, thr_cmd_add_loopback_0, sizeof(thr_cmd_add_loopback_0));
+	ASSERT_EQ(len, sizeof(thr_cmd_add_loopback_0)) TH_LOG("device lo@0 already registered?");
+
+	self->dev_fd = open("/proc/net/pktgen/lo@0", O_RDWR);
+	ASSERT_GE(self->dev_fd, 0) TH_LOG("device entry for lo@0 missing?");
+}
+
+FIXTURE_TEARDOWN(proc_net_pktgen) {
+	int ret;
+	ssize_t len;
+
+	ret = close(self->dev_fd);
+	EXPECT_EQ(ret, 0);
+
+	len = write(self->thr_fd, thr_cmd_rm_loopback_0, sizeof(thr_cmd_rm_loopback_0));
+	EXPECT_EQ(len, sizeof(thr_cmd_rm_loopback_0));
+
+	ret = close(self->thr_fd);
+	EXPECT_EQ(ret, 0);
+
+	ret = close(self->ctrl_fd);
+	EXPECT_EQ(ret, 0);
+}
+
+TEST_F(proc_net_pktgen, wrong_ctrl_cmd) {
+	/* every prefix length, from 0 up to and including the '\0', is rejected */
+	for (int count = 0; count <= sizeof(wrong_ctrl_cmd); count++) {
+		ssize_t len = write(self->ctrl_fd, wrong_ctrl_cmd, count);
+
+		EXPECT_EQ(len, -1);
+		EXPECT_EQ(errno, EINVAL);
+	}
+}
+
+TEST_F(proc_net_pktgen, ctrl_cmd) {
+	ssize_t len;
+
+	/* "stop": first with, then without, the trailing '\0' */
+	len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop));
+	EXPECT_EQ(len, sizeof(ctrl_cmd_stop));
+	len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop) - 1);
+	EXPECT_EQ(len, sizeof(ctrl_cmd_stop) - 1);
+
+	/* "start": first with, then without, the trailing '\0' */
+	len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start));
+	EXPECT_EQ(len, sizeof(ctrl_cmd_start));
+	len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start) - 1);
+	EXPECT_EQ(len, sizeof(ctrl_cmd_start) - 1);
+
+	/* "reset": first with, then without, the trailing '\0' */
+	len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset));
+	EXPECT_EQ(len, sizeof(ctrl_cmd_reset));
+	len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset) - 1);
+	EXPECT_EQ(len, sizeof(ctrl_cmd_reset) - 1);
+}
+
+TEST_F(proc_net_pktgen, wrong_thr_cmd) {
+	/* every prefix length, from 0 up to and including the '\0', is rejected */
+	for (int count = 0; count <= sizeof(wrong_thr_cmd); count++) {
+		ssize_t len = write(self->thr_fd, wrong_thr_cmd, count);
+
+		EXPECT_EQ(len, -1);
+		EXPECT_EQ(errno, EINVAL);
+	}
+}
+
+TEST_F(proc_net_pktgen, legacy_thr_cmd) {
+	for (int i = 0; i <= sizeof(legacy_thr_cmd); i++) {
+		ssize_t len = write(self->thr_fd, legacy_thr_cmd, i);
+
+		if (i >= (sizeof(legacy_thr_cmd) - 1)) {
+			/* whole command, without or with its '\0': accepted */
+			EXPECT_EQ(len, i);
+			continue;
+		}
+
+		/* truncated command string: rejected */
+		EXPECT_EQ(len, -1);
+		EXPECT_EQ(errno, EINVAL);
+	}
+}
+
+TEST_F(proc_net_pktgen, wrong_dev_cmd) {
+	/* every prefix length, from 0 up to and including the '\0', is rejected */
+	for (int count = 0; count <= sizeof(wrong_dev_cmd); count++) {
+		ssize_t len = write(self->dev_fd, wrong_dev_cmd, count);
+
+		EXPECT_EQ(len, -1);
+		EXPECT_EQ(errno, EINVAL);
+	}
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_min_pkt_size) {
+	ssize_t len;
+
+	/* with trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0));
+
+	/* without trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0) - 1);
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0) - 1);
+
+	/* with trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1));
+
+	/* without trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1) - 1);
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1) - 1);
+
+	/* with trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2));
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2));
+
+	/* without trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2) - 1);
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2) - 1);
+
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_3, sizeof(dev_cmd_min_pkt_size_3));
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_3));
+
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_4, sizeof(dev_cmd_min_pkt_size_4));
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_4));
+
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_5, sizeof(dev_cmd_min_pkt_size_5));
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_5));
+
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_6, sizeof(dev_cmd_min_pkt_size_6));
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_6));
+
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_7, sizeof(dev_cmd_min_pkt_size_7));
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_7));
+
+	len = write(self->dev_fd, dev_cmd_min_pkt_size_8, sizeof(dev_cmd_min_pkt_size_8));
+	EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_8));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_max_pkt_size) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_max_pkt_size_0, sizeof(dev_cmd_max_pkt_size_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_max_pkt_size_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_pkt_size) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_pkt_size_0, sizeof(dev_cmd_pkt_size_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_pkt_size_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_imix_weights) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_imix_weights_0, sizeof(dev_cmd_imix_weights_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_0));
+
+	len = write(self->dev_fd, dev_cmd_imix_weights_1, sizeof(dev_cmd_imix_weights_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_1));
+
+	len = write(self->dev_fd, dev_cmd_imix_weights_2, sizeof(dev_cmd_imix_weights_2));
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, E2BIG);
+
+	/* with trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3));
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* without trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3) - 1);
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* with trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4));
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* without trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4) - 1);
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* with trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5));
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* without trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5) - 1);
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* with trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6));
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	/* without trailing '\0' */
+	len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6) - 1);
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_debug) {
+	ssize_t len;
+
+	/* debug on */
+	len = write(self->dev_fd, dev_cmd_debug_0, sizeof(dev_cmd_debug_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_debug_0));
+
+	/* debug off */
+	len = write(self->dev_fd, dev_cmd_debug_1, sizeof(dev_cmd_debug_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_debug_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_frags) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_frags_0, sizeof(dev_cmd_frags_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_frags_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_delay) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_delay_0, sizeof(dev_cmd_delay_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_delay_0));
+
+	len = write(self->dev_fd, dev_cmd_delay_1, sizeof(dev_cmd_delay_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_delay_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_rate) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_rate_0, sizeof(dev_cmd_rate_0));
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	len = write(self->dev_fd, dev_cmd_rate_1, sizeof(dev_cmd_rate_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_rate_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_ratep) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_ratep_0, sizeof(dev_cmd_ratep_0));
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EINVAL);
+
+	len = write(self->dev_fd, dev_cmd_ratep_1, sizeof(dev_cmd_ratep_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_ratep_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_src_min) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_udp_src_min_0, sizeof(dev_cmd_udp_src_min_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_udp_src_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_dst_min) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_udp_dst_min_0, sizeof(dev_cmd_udp_dst_min_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_src_max) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_udp_src_max_0, sizeof(dev_cmd_udp_src_max_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_udp_src_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_dst_max) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_udp_dst_max_0, sizeof(dev_cmd_udp_dst_max_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_clone_skb) {
+	ssize_t len;
+
+	/* clone_skb on (gives EOPNOTSUPP on lo device) */
+	len = write(self->dev_fd, dev_cmd_clone_skb_0, sizeof(dev_cmd_clone_skb_0));
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, EOPNOTSUPP);
+
+	/* clone_skb off */
+	len = write(self->dev_fd, dev_cmd_clone_skb_1, sizeof(dev_cmd_clone_skb_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_clone_skb_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_count) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_count_0, sizeof(dev_cmd_count_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_mac_count) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_src_mac_count_0, sizeof(dev_cmd_src_mac_count_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_src_mac_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_mac_count) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_dst_mac_count_0, sizeof(dev_cmd_dst_mac_count_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_burst) {
+	ssize_t len;
+
+	/* burst off */
+	len = write(self->dev_fd, dev_cmd_burst_0, sizeof(dev_cmd_burst_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_burst_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_node) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_node_0, sizeof(dev_cmd_node_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_node_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_xmit_mode) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_xmit_mode_0, sizeof(dev_cmd_xmit_mode_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_0));
+
+	len = write(self->dev_fd, dev_cmd_xmit_mode_1, sizeof(dev_cmd_xmit_mode_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_1));
+
+	len = write(self->dev_fd, dev_cmd_xmit_mode_2, sizeof(dev_cmd_xmit_mode_2));
+	EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_2));
+
+	len = write(self->dev_fd, dev_cmd_xmit_mode_3, sizeof(dev_cmd_xmit_mode_3));
+	EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_3));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flag) {
+	ssize_t len;
+
+	/* flag UDPCSUM on */
+	len = write(self->dev_fd, dev_cmd_flag_0, sizeof(dev_cmd_flag_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_flag_0));
+
+	/* flag UDPCSUM off */
+	len = write(self->dev_fd, dev_cmd_flag_1, sizeof(dev_cmd_flag_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_flag_1));
+
+	/* flag invalid: the write is still expected to return the full length */
+	len = write(self->dev_fd, dev_cmd_flag_2, sizeof(dev_cmd_flag_2));
+	EXPECT_EQ(len, sizeof(dev_cmd_flag_2));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_min) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_dst_min_0, sizeof(dev_cmd_dst_min_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_dst_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_dst_0, sizeof(dev_cmd_dst_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_dst_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_max) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_dst_max_0, sizeof(dev_cmd_dst_max_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_dst_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_dst6_0, sizeof(dev_cmd_dst6_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_dst6_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6_min) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_dst6_min_0, sizeof(dev_cmd_dst6_min_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_dst6_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6_max) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_dst6_max_0, sizeof(dev_cmd_dst6_max_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_dst6_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src6) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_src6_0, sizeof(dev_cmd_src6_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_src6_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_min) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_src_min_0, sizeof(dev_cmd_src_min_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_src_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_max) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_src_max_0, sizeof(dev_cmd_src_max_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_src_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_mac) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_dst_mac_0, sizeof(dev_cmd_dst_mac_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_mac) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_src_mac_0, sizeof(dev_cmd_src_mac_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_src_mac_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_clear_counters) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_clear_counters_0, sizeof(dev_cmd_clear_counters_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_clear_counters_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flows) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_flows_0, sizeof(dev_cmd_flows_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_flows_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_spi) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_spi_0, sizeof(dev_cmd_spi_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_spi_0)) TH_LOG("CONFIG_XFRM not enabled?");
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flowlen) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_flowlen_0, sizeof(dev_cmd_flowlen_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_flowlen_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_queue_map_min) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_queue_map_min_0, sizeof(dev_cmd_queue_map_min_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_queue_map_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_queue_map_max) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_queue_map_max_0, sizeof(dev_cmd_queue_map_max_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_queue_map_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_mpls) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_mpls_0, sizeof(dev_cmd_mpls_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_mpls_0));
+
+	len = write(self->dev_fd, dev_cmd_mpls_1, sizeof(dev_cmd_mpls_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_mpls_1));
+
+	len = write(self->dev_fd, dev_cmd_mpls_2, sizeof(dev_cmd_mpls_2));
+	EXPECT_EQ(len, sizeof(dev_cmd_mpls_2));
+
+	len = write(self->dev_fd, dev_cmd_mpls_3, sizeof(dev_cmd_mpls_3));
+	EXPECT_EQ(len, -1);
+	EXPECT_EQ(errno, E2BIG);
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_vlan_id) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_vlan_id_0, sizeof(dev_cmd_vlan_id_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_0));
+
+	len = write(self->dev_fd, dev_cmd_vlan_p_0, sizeof(dev_cmd_vlan_p_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_vlan_p_0));
+
+	len = write(self->dev_fd, dev_cmd_vlan_cfi_0, sizeof(dev_cmd_vlan_cfi_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_vlan_cfi_0));
+
+	len = write(self->dev_fd, dev_cmd_vlan_id_1, sizeof(dev_cmd_vlan_id_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_svlan_id) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_svlan_id_0, sizeof(dev_cmd_svlan_id_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_0));
+
+	len = write(self->dev_fd, dev_cmd_svlan_p_0, sizeof(dev_cmd_svlan_p_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_svlan_p_0));
+
+	len = write(self->dev_fd, dev_cmd_svlan_cfi_0, sizeof(dev_cmd_svlan_cfi_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_svlan_cfi_0));
+
+	len = write(self->dev_fd, dev_cmd_svlan_id_1, sizeof(dev_cmd_svlan_id_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_1));
+}
+
+
+TEST_F(proc_net_pktgen, dev_cmd_tos) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_tos_0, sizeof(dev_cmd_tos_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_tos_0));
+
+	len = write(self->dev_fd, dev_cmd_tos_1, sizeof(dev_cmd_tos_1));
+	EXPECT_EQ(len, sizeof(dev_cmd_tos_1));
+
+	len = write(self->dev_fd, dev_cmd_tos_2, sizeof(dev_cmd_tos_2));
+	EXPECT_EQ(len, sizeof(dev_cmd_tos_2));
+}
+
+
+TEST_F(proc_net_pktgen, dev_cmd_traffic_class) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_traffic_class_0, sizeof(dev_cmd_traffic_class_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_traffic_class_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_skb_priority) {
+	ssize_t len;
+
+	len = write(self->dev_fd, dev_cmd_skb_priority_0, sizeof(dev_cmd_skb_priority_0));
+	EXPECT_EQ(len, sizeof(dev_cmd_skb_priority_0));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 84c524357075..ab8d8b7e6cb0 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -54,7 +54,7 @@
 #include <unistd.h>
 
 #include "psock_lib.h"
-#include "../kselftest.h"
+#include "kselftest.h"
 
 #define RING_NUM_FRAMES			20
 
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index 6e4fef560873..067265b0a554 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -22,10 +22,6 @@
 
 #define PORT_BASE			8000
 
-#ifndef __maybe_unused
-# define __maybe_unused		__attribute__ ((__unused__))
-#endif
-
 static __maybe_unused void pair_udp_setfilter(int fd)
 {
 	/* the filter below checks for all of the following conditions that
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 404a2ce759ab..7caf3135448d 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -12,7 +12,7 @@
  *
  * Datapath:
  *   Open a pair of packet sockets and send resp. receive an a priori known
- *   packet pattern accross the sockets and check if it was received resp.
+ *   packet pattern across the sockets and check if it was received resp.
  *   sent correctly. Fanout in combination with RX_RING is currently not
  *   tested here.
  *
@@ -22,6 +22,7 @@
  *   - TPACKET_V3: RX_RING
  */
 
+#undef NDEBUG
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/types.h>
@@ -33,7 +34,6 @@
 #include <ctype.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include <bits/wordsize.h>
 #include <net/ethernet.h>
 #include <netinet/ip.h>
 #include <arpa/inet.h>
@@ -46,7 +46,7 @@
 
 #include "psock_lib.h"
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 #ifndef bug_on
 # define bug_on(cond)		assert(!(cond))
@@ -785,7 +785,7 @@ static int test_kernel_bit_width(void)
 
 static int test_user_bit_width(void)
 {
-	return __WORDSIZE;
+	return sizeof(long) * 8;
 }
 
 static const char *tpacket_str[] = {
diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile
index 612a7219990e..fe363be8e358 100644
--- a/tools/testing/selftests/net/rds/Makefile
+++ b/tools/testing/selftests/net/rds/Makefile
@@ -5,8 +5,15 @@ all:
 
 TEST_PROGS := run.sh
 
-TEST_FILES := include.sh test.py
+TEST_FILES := \
+	include.sh \
+	settings \
+	test.py \
+# end of TEST_FILES
 
-EXTRA_CLEAN := /tmp/rds_logs include.sh
+EXTRA_CLEAN := \
+	include.sh \
+	/tmp/rds_logs \
+# end of EXTRA_CLEAN
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt
index cbde2951ab13..c6fe003d503b 100644
--- a/tools/testing/selftests/net/rds/README.txt
+++ b/tools/testing/selftests/net/rds/README.txt
@@ -31,8 +31,11 @@ EXAMPLE:
     # Alternatly create a gcov disabled .config
     tools/testing/selftests/net/rds/config.sh
 
+    # Config paths may also be specified with the -c flag
+    tools/testing/selftests/net/rds/config.sh -c .config.local
+
     # build the kernel
-    vng --build  --config tools/testing/selftests/net/config
+    vng --build --config .config
 
     # launch the tests in a VM
     vng -v --rwdir ./ --run . --user root --cpus 4 -- \
diff --git a/tools/testing/selftests/net/rds/config b/tools/testing/selftests/net/rds/config
new file mode 100644
index 000000000000..97db7ecb892a
--- /dev/null
+++ b/tools/testing/selftests/net/rds/config
@@ -0,0 +1,5 @@
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_NETEM=y
+CONFIG_RDS=y
+CONFIG_RDS_TCP=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh
index 791c8dbe1095..29a79314dd60 100755
--- a/tools/testing/selftests/net/rds/config.sh
+++ b/tools/testing/selftests/net/rds/config.sh
@@ -6,15 +6,20 @@ set -u
 set -x
 
 unset KBUILD_OUTPUT
+CONF_FILE=""
+FLAGS=()
 
 GENERATE_GCOV_REPORT=0
-while getopts "g" opt; do
+while getopts "gc:" opt; do
   case ${opt} in
     g)
       GENERATE_GCOV_REPORT=1
       ;;
+    c)
+      CONF_FILE=$OPTARG
+      ;;
     :)
-      echo "USAGE: config.sh [-g]"
+      echo "USAGE: config.sh [-g] [-c config]"
       exit 1
       ;;
     ?)
@@ -24,30 +29,35 @@ while getopts "g" opt; do
   esac
 done
 
-CONF_FILE="tools/testing/selftests/net/config"
+# Only pass --file when -c was given. The ${FLAGS[@]+...} form used below
+# is required because bash < 4.4 treats expanding an empty array as an
+# unbound variable under 'set -u'.
+if [[ "$CONF_FILE" != "" ]]; then
+	FLAGS=(--file "$CONF_FILE")
+fi
 
 # no modules
-scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES
+scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --disable CONFIG_MODULES
 
 # enable RDS
-scripts/config --file "$CONF_FILE" --enable CONFIG_RDS
-scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP
+scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --enable CONFIG_RDS
+scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --enable CONFIG_RDS_TCP
 
 if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
 	# instrument RDS and only RDS
-	scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL
-	scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
-	scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS
+	scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --enable CONFIG_GCOV_KERNEL
+	scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --disable GCOV_PROFILE_ALL
+	scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --enable GCOV_PROFILE_RDS
 else
-	scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL
-	scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
-	scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS
+	scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --disable CONFIG_GCOV_KERNEL
+	scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --disable GCOV_PROFILE_ALL
+	scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --disable GCOV_PROFILE_RDS
 fi
 
 # need network namespaces to run tests with veth network interfaces
-scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS
-scripts/config --file "$CONF_FILE" --enable CONFIG_VETH
+scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --enable CONFIG_NET_NS
+scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --enable CONFIG_VETH
 
 # simulate packet loss
-scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM
+scripts/config ${FLAGS[@]+"${FLAGS[@]}"} --enable CONFIG_NET_SCH_NETEM
 
diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh
index 8aee244f582a..897d17d1b8db 100755
--- a/tools/testing/selftests/net/rds/run.sh
+++ b/tools/testing/selftests/net/rds/run.sh
@@ -19,6 +19,9 @@ if test -f "$build_include"; then
 	build_dir="$mk_build_dir"
 fi
 
+# Source settings for timeout value (also used by ksft runner)
+source "$current_dir"/settings
+
 # This test requires kernel source and the *.gcda data therein
 # Locate the top level of the kernel source, and the net/rds
 # subfolder with the appropriate *.gcno object files
@@ -194,8 +197,8 @@ set +e
 echo running RDS tests...
 echo Traces will be logged to "$TRACE_FILE"
 rm -f "$TRACE_FILE"
-strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \
-       -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP"
+strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" \
+	--timeout "$timeout" -d "$LOG_DIR" -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP"
 
 test_rc=$?
 dmesg > "${LOG_DIR}/dmesg.out"
diff --git a/tools/testing/selftests/net/rds/settings b/tools/testing/selftests/net/rds/settings
new file mode 100644
index 000000000000..d2009a64589c
--- /dev/null
+++ b/tools/testing/selftests/net/rds/settings
@@ -0,0 +1 @@
+timeout=400
diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py
index 4a7178d11193..93e23e8b256c 100755
--- a/tools/testing/selftests/net/rds/test.py
+++ b/tools/testing/selftests/net/rds/test.py
@@ -11,9 +11,8 @@ import signal
 import socket
 import subprocess
 import sys
-import atexit
-from pwd import getpwuid
-from os import stat
+import tempfile
+import shutil
 
 # Allow utils module to be imported from different directory
 this_dir = os.path.dirname(os.path.realpath(__file__))
@@ -23,45 +22,54 @@ from lib.py.utils import ip
 libc = ctypes.cdll.LoadLibrary('libc.so.6')
 setns = libc.setns
 
-net0 = 'net0'
-net1 = 'net1'
+NET0 = 'net0'
+NET1 = 'net1'
 
-veth0 = 'veth0'
-veth1 = 'veth1'
+VETH0 = 'veth0'
+VETH1 = 'veth1'
 
 # Helper function for creating a socket inside a network namespace.
 # We need this because otherwise RDS will detect that the two TCP
 # sockets are on the same interface and use the loop transport instead
 # of the TCP transport.
-def netns_socket(netns, *args):
+def netns_socket(netns, *sock_args):
+    """
+    Create a socket inside a network namespace and return it
+
+    :param netns: the name of the network namespace
+    :param sock_args: arguments passed to socket.socket() (family and type)
+    """
     u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET)
 
     child = os.fork()
     if child == 0:
         # change network namespace
-        with open(f'/var/run/netns/{netns}') as f:
+        with open(f'/var/run/netns/{netns}', encoding='utf-8') as f:
             try:
-                ret = setns(f.fileno(), 0)
+                setns(f.fileno(), 0)
             except IOError as e:
                 print(e.errno)
                 print(e)
 
         # create socket in target namespace
-        s = socket.socket(*args)
+        sock = socket.socket(*sock_args)
 
         # send resulting socket to parent
-        socket.send_fds(u0, [], [s.fileno()])
+        socket.send_fds(u0, [], [sock.fileno()])
 
         sys.exit(0)
 
     # receive socket from child
-    _, s, _, _ = socket.recv_fds(u1, 0, 1)
+    _, fds, _, _ = socket.recv_fds(u1, 0, 1)
     os.waitpid(child, 0)
     u0.close()
     u1.close()
-    return socket.fromfd(s[0], *args)
+    return socket.fromfd(fds[0], *sock_args)
 
-def signal_handler(sig, frame):
+def signal_handler(_sig, _frame):
+    """
+    Test timed out signal handler
+    """
     print('Test timed out')
     sys.exit(1)
 
@@ -81,13 +89,13 @@ parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication",
                     type=int, default=0)
 args = parser.parse_args()
 logdir=args.logdir
-packet_loss=str(args.loss)+'%'
-packet_corruption=str(args.corruption)+'%'
-packet_duplicate=str(args.duplicate)+'%'
+PACKET_LOSS=str(args.loss)+'%'
+PACKET_CORRUPTION=str(args.corruption)+'%'
+PACKET_DUPLICATE=str(args.duplicate)+'%'
 
-ip(f"netns add {net0}")
-ip(f"netns add {net1}")
-ip(f"link add type veth")
+ip(f"netns add {NET0}")
+ip(f"netns add {NET1}")
+ip("link add type veth")
 
 addrs = [
     # we technically don't need different port numbers, but this will
@@ -99,38 +107,38 @@ addrs = [
 # move interfaces to separate namespaces so they can no longer be
 # bound directly; this prevents rds from switching over from the tcp
 # transport to the loop transport.
-ip(f"link set {veth0} netns {net0} up")
-ip(f"link set {veth1} netns {net1} up")
+ip(f"link set {VETH0} netns {NET0} up")
+ip(f"link set {VETH1} netns {NET1} up")
 
 
 
 # add addresses
-ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}")
-ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}")
+ip(f"-n {NET0} addr add {addrs[0][0]}/32 dev {VETH0}")
+ip(f"-n {NET1} addr add {addrs[1][0]}/32 dev {VETH1}")
 
 # add routes
-ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}")
-ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}")
+ip(f"-n {NET0} route add {addrs[1][0]}/32 dev {VETH0}")
+ip(f"-n {NET1} route add {addrs[0][0]}/32 dev {VETH1}")
 
 # sanity check that our two interfaces/addresses are correctly set up
 # and communicating by doing a single ping
-ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}")
+ip(f"netns exec {NET0} ping -c 1 {addrs[1][0]}")
 
 # Start a packet capture on each network
-for net in [net0, net1]:
-    tcpdump_pid = os.fork()
-    if tcpdump_pid == 0:
-        pcap = logdir+'/'+net+'.pcap'
-        subprocess.check_call(['touch', pcap])
-        user = getpwuid(stat(pcap).st_uid).pw_name
-        ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}")
-        sys.exit(0)
+tcpdump_procs = []
+for net in [NET0, NET1]:
+    pcap = logdir+'/'+net+'.pcap'
+    fd, pcap_tmp = tempfile.mkstemp(suffix=".pcap", prefix=f"{net}-", dir="/tmp")
+    p = subprocess.Popen(
+        ['ip', 'netns', 'exec', net,
+         '/usr/sbin/tcpdump', '-i', 'any', '-w', pcap_tmp])
+    tcpdump_procs.append((p, pcap_tmp, pcap, fd))
 
 # simulate packet loss, duplication and corruption
-for net, iface in [(net0, veth0), (net1, veth1)]:
+for net, iface in [(NET0, VETH0), (NET1, VETH1)]:
     ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem  \
-         corrupt {packet_corruption} loss {packet_loss} duplicate  \
-         {packet_duplicate}")
+         corrupt {PACKET_CORRUPTION} loss {PACKET_LOSS} duplicate  \
+         {PACKET_DUPLICATE}")
 
 # add a timeout
 if args.timeout > 0:
@@ -138,8 +146,8 @@ if args.timeout > 0:
     signal.signal(signal.SIGALRM, signal_handler)
 
 sockets = [
-    netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET),
-    netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET),
+    netns_socket(NET0, socket.AF_RDS, socket.SOCK_SEQPACKET),
+    netns_socket(NET1, socket.AF_RDS, socket.SOCK_SEQPACKET),
 ]
 
 for s, addr in zip(sockets, addrs):
@@ -150,9 +158,7 @@ fileno_to_socket = {
     s.fileno(): s for s in sockets
 }
 
-addr_to_socket = {
-    addr: s for addr, s in zip(addrs, sockets)
-}
+addr_to_socket = dict(zip(addrs, sockets))
 
 socket_to_addr = {
     s: addr for addr, s in zip(addrs, sockets)
@@ -166,14 +172,14 @@ ep = select.epoll()
 for s in sockets:
     ep.register(s, select.EPOLLRDNORM)
 
-n = 50000
+NUM_PACKETS = 50000
 nr_send = 0
 nr_recv = 0
 
-while nr_send < n:
+while nr_send < NUM_PACKETS:
     # Send as much as we can without blocking
     print("sending...", nr_send, nr_recv)
-    while nr_send < n:
+    while nr_send < NUM_PACKETS:
         send_data = hashlib.sha256(
             f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8')
 
@@ -212,7 +218,7 @@ while nr_send < n:
                         break
 
     # exercise net/rds/tcp.c:rds_tcp_sysctl_reset()
-    for net in [net0, net1]:
+    for net in [NET0, NET1]:
         ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000")
         ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000")
 
@@ -242,7 +248,11 @@ for s in sockets:
 print(f"getsockopt(): {nr_success}/{nr_error}")
 
 print("Stopping network packet captures")
-subprocess.check_call(['killall', '-q', 'tcpdump'])
+for p, pcap_tmp, pcap, fd in tcpdump_procs:
+    p.terminate()
+    p.wait()
+    os.close(fd)
+    shutil.move(pcap_tmp, pcap)
 
 # We're done sending and receiving stuff, now let's check if what
 # we received is what we sent.
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 066efd30e294..5aad27a0d13a 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -22,7 +22,7 @@
 #include <sys/socket.h>
 #include <sys/types.h>
 #include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 struct reuse_opts {
 	int reuseaddr[2];
@@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid)
 		ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
 
 		if (opts->reuseport[0] && opts->reuseport[1]) {
-			EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened.");
+			EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened.");
 		} else {
 			EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations.");
 		}
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index b8475cb29be7..1c43401a1c80 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -9,7 +9,6 @@
 #include <arpa/inet.h>
 #include <errno.h>
 #include <error.h>
-#include <linux/dccp.h>
 #include <linux/in.h>
 #include <linux/unistd.h>
 #include <stdbool.h>
@@ -21,10 +20,6 @@
 #include <sys/socket.h>
 #include <unistd.h>
 
-#ifndef SOL_DCCP
-#define SOL_DCCP 269
-#endif
-
 static const char *IP4_ADDR = "127.0.0.1";
 static const char *IP6_ADDR = "::1";
 static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
@@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
 
 		if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
 			error(1, errno, "tcp: failed to listen on receive port");
-		else if (proto == SOCK_DCCP) {
-			if (setsockopt(rcv_fds[i], SOL_DCCP,
-					DCCP_SOCKOPT_SERVICE,
-					&(int) {htonl(42)}, sizeof(int)))
-				error(1, errno, "failed to setsockopt");
-
-			if (listen(rcv_fds[i], 10))
-				error(1, errno, "dccp: failed to listen on receive port");
-		}
 	}
 }
 
@@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto)
 	if (fd < 0)
 		error(1, errno, "failed to create send socket");
 
-	if (proto == SOCK_DCCP &&
-		setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
-				&(int){htonl(42)}, sizeof(int)))
-		error(1, errno, "failed to setsockopt");
-
 	if (bind(fd, saddr, sz))
 		error(1, errno, "failed to bind send socket");
 
@@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto)
 	if (i < 0)
 		error(1, errno, "epoll_wait failed");
 
-	if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+	if (proto == SOCK_STREAM) {
 		fd = accept(ev.data.fd, NULL, NULL);
 		if (fd < 0)
 			error(1, errno, "failed to accept");
@@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto,
 
 static void test_proto(int proto, const char *proto_str)
 {
-	if (proto == SOCK_DCCP) {
-		int test_fd;
-
-		test_fd = socket(AF_INET, proto, 0);
-		if (test_fd < 0) {
-			if (errno == ESOCKTNOSUPPORT) {
-				fprintf(stderr, "DCCP not supported: skipping DCCP tests\n");
-				return;
-			} else
-				error(1, errno, "failed to create a DCCP socket");
-		}
-		close(test_fd);
-	}
-
 	fprintf(stderr, "%s IPv4 ... ", proto_str);
 	run_one_test(AF_INET, AF_INET, proto, IP4_ADDR);
 
@@ -271,7 +238,6 @@ int main(void)
 {
 	test_proto(SOCK_DGRAM, "UDP");
 	test_proto(SOCK_STREAM, "TCP");
-	test_proto(SOCK_DCCP, "DCCP");
 
 	fprintf(stderr, "SUCCESS\n");
 	return 0;
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index 65aea27d761c..12e48b97b862 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -23,8 +23,9 @@
 #include <sys/socket.h>
 #include <sys/resource.h>
 #include <unistd.h>
+#include <sched.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 struct test_params {
 	int recv_family;
@@ -455,8 +456,18 @@ static __attribute__((destructor)) void main_dtor(void)
 	setrlimit(RLIMIT_MEMLOCK, &rlim_old);
 }
 
+static void setup_netns(void)
+{
+	if (unshare(CLONE_NEWNET))
+		error(1, errno, "failed to unshare netns");
+	if (system("ip link set lo up"))
+		error(1, 0, "failed to bring up lo interface in netns");
+}
+
 int main(void)
 {
+	setup_netns();
+
 	fprintf(stderr, "---- IPv4 UDP ----\n");
 	/* NOTE: UDP socket lookups traverse a different code path when there
 	 * are > 10 sockets in a group.  Run the bpf test through both paths.
diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c
index 2d646174729f..ddfe92f6597a 100644
--- a/tools/testing/selftests/net/reuseport_bpf_cpu.c
+++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c
@@ -228,10 +228,20 @@ static void test(int *rcv_fd, int len, int family, int proto)
 		close(rcv_fd[cpu]);
 }
 
+static void setup_netns(void)
+{
+	if (unshare(CLONE_NEWNET))
+		error(1, errno, "failed to unshare netns");
+	if (system("ip link set lo up"))
+		error(1, 0, "failed to bring up lo interface in netns");
+}
+
 int main(void)
 {
 	int *rcv_fd, cpus;
 
+	setup_netns();
+
 	cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	if (cpus <= 0)
 		error(1, errno, "failed counting cpus");
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index c9ba36aa688e..8ec52fc5ef41 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -23,7 +23,7 @@
 #include <unistd.h>
 #include <numa.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 static const int PORT = 8888;
 
@@ -230,10 +230,20 @@ static void test(int *rcv_fd, int len, int family, int proto)
 		close(rcv_fd[node]);
 }
 
+static void setup_netns(void)
+{
+	if (unshare(CLONE_NEWNET))
+		error(1, errno, "failed to unshare netns");
+	if (system("ip link set lo up"))
+		error(1, 0, "failed to bring up lo interface in netns");
+}
+
 int main(void)
 {
 	int *rcv_fd, nodes;
 
+	setup_netns();
+
 	if (numa_available() < 0)
 		ksft_exit_skip("no numa api support\n");
 
diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c
index fb7a59ed759e..0eaf739d0c85 100644
--- a/tools/testing/selftests/net/reuseport_dualstack.c
+++ b/tools/testing/selftests/net/reuseport_dualstack.c
@@ -25,6 +25,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <unistd.h>
+#include <sched.h>
 
 static const int PORT = 8888;
 
@@ -156,10 +157,20 @@ static void test(int *rcv_fds, int count, int proto)
 	close(epfd);
 }
 
+static void setup_netns(void)
+{
+	if (unshare(CLONE_NEWNET))
+		error(1, errno, "failed to unshare netns");
+	if (system("ip link set lo up"))
+		error(1, 0, "failed to bring up lo interface in netns");
+}
+
 int main(void)
 {
 	int rcv_fds[32], i;
 
+	setup_netns();
+
 	fprintf(stderr, "---- UDP IPv4 created before IPv6 ----\n");
 	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 5);
 	build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[5]), 5);
diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh
new file mode 100755
index 000000000000..2db01ece0cc1
--- /dev/null
+++ b/tools/testing/selftests/net/route_hint.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test ensures directed broadcast routes use dst hint mechanism
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+SERVER_IP4="192.168.0.2"
+BROADCAST_ADDRESS="192.168.0.255"
+
+setup() {
+	setup_ns CLIENT_NS SERVER_NS
+
+	ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+	ip -net "${CLIENT_NS}" link set link0 up
+	ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0
+
+	ip -net "${SERVER_NS}" link set link1 up
+	ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1
+
+	ip netns exec "${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off
+	ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout"
+	ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs"
+	ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on
+}
+
+cleanup() {
+	ip -net "${SERVER_NS}" link del link1
+	cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+directed_bcast_hint_test()
+{
+	local rc=0
+
+	echo "Testing for directed broadcast route hint"
+
+	orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+	ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \
+		-B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q
+	sleep 1
+	new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+
+	res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc)
+
+	if [ "${res}" -lt 100 ]; then
+		echo "[ OK ]"
+		rc="${ksft_pass}"
+	else
+		echo "[FAIL] expected in_brd to be under 100, got ${res}"
+		rc="${ksft_fail}"
+	fi
+
+	return "${rc}"
+}
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+	echo "SKIP: Could not run test without mausezahn tool"
+	exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+	echo "SKIP: Could not run test without jq tool"
+	exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v bc)" ]; then
+	echo "SKIP: Could not run test without bc tool"
+	exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+
+directed_bcast_hint_test
+exit $?
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index 4287a8529890..b200019b3c80 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -54,16 +54,16 @@ cleanup
 
 echo 1 > /proc/sys/net/core/rps_default_mask
 setup
-chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
 
 echo 3 > /proc/sys/net/core/rps_default_mask
-chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0
+chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0
 
 ip link add name $VETH type veth peer netns $NETNS name $VETH
 ip link set dev $VETH up
 ip -n $NETNS link set dev $VETH up
-chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
-chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3
+chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0
 ip link del dev $VETH
 ip netns del $NETNS
 
@@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0
 
 ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1
 ip link add name $VETH type veth peer netns $NETNS name $VETH
-chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
 chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1
-chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0
+chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0
 
 exit $ret
diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py
new file mode 100755
index 000000000000..e9ad5e88da97
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily
+import socket
+
+IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01'
+
+def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None:
+    """
+    Verify that at least one interface has the IPv4 all-hosts multicast address.
+    At least the loopback interface should have this address.
+    """
+
+    addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True)
+
+    all_host_multicasts = [
+        addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST
+    ]
+
+    ksft_ge(len(all_host_multicasts), 1,
+            "No interface found with the IPv4 all-hosts multicast address")
+
+def main() -> None:
+    rtnl = RtnlAddrFamily()
+    ksft_run([dump_mcaddr_check], args=(rtnl, ))
+    ksft_exit()
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 2e8243a65b50..5a5ff88321d5 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -8,6 +8,7 @@ ALL_TESTS="
 	kci_test_polrouting
 	kci_test_route_get
 	kci_test_addrlft
+	kci_test_addrlft_route_cleanup
 	kci_test_promote_secondaries
 	kci_test_tc
 	kci_test_gre
@@ -21,15 +22,18 @@ ALL_TESTS="
 	kci_test_vrf
 	kci_test_encap
 	kci_test_macsec
+	kci_test_macsec_vlan
 	kci_test_ipsec
 	kci_test_ipsec_offload
 	kci_test_fdb_get
 	kci_test_fdb_del
 	kci_test_neigh_get
+	kci_test_neigh_update
 	kci_test_bridge_parent_id
 	kci_test_address_proto
 	kci_test_enslave_bonding
 	kci_test_mngtmpaddr
+	kci_test_operstate
 "
 
 devdummy="test-dummy0"
@@ -291,6 +295,17 @@ kci_test_route_get()
 	end_test "PASS: route get"
 }
 
+# Return 0 when no address matching $2 is shown on device $1, else 1.
+check_addr_not_exist()
+{
+	local dev=$1
+	local addr=$2
+	if ip addr show dev "$dev" | grep -q "$addr"; then
+		return 1
+	fi
+	return 0
+}
+
 kci_test_addrlft()
 {
 	for i in $(seq 10 100) ;do
@@ -298,9 +313,10 @@ kci_test_addrlft()
 		run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
 	done
 
-	sleep 5
-	run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
-	if [ $? -eq 0 ]; then
+	slowwait 5 check_addr_not_exist "$devdummy" "10.23.11."
+	if [ $? -eq 1 ]; then
+		# troubleshoot the reason for our failure
+		run_cmd ip addr show dev "$devdummy"
 		check_err 1
 		end_test "FAIL: preferred_lft addresses remaining"
 		return
@@ -309,8 +325,32 @@ kci_test_addrlft()
 	end_test "PASS: preferred_lft addresses have expired"
 }
 
+kci_test_addrlft_route_cleanup()
+{
+	local ret=0
+	local test_addr="2001:db8:99::1/64"
+	local test_prefix="2001:db8:99::/64"
+
+	run_cmd ip -6 addr add $test_addr dev "$devdummy" valid_lft 300 preferred_lft 300
+	run_cmd_grep "$test_prefix proto kernel" ip -6 route show dev "$devdummy"
+	run_cmd ip -6 addr del $test_addr dev "$devdummy"
+	run_cmd_grep_fail "$test_prefix" ip -6 route show dev "$devdummy"
+
+	if [ $ret -ne 0 ]; then
+		end_test "FAIL: route not cleaned up when address with valid_lft deleted"
+		return 1
+	fi
+
+	end_test "PASS: route cleaned up when address with valid_lft deleted"
+}
+
 kci_test_promote_secondaries()
 {
+	run_cmd ifconfig "$devdummy"
+	if [ $ret -ne 0 ]; then
+		end_test "SKIP: ifconfig not installed"
+		return $ksft_skip
+	fi
 	promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries)
 
 	sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1
@@ -507,7 +547,7 @@ kci_test_encap_fou()
 	run_cmd_fail ip -netns "$testns" fou del port 9999
 	run_cmd ip -netns "$testns" fou del port 7777
 	if [ $ret -ne 0 ]; then
-		end_test "FAIL: fou"s
+		end_test "FAIL: fou"
 		return 1
 	fi
 
@@ -561,6 +601,41 @@ kci_test_macsec()
 	end_test "PASS: macsec"
 }
 
+# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock.
+# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance
+# lock.
+# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/
+kci_test_macsec_vlan()
+{
+	msname="test_macsec1"
+	vlanname="test_vlan1"
+	local ret=0
+	run_cmd_grep "^Usage: ip macsec" ip macsec help
+	if [ $? -ne 0 ]; then
+		end_test "SKIP: macsec: iproute2 too old"
+		return $ksft_skip
+	fi
+	run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+	if [ $ret -ne 0 ];then
+		end_test "FAIL: can't add macsec interface, skipping test"
+		return 1
+	fi
+
+	run_cmd ip link set dev "$msname" up
+	ip link add link "$msname" name "$vlanname" type vlan id 1
+	ip link set dev "$vlanname" address 00:11:22:33:44:88
+	ip link set dev "$vlanname" up
+	run_cmd ip link del dev "$vlanname"
+	run_cmd ip link del dev "$msname"
+
+	if [ $ret -ne 0 ];then
+		end_test "FAIL: macsec_vlan"
+		return 1
+	fi
+
+	end_test "PASS: macsec_vlan"
+}
+
 #-------------------------------------------------------------------
 # Example commands
 #   ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
@@ -673,6 +748,11 @@ kci_test_ipsec_offload()
 	sysfsf=$sysfsd/ipsec
 	sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
 	probed=false
+	esp4_offload_probed_default=false
+
+	if lsmod | grep -q esp4_offload; then
+		esp4_offload_probed_default=true
+	fi
 
 	if ! mount | grep -q debugfs; then
 		mount -t debugfs none /sys/kernel/debug/ &> /dev/null
@@ -766,6 +846,7 @@ EOF
 	fi
 
 	# clean up any leftovers
+	! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload
 	echo 0 > /sys/bus/netdevsim/del_device
 	$probed && rmmod netdevsim
 
@@ -1080,6 +1161,60 @@ kci_test_neigh_get()
 	end_test "PASS: neigh get"
 }
 
+kci_test_neigh_update()
+{
+	dstip=10.0.2.4
+	dstmac=de:ad:be:ef:13:37
+	local ret=0
+
+	for proxy in "" "proxy" ; do
+		# add a neighbour entry without any flags
+		run_cmd ip neigh add $proxy $dstip dev "$devdummy" lladdr $dstmac nud permanent
+		run_cmd_grep $dstip ip neigh show $proxy
+		run_cmd_grep_fail "$dstip dev $devdummy .*\(managed\|use\|router\|extern\)" ip neigh show $proxy
+
+		# set the extern_learn flag, but no other
+		run_cmd ip neigh change $proxy $dstip dev "$devdummy" extern_learn
+		run_cmd_grep "$dstip dev $devdummy .* extern_learn" ip neigh show $proxy
+		run_cmd_grep_fail "$dstip dev $devdummy .* \(managed\|use\|router\)" ip neigh show $proxy
+
+		# flags are reset when not provided
+		run_cmd ip neigh change $proxy $dstip dev "$devdummy"
+		run_cmd_grep $dstip ip neigh show $proxy
+		run_cmd_grep_fail "$dstip dev $devdummy .* extern_learn" ip neigh show $proxy
+
+		# add a protocol
+		run_cmd ip neigh change $proxy $dstip dev "$devdummy" protocol boot
+		run_cmd_grep "$dstip dev $devdummy .* proto boot" ip neigh show $proxy
+
+		# protocol is retained when not provided
+		run_cmd ip neigh change $proxy $dstip dev "$devdummy"
+		run_cmd_grep "$dstip dev $devdummy .* proto boot" ip neigh show $proxy
+
+		# change protocol
+		run_cmd ip neigh change $proxy $dstip dev "$devdummy" protocol static
+		run_cmd_grep "$dstip dev $devdummy .* proto static" ip neigh show $proxy
+
+		# also check an extended flag for non-proxy neighs
+		if [ "$proxy" = "" ]; then
+			run_cmd ip neigh change $proxy $dstip dev "$devdummy" managed
+			run_cmd_grep "$dstip dev $devdummy managed" ip neigh show $proxy
+
+			run_cmd ip neigh change $proxy $dstip dev "$devdummy" lladdr $dstmac
+			run_cmd_grep_fail "$dstip dev $devdummy managed" ip neigh show $proxy
+		fi
+
+		run_cmd ip neigh del $proxy $dstip dev "$devdummy"
+	done
+
+	if [ $ret -ne 0 ];then
+		end_test "FAIL: neigh update"
+		return 1
+	fi
+
+	end_test "PASS: neigh update"
+}
+
 kci_test_bridge_parent_id()
 {
 	local ret=0
@@ -1148,6 +1283,12 @@ do_test_address_proto()
 	local ret=0
 	local err
 
+	run_cmd_grep 'proto' ip address help
+	if [ $? -ne 0 ];then
+		end_test "SKIP: addr proto ${what}: iproute2 too old"
+		return $ksft_skip
+	fi
+
 	ip address add dev "$devdummy" "$addr3"
 	check_err $?
 	proto=$(address_get_proto "$addr3")
@@ -1334,6 +1475,39 @@ kci_test_mngtmpaddr()
 	return $ret
 }
 
+kci_test_operstate()
+{
+	local ret=0
+
+	# Check that it is possible to set operational state during device
+	# creation and that it is preserved when the administrative state of
+	# the device is toggled.
+	run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789
+	run_cmd_grep "state UP" ip link show dev vx0
+	run_cmd ip link set dev vx0 down
+	run_cmd_grep "state DOWN" ip link show dev vx0
+	run_cmd ip link set dev vx0 up
+	run_cmd_grep "state UP" ip link show dev vx0
+
+	run_cmd ip link del dev vx0
+
+	# Check that it is possible to set the operational state of the device
+	# after creation.
+	run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789
+	run_cmd_grep "state UNKNOWN" ip link show dev vx0
+	run_cmd ip link set dev vx0 state up
+	run_cmd_grep "state UP" ip link show dev vx0
+
+	run_cmd ip link del dev vx0
+
+	if [ "$ret" -ne 0 ]; then
+		end_test "FAIL: operstate"
+		return 1
+	fi
+
+	end_test "PASS: operstate"
+}
+
 kci_test_rtnl()
 {
 	local current_test
@@ -1367,6 +1541,8 @@ usage: ${0##*/} OPTS
 EOF
 }
 
+require_command jq
+
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
 	end_test "SKIP: Need root privileges"
diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh
new file mode 100755
index 000000000000..3f9780232bd6
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink_notification.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test exercises the rtnetlink notification call paths, aiming for as much
+# coverage as possible.
+#
+# set -e
+
+ALL_TESTS="
+	kci_test_mcast_addr_notification
+	kci_test_anycast_addr_notification
+"
+
+source lib.sh
+test_dev="test-dummy1"
+
+# Check that adding, bringing up and deleting a dummy device produces the
+# expected multicast address notifications on "ip monitor maddr".
+kci_test_mcast_addr_notification()
+{
+	RET=0
+	local tmpfile monitor_pid match_result
+
+	tmpfile=$(mktemp)
+	defer rm "$tmpfile"
+
+	ip monitor maddr > "$tmpfile" &
+	monitor_pid=$!
+	defer kill_process "$monitor_pid"
+
+	sleep 1
+	# A monitor that has already exited is treated as missing maddr support
+	if [ ! -e "/proc/$monitor_pid" ]; then
+		RET=$ksft_skip
+		log_test "mcast addr notification: iproute2 too old"
+		return "$RET"
+	fi
+
+	ip link add name "$test_dev" type dummy
+	check_err $? "failed to add dummy interface"
+	ip link set "$test_dev" up
+	check_err $? "failed to set dummy interface up"
+	ip link del dev "$test_dev"
+	check_err $? "Failed to delete dummy interface"
+	sleep 1
+
+	# There should be 4 line matches as follows.
+	# 13: test-dummy1    inet6 mcast ff02::1 scope global
+	# 13: test-dummy1    inet mcast 224.0.0.1 scope global
+	# Deleted 13: test-dummy1    inet mcast 224.0.0.1 scope global
+	# Deleted 13: test-dummy1    inet6 mcast ff02::1 scope global
+	match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile")
+	if [ "$match_result" -ne 4 ]; then
+		RET=$ksft_fail
+	fi
+	log_test "mcast addr notification: Expected 4 matches, got $match_result"
+	return "$RET"
+}
+
+kci_test_anycast_addr_notification()
+{
+	RET=0
+	local tmpfile
+	local monitor_pid
+	local match_result
+
+	tmpfile=$(mktemp)
+	defer rm "$tmpfile"
+
+	ip monitor acaddress > "$tmpfile" &
+	monitor_pid=$!
+	defer kill_process "$monitor_pid"
+	sleep 1
+
+	if [ ! -e "/proc/$monitor_pid" ]; then
+		RET=$ksft_skip
+		log_test "anycast addr notification: iproute2 too old"
+		return "$RET"
+	fi
+
+	ip link add name "$test_dev" type dummy
+	check_err $? "failed to add dummy interface"
+	ip link set "$test_dev" up
+	check_err $? "failed to set dummy interface up"
+	sysctl -qw net.ipv6.conf."$test_dev".forwarding=1
+	ip link del dev "$test_dev"
+	check_err $? "Failed to delete dummy interface"
+	sleep 1
+
+	# There should be 2 line matches as follows.
+	# 9: test-dummy1    inet6 any fe80:: scope global
+	# Deleted 9: test-dummy1    inet6 any fe80:: scope global
+	match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile")
+	if [ "$match_result" -ne 2 ]; then
+		RET=$ksft_fail
+	fi
+	log_test "anycast addr notification: Expected 2 matches, got $match_result"
+	return "$RET"
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+	RET=$ksft_skip
+	log_test "need root privileges"
+	exit $RET
+fi
+
+require_command ip
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 16ac4df55fdb..b81ed0352d6c 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -18,7 +18,7 @@
 #include <linux/net_tstamp.h>
 #include <linux/errqueue.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 struct options {
 	int so_timestamp;
diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c
index f02f1f95d227..a04dac0b8027 100644
--- a/tools/testing/selftests/net/sctp_hello.c
+++ b/tools/testing/selftests/net/sctp_hello.c
@@ -29,7 +29,6 @@ static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len
 static int do_client(int argc, char *argv[])
 {
 	struct sockaddr_storage ss;
-	char buf[] = "hello";
 	int csk, ret, len;
 
 	if (argc < 5) {
@@ -56,16 +55,10 @@ static int do_client(int argc, char *argv[])
 
 	set_addr(&ss, argv[3], argv[4], &len);
 	ret = connect(csk, (struct sockaddr *)&ss, len);
-	if (ret < 0) {
-		printf("failed to connect to peer\n");
+	if (ret < 0)
 		return -1;
-	}
 
-	ret = send(csk, buf, strlen(buf) + 1, 0);
-	if (ret < 0) {
-		printf("failed to send msg %d\n", ret);
-		return -1;
-	}
+	recv(csk, NULL, 0, 0);
 	close(csk);
 
 	return 0;
@@ -75,7 +68,6 @@ int main(int argc, char *argv[])
 {
 	struct sockaddr_storage ss;
 	int lsk, csk, ret, len;
-	char buf[20];
 
 	if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
 		printf("%s server|client ...\n", argv[0]);
@@ -125,11 +117,6 @@ int main(int argc, char *argv[])
 		return -1;
 	}
 
-	ret = recv(csk, buf, sizeof(buf), 0);
-	if (ret <= 0) {
-		printf("failed to recv msg %d\n", ret);
-		return -1;
-	}
 	close(csk);
 	close(lsk);
 
diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh
index c854034b6aa1..667b211aa8a1 100755
--- a/tools/testing/selftests/net/sctp_vrf.sh
+++ b/tools/testing/selftests/net/sctp_vrf.sh
@@ -20,9 +20,9 @@ setup() {
 	modprobe sctp_diag
 	setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS
 
-	ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
-	ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
-	ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+	ip net exec $CLIENT_NS1 sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip net exec $CLIENT_NS2 sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip net exec $SERVER_NS sysctl -wq net.ipv6.conf.default.accept_dad=0
 
 	ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1
 	ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2
@@ -62,17 +62,40 @@ setup() {
 }
 
 cleanup() {
-	ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+	wait_client $CLIENT_NS1
+	wait_client $CLIENT_NS2
+	stop_server
 	cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS
 }
 
-wait_server() {
+start_server() {
 	local IFACE=$1
 	local CNT=0
 
-	until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \
-		grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do
-		[ $((CNT++)) = "20" ] && { RET=3; return $RET; }
+	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP $SERVER_PORT $IFACE &
+	disown
+	until ip netns exec $SERVER_NS ss -SlH | grep -q "$IFACE"; do
+		[ $((CNT++)) -eq 30 ] && { RET=3; return $RET; }
+		sleep 0.1
+	done
+}
+
+stop_server() {
+	local CNT=0
+
+	ip netns exec $SERVER_NS pkill sctp_hello
+	while ip netns exec $SERVER_NS ss -SaH | grep -q .; do
+		[ $((CNT++)) -eq 30 ] && break
+		sleep 0.1
+	done
+}
+
+wait_client() {
+	local CLIENT_NS=$1
+	local CNT=0
+
+	while ip netns exec $CLIENT_NS ss -SaH | grep -q .; do
+		[ $((CNT++)) -eq 30 ] && break
 		sleep 0.1
 	done
 }
@@ -81,14 +104,12 @@ do_test() {
 	local CLIENT_NS=$1
 	local IFACE=$2
 
-	ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
-	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
-		$SERVER_PORT $IFACE 2>&1 >/dev/null &
-	disown
-	wait_server $IFACE || return $RET
+	start_server $IFACE || return $RET
 	timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \
-		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
 	RET=$?
+	wait_client $CLIENT_NS
+	stop_server
 	return $RET
 }
 
@@ -96,25 +117,21 @@ do_testx() {
 	local IFACE1=$1
 	local IFACE2=$2
 
-	ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
-	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
-		$SERVER_PORT $IFACE1 2>&1 >/dev/null &
-	disown
-	wait_server $IFACE1 || return $RET
-	ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
-		$SERVER_PORT $IFACE2 2>&1 >/dev/null &
-	disown
-	wait_server $IFACE2 || return $RET
+	start_server $IFACE1 || return $RET
+	start_server $IFACE2 || return $RET
 	timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \
-		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT && \
 	timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \
-		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
 	RET=$?
+	wait_client $CLIENT_NS1
+	wait_client $CLIENT_NS2
+	stop_server
 	return $RET
 }
 
 testup() {
-	ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null
+	ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=1
 	echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y "
 	do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
@@ -123,7 +140,7 @@ testup() {
 	do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
 
-	ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null
+	ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=0
 	echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N "
 	do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
@@ -160,7 +177,7 @@ testup() {
 	do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
 
-	echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N "
+	echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, Y "
 	do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; }
 	echo "[PASS]"
 }
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
deleted file mode 100644
index 2070b57849de..000000000000
--- a/tools/testing/selftests/net/setup_loopback.sh
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
-readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
-readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
-readonly HARD_IRQS="$(< ${IRQ_PATH})"
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-netdev_check_for_carrier() {
-	local -r dev="$1"
-
-	for i in {1..5}; do
-		carrier="$(cat /sys/class/net/${dev}/carrier)"
-		if [[ "${carrier}" -ne 1 ]] ; then
-			echo "carrier not ready yet..." >&2
-			sleep 1
-		else
-			echo "carrier ready" >&2
-			break
-		fi
-	done
-	echo "${carrier}"
-}
-
-# Assumes that there is no existing ipvlan device on the physical device
-setup_loopback_environment() {
-	local dev="$1"
-
-	# Fail hard if cannot turn on loopback mode for current NIC
-	ethtool -K "${dev}" loopback on || exit 1
-	sleep 1
-
-	# Check for the carrier
-	carrier=$(netdev_check_for_carrier ${dev})
-	if [[ "${carrier}" -ne 1 ]] ; then
-		echo "setup_loopback_environment failed"
-		exit 1
-	fi
-}
-
-setup_macvlan_ns(){
-	local -r link_dev="$1"
-	local -r ns_name="$2"
-	local -r ns_dev="$3"
-	local -r ns_mac="$4"
-	local -r addr="$5"
-
-	ip link add link "${link_dev}" dev "${ns_dev}" \
-		address "${ns_mac}" type macvlan
-	exit_code=$?
-	if [[ "${exit_code}" -ne 0 ]]; then
-		echo "setup_macvlan_ns failed"
-		exit $exit_code
-	fi
-
-	[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
-	ip link set dev "${ns_dev}" netns "${ns_name}"
-	ip -netns "${ns_name}" link set dev "${ns_dev}" up
-	if [[ -n "${addr}" ]]; then
-		ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
-	fi
-
-	sleep 1
-}
-
-cleanup_macvlan_ns(){
-	while (( $# >= 2 )); do
-		ns_name="$1"
-		ns_dev="$2"
-		ip -netns "${ns_name}" link del dev "${ns_dev}"
-		ip netns del "${ns_name}"
-		shift 2
-	done
-}
-
-cleanup_loopback(){
-	local -r dev="$1"
-
-	ethtool -K "${dev}" loopback off
-	sleep 1
-
-	# Check for the carrier
-	carrier=$(netdev_check_for_carrier ${dev})
-	if [[ "${carrier}" -ne 1 ]] ; then
-		echo "setup_loopback_environment failed"
-		exit 1
-	fi
-}
-
-setup_interrupt() {
-	# Use timer on  host to trigger the network stack
-	# Also disable device interrupt to not depend on NIC interrupt
-	# Reduce test flakiness caused by unexpected interrupts
-	echo 100000 >"${FLUSH_PATH}"
-	echo 50 >"${IRQ_PATH}"
-}
-
-setup_ns() {
-	# Set up server_ns namespace and client_ns namespace
-	setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
-	setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
-	cleanup_macvlan_ns ${server_ns} server ${client_ns} client
-}
-
-setup() {
-	setup_loopback_environment "${dev}"
-	setup_interrupt
-}
-
-cleanup() {
-	cleanup_loopback "${dev}"
-
-	echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
-	echo "${HARD_IRQS}" >"${IRQ_PATH}"
-}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
deleted file mode 100644
index 1f78a87f6f37..000000000000
--- a/tools/testing/selftests/net/setup_veth.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-setup_veth_ns() {
-	local -r link_dev="$1"
-	local -r ns_name="$2"
-	local -r ns_dev="$3"
-	local -r ns_mac="$4"
-
-	[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
-	echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
-	ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
-	ip -netns "${ns_name}" link set dev "${ns_dev}" up
-
-	ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
-}
-
-setup_ns() {
-	# Set up server_ns namespace and client_ns namespace
-	ip link add name server type veth peer name client
-
-	setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
-	setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
-	local ns_name
-
-	for ns_name in ${client_ns} ${server_ns}; do
-		[[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
-	done
-}
-
-setup() {
-	# no global init setup step needed
-	:
-}
-
-cleanup() {
-	cleanup_ns
-}
diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c
index d87dd8d8d491..2a3f5c604f52 100644
--- a/tools/testing/selftests/net/sk_so_peek_off.c
+++ b/tools/testing/selftests/net/sk_so_peek_off.c
@@ -8,7 +8,7 @@
 #include <sys/types.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
-#include "../kselftest.h"
+#include "kselftest.h"
 
 static char *afstr(int af, int proto)
 {
diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c
new file mode 100644
index 000000000000..1fdf61d6cd7f
--- /dev/null
+++ b/tools/testing/selftests/net/skf_net_off.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Open a tun device.
+ *
+ * [modifications: use IFF_NAPI_FRAGS, add sk filter]
+ *
+ * Expects the device to have been configured previously, e.g.:
+ *   sudo ip tuntap add name tap1 mode tap
+ *   sudo ip link set tap1 up
+ *   sudo ip link set dev tap1 addr 02:00:00:00:00:01
+ *   sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
+ *
+ * And to avoid premature pskb_may_pull:
+ *
+ *   sudo ethtool -K tap1 gro off
+ *   sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux'
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if.h>
+#include <linux/if_packet.h>
+#include <linux/if_tun.h>
+#include <linux/ipv6.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+static bool cfg_do_filter;
+static bool cfg_do_frags;
+static int cfg_dst_port = 8000;
+static char *cfg_ifname;
+
+/* Open /dev/net/tun and bind the fd to tap device @tun_name; exits on error. */
+static int tun_open(const char *tun_name)
+{
+	struct ifreq ifr = {0};
+	int fd;
+
+	fd = open("/dev/net/tun", O_RDWR);
+	if (fd == -1)
+		error(1, errno, "open /dev/net/tun");
+
+	ifr.ifr_flags = IFF_TAP;
+	if (cfg_do_frags)
+		ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS;
+	strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1);
+
+	/* ioctl() only signals failure; the error code itself is in errno */
+	if (ioctl(fd, TUNSETIFF, &ifr))
+		error(1, errno, "ioctl TUNSETIFF");
+
+	return fd;
+}
+
+static void sk_set_filter(int fd)
+{
+	const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt);
+	const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest);
+	/* Attach to @fd a socket filter (exits on failure) that only passes
+	 * PACKET_HOST UDP packets with destination port cfg_dst_port. */
+	struct sock_filter filter_code[] = {
+		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),	/* else: drop */
+		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_NET_OFF + offset_proto),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2),	/* else: drop */
+		BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, SKF_NET_OFF + offset_dport),
+		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0),	/* equal: accept */
+		BPF_STMT(BPF_RET + BPF_K, 0),		/* drop */
+		BPF_STMT(BPF_RET + BPF_K, 0xFFFF),	/* accept */
+	};
+	/* positional init: instruction count, then the program itself */
+	struct sock_fprog filter = {
+		sizeof(filter_code) / sizeof(filter_code[0]),
+		filter_code,
+	};
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)))
+		error(1, errno, "setsockopt attach filter");
+}
+
+/* Open the IPv6 raw UDP receive socket; exits if socket() fails. */
+static int raw_open(void)
+{
+	int sock = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP);
+
+	if (sock == -1)
+		error(1, errno, "socket raw (udp)");
+	/* the socket filter is optional (-f) */
+	if (cfg_do_filter)
+		sk_set_filter(sock);
+
+	return sock;
+}
+
+/* Write one IPv6/UDP test packet, fdab::2 -> fdab::1, to tun fd @fd. */
+static void tun_write(int fd)
+{
+	const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };
+	const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
+	struct tun_pi pi = {0};
+	struct ipv6hdr ip6h = {0};
+	struct udphdr uh = {0};
+	struct ethhdr eth = {0};
+	uint32_t payload;
+	struct iovec iov[5];
+
+	pi.proto = htons(ETH_P_IPV6);
+
+	memcpy(eth.h_source, eth_src, sizeof(eth_src));
+	memcpy(eth.h_dest, eth_dst, sizeof(eth_dst));
+	eth.h_proto = htons(ETH_P_IPV6);
+
+	ip6h.version = 6;
+	ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t));
+	ip6h.nexthdr = IPPROTO_UDP;
+	ip6h.hop_limit = 8;
+	if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1)
+		error(1, errno, "inet_pton src");
+	if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1)
+		error(1, errno, "inet_pton dst");
+
+	uh.source = htons(8000);
+	uh.dest = htons(cfg_dst_port);
+	uh.len = ip6h.payload_len;
+	uh.check = 0;
+
+	payload = htonl(0xABABABAB);		/* Covered in IPv6 length */
+
+	iov[0].iov_base = &pi;
+	iov[0].iov_len  = sizeof(pi);
+	iov[1].iov_base = &eth;
+	iov[1].iov_len  = sizeof(eth);
+	iov[2].iov_base = &ip6h;
+	iov[2].iov_len  = sizeof(ip6h);
+	iov[3].iov_base = &uh;
+	iov[3].iov_len  = sizeof(uh);
+	iov[4].iov_base = &payload;
+	iov[4].iov_len  = sizeof(payload);
+
+	/* tun_pi, all headers and the payload are gathered into one write */
+	if (writev(fd, iov, sizeof(iov) / sizeof(iov[0])) <= 0)
+		error(1, errno, "writev");
+}
+
+/* Receive the test packet on raw socket @fd within 100 ms, or exit with error. */
+static void raw_read(int fd)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	struct msghdr msg = {0};
+	struct iovec iov[2];
+	struct udphdr uh;
+	uint32_t payload[2];
+	int ret;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcvtimeo udp");
+
+	iov[0].iov_base = &uh;
+	iov[0].iov_len = sizeof(uh);
+	iov[1].iov_base = payload;
+	iov[1].iov_len = sizeof(payload);
+	msg.msg_iov = iov;
+	msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);
+
+	ret = recvmsg(fd, &msg, 0);
+	if (ret <= 0)
+		error(1, errno, "read raw");
+	/* recvmsg() succeeded, so errno is stale here: report the length only */
+	if (ret != sizeof(uh) + sizeof(payload[0]))
+		error(1, 0, "read raw: len=%d", ret);
+
+	fprintf(stderr, "raw recv: 0x%x\n", payload[0]);
+}
+
+/* Parse -f (filter), -F (napi frags) and the mandatory -i <tap ifname>. */
+static void parse_opts(int argc, char **argv)
+{
+	int opt;
+
+	for (;;) {
+		opt = getopt(argc, argv, "fFi:");
+		if (opt == -1)
+			break;
+
+		if (opt == 'f') {
+			cfg_do_filter = true;
+			printf("bpf filter enabled\n");
+		} else if (opt == 'F') {
+			cfg_do_frags = true;
+			printf("napi frags mode enabled\n");
+		} else if (opt == 'i') {
+			cfg_ifname = optarg;
+		} else {
+			error(1, 0, "unknown option %c", optopt);
+		}
+	}
+
+	if (!cfg_ifname)
+		error(1, 0, "must specify tap interface name (-i)");
+}
+
+/* Send one packet through the tap device and expect it on the raw socket. */
+int main(int argc, char **argv)
+{
+	int tun_fd, raw_fd;
+
+	parse_opts(argc, argv);
+	raw_fd = raw_open();
+	tun_fd = tun_open(cfg_ifname);
+
+	tun_write(tun_fd);
+	raw_read(raw_fd);
+
+	if (close(tun_fd))
+		error(1, errno, "close tun");
+	if (close(raw_fd))
+		error(1, errno, "close udp");
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
+
diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh
new file mode 100755
index 000000000000..5da5066fb465
--- /dev/null
+++ b/tools/testing/selftests/net/skf_net_off.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run skf_net_off on a tap device in a private netns, with and without filter.
+
+readonly NS="ns-$(mktemp -u XXXXXX)"
+
+cleanup() {
+	ip netns del $NS
+}
+
+set -e	# any failing step, including an earlier test run, fails the script
+ip netns add $NS
+trap cleanup EXIT
+
+ip -netns $NS link set lo up
+ip -netns $NS tuntap add name tap1 mode tap
+ip -netns $NS link set tap1 up
+ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01
+ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
+ip netns exec $NS ethtool -K tap1 gro off
+# disable early demux, else udp_v6_early_demux pulls udp header into linear
+ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0
+
+echo "no filter"
+ip netns exec $NS ./skf_net_off -i tap1
+echo "filter, linear skb (-f)"
+ip netns exec $NS ./skf_net_off -i tap1 -f
+echo "filter, fragmented skb (-f) (-F)"
+ip netns exec $NS ./skf_net_off -i tap1 -f -F
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index e9fa14e10732..4740701f1a9a 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -9,7 +9,7 @@
 #include <sys/socket.h>
 #include <sys/sysinfo.h>
 
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 FIXTURE(so_incoming_cpu)
 {
diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c
new file mode 100644
index 000000000000..bc5841192aa6
--- /dev/null
+++ b/tools/testing/selftests/net/so_rcv_listener.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#ifndef SO_RCVPRIORITY
+#define SO_RCVPRIORITY 82
+#endif
+
+struct options {
+	__u32 val;		/* -M/-P argument: set via rcvname, expected in the cmsg */
+	int name;		/* cmsg_type to look for: SO_MARK or SO_PRIORITY */
+	int rcvname;		/* sockopt set on the socket: SO_RCVMARK or SO_RCVPRIORITY */
+	const char *host;	/* address to bind to; a ':' selects AF_INET6 */
+	const char *service;	/* port to bind to (parsed with atoi()) */
+} opt;
+
+/* Print the command line synopsis on stdout and exit with EXIT_FAILURE. */
+static void __attribute__((noreturn)) usage(const char *bin)
+{
+	printf("Usage: %s [opts] <dst host> <dst port / service>\n"
+	       "Options:\n"
+	       "\t\t-M val  Test SO_RCVMARK\n"
+	       "\t\t-P val  Test SO_RCVPRIORITY\n", bin);
+	exit(EXIT_FAILURE);
+}
+
+/* Fill the global opt from "-M val | -P val <host> <port>"; usage() on error. */
+static void parse_args(int argc, char *argv[])
+{
+	int o;
+
+	while ((o = getopt(argc, argv, "M:P:")) != -1) {
+		switch (o) {
+		case 'M':
+			opt.val = strtoul(optarg, NULL, 10);
+			opt.name = SO_MARK;
+			opt.rcvname = SO_RCVMARK;
+			break;
+		case 'P':
+			opt.val = strtoul(optarg, NULL, 10);
+			opt.name = SO_PRIORITY;
+			opt.rcvname = SO_RCVPRIORITY;
+			break;
+		default:
+			usage(argv[0]);
+		}
+	}
+
+	/* one of -M/-P is mandatory: without it there is no option to test */
+	if (!opt.rcvname || optind != argc - 2)
+		usage(argv[0]);
+	opt.host = argv[optind];
+	opt.service = argv[optind + 1];
+}
+
+/* Receive one UDP datagram and check its SO_MARK/SO_PRIORITY cmsg value. */
+int main(int argc, char *argv[])
+{
+	int err = 0, recv_fd = -1, ret_value = 0;
+	__u32 recv_val;
+	struct cmsghdr *cmsg;
+	char cbuf[CMSG_SPACE(sizeof(__u32))];
+	char recv_buf[CMSG_SPACE(sizeof(__u32))];
+	struct iovec iov[1];
+	struct msghdr msg;
+	struct sockaddr_in recv_addr4;
+	struct sockaddr_in6 recv_addr6;
+
+	parse_args(argc, argv);
+
+	int family = strchr(opt.host, ':') ? AF_INET6 : AF_INET;
+
+	recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP);
+	if (recv_fd < 0) {
+		perror("Can't open recv socket");
+		ret_value = -errno;
+		goto cleanup;
+	}
+
+	err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val));
+	if (err < 0) {
+		perror("Recv setsockopt error");
+		ret_value = -errno;
+		goto cleanup;
+	}
+
+	if (family == AF_INET) {
+		memset(&recv_addr4, 0, sizeof(recv_addr4));
+		recv_addr4.sin_family = family;
+		recv_addr4.sin_port = htons(atoi(opt.service));
+
+		/* inet_pton() returns 0 and leaves errno unset for a bad address */
+		if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) {
+			fprintf(stderr, "Invalid IPV4 address: %s\n", opt.host);
+			ret_value = -EINVAL;
+			goto cleanup;
+		}
+
+		err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4));
+	} else {
+		memset(&recv_addr6, 0, sizeof(recv_addr6));
+		recv_addr6.sin6_family = family;
+		recv_addr6.sin6_port = htons(atoi(opt.service));
+
+		if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) {
+			fprintf(stderr, "Invalid IPV6 address: %s\n", opt.host);
+			ret_value = -EINVAL;
+			goto cleanup;
+		}
+
+		err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6));
+	}
+
+	if (err < 0) {
+		perror("Recv bind error");
+		ret_value = -errno;
+		goto cleanup;
+	}
+
+	iov[0].iov_base = recv_buf;
+	iov[0].iov_len = sizeof(recv_buf);
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_iov = iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+
+	err = recvmsg(recv_fd, &msg, 0);
+	if (err < 0) {
+		perror("Message receive error");
+		ret_value = -errno;
+		goto cleanup;
+	}
+
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) {
+			memcpy(&recv_val, CMSG_DATA(cmsg), sizeof(recv_val));
+			printf("Received value: %u\n", recv_val);
+
+			if (recv_val != opt.val) {
+				fprintf(stderr, "Error: expected value: %u, got: %u\n",
+					opt.val, recv_val);
+				ret_value = -EINVAL;
+			}
+			goto cleanup;
+		}
+	}
+
+	fprintf(stderr, "Error: No matching cmsg received\n");
+	ret_value = -ENOMSG;
+
+cleanup:
+	if (recv_fd >= 0)
+		close(recv_fd);
+
+	return ret_value;
+}
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 8457b7ccbc09..b76df1efc2ef 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -174,7 +174,7 @@ static int do_recv_errqueue_timeout(int fdt)
 	msg.msg_controllen = sizeof(control);
 
 	while (1) {
-		const char *reason;
+		const char *reason = NULL;
 
 		ret = recvmsg(fdt, &msg, MSG_ERRQUEUE);
 		if (ret == -1 && errno == EAGAIN)
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
index db1aeb8c5d1e..9e270548dad8 100644
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@@ -7,7 +7,7 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 struct socket_testcase {
 	int	domain;
@@ -39,6 +39,7 @@ static int run_tests(void)
 {
 	char err_string1[ERR_STRING_SZ];
 	char err_string2[ERR_STRING_SZ];
+	const char *msg1, *msg2;
 	int i, err;
 
 	err = 0;
@@ -56,13 +57,13 @@ static int run_tests(void)
 			    errno == -s->expect)
 				continue;
 
-			strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
-			strerror_r(errno, err_string2, ERR_STRING_SZ);
+			msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
+			msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ);
 
 			fprintf(stderr, "socket(%d, %d, %d) expected "
 				"err (%s) got (%s)\n",
 				s->domain, s->type, s->protocol,
-				err_string1, err_string2);
+				msg1, msg2);
 
 			err = -1;
 			break;
@@ -70,12 +71,12 @@ static int run_tests(void)
 			close(fd);
 
 			if (s->expect < 0) {
-				strerror_r(errno, err_string1, ERR_STRING_SZ);
+				msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ);
 
 				fprintf(stderr, "socket(%d, %d, %d) expected "
 					"success got err (%s)\n",
 					s->domain, s->type, s->protocol,
-					err_string1);
+					msg1);
 
 				err = -1;
 				break;
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 02d617040793..a5e959a080bb 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -285,11 +285,6 @@ setup_hs()
 	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
 	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
 	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
 	ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index 79fb81e63c59..a649dba3cb77 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -250,11 +250,6 @@ setup_hs()
 	eval local rtname=\${rt_${rid}}
 	local rtveth=veth-t${tid}
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
 	ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
 	ip -netns ${hsname} link set ${rtveth} netns ${rtname}
 	ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh
index 50563443a4ad..318487eda671 100755
--- a/tools/testing/selftests/net/srv6_end_flavors_test.sh
+++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh
@@ -399,7 +399,7 @@ __get_srv6_rtcfg_id()
 
 # Given the description of a router <id:op> as an input, the function returns
 # the <op> token which represents the operation (e.g. End behavior with or
-# withouth flavors) configured for the node.
+# without flavors) configured for the node.
 
 # Note that when the operation represents an End behavior with a list of
 # flavors, the output is the ordered version of that list.
@@ -480,7 +480,7 @@ setup_rt_local_sids()
 
 
 	# all SIDs start with a common locator. Routes and SRv6 Endpoint
-	# behavior instaces are grouped together in the 'localsid' table.
+	# behavior instances are grouped together in the 'localsid' table.
 	ip -netns "${nsname}" -6 rule \
 		add to "${LOCATOR_SERVICE}::/16" \
 		lookup "${LOCALSID_TABLE_ID}" prio 999
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index 87e414cc417c..4bc135e5c22c 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -245,10 +245,8 @@
 # that adopted in the use cases already examined (of course, it is necessary to
 # consider the different SIDs/C-SIDs).
 
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
 
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
 readonly DUMMY_DEVNAME="dum0"
 readonly VRF_TID=100
 readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -376,32 +374,18 @@ test_command_or_ksft_skip()
 	fi
 }
 
-get_nodename()
-{
-	local name="$1"
-
-	echo "${name}-${RDMSUFF}"
-}
-
 get_rtname()
 {
 	local rtid="$1"
 
-	get_nodename "rt-${rtid}"
+	echo "rt_${rtid}"
 }
 
 get_hsname()
 {
 	local hsid="$1"
 
-	get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
-	local name="$1"
-
-	ip netns add "${name}"
+	echo "hs_${hsid}"
 }
 
 create_router()
@@ -410,8 +394,7 @@ create_router()
 	local nsname
 
 	nsname="$(get_rtname "${rtid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 create_host()
@@ -420,28 +403,12 @@ create_host()
 	local nsname
 
 	nsname="$(get_hsname "${hsid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 cleanup()
 {
-	local nsname
-	local i
-
-	# destroy routers
-	for i in ${ROUTERS}; do
-		nsname="$(get_rtname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
-	# destroy hosts
-	for i in ${HOSTS}; do
-		nsname="$(get_hsname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
+	cleanup_all_ns
 
 	# check whether the setup phase was completed successfully or not. In
 	# case of an error during the setup phase of the testing environment,
@@ -462,10 +429,10 @@ add_link_rt_pairs()
 	local nsname
 	local neigh_nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
-		neigh_nsname="$(get_rtname "${neigh}")"
+		eval neigh_nsname=\${$(get_rtname "${neigh}")}
 
 		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
 			type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -497,7 +464,7 @@ setup_rt_networking()
 	local devname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -518,9 +485,6 @@ setup_rt_networking()
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
 }
 
@@ -596,7 +560,7 @@ setup_rt_local_sids()
 	local lcnode_func_prefix
 	local lcblock_prefix
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -630,7 +594,7 @@ setup_rt_local_sids()
 		dev "${DUMMY_DEVNAME}"
 
 	# all SIDs for VPNs start with a common locator. Routes and SRv6
-	# Endpoint behavior instaces are grouped together in the 'localsid'
+	# Endpoint behavior instances are grouped together in the 'localsid'
 	# table.
 	ip -netns "${nsname}" -6 rule \
 		add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -668,8 +632,8 @@ __setup_l3vpn()
 	local rtsrc_nsname
 	local rtdst_nsname
 
-	rtsrc_nsname="$(get_rtname "${src}")"
-	rtdst_nsname="$(get_rtname "${dst}")"
+	eval rtsrc_nsname=\${$(get_rtname "${src}")}
+	eval rtdst_nsname=\${$(get_rtname "${dst}")}
 
 	container="${LCBLOCK_ADDR}"
 
@@ -744,8 +708,8 @@ setup_hs()
 	local hsname
 	local rtname
 
-	hsname="$(get_hsname "${hs}")"
-	rtname="$(get_rtname "${rt}")"
+	eval hsname=\${$(get_hsname "${hs}")}
+	eval rtname=\${$(get_rtname "${rt}")}
 
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -791,11 +755,6 @@ setup_hs()
 	ip netns exec "${rtname}" \
 		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec "${rtname}" \
-		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
 	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
 }
 
@@ -880,7 +839,7 @@ check_rt_connectivity()
 	local prefix
 	local rtsrc_nsname
 
-	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+	eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
 
 	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
 
@@ -903,7 +862,7 @@ check_hs_ipv6_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -915,7 +874,7 @@ check_hs_ipv4_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test()
 	local nsname
 	local ret
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	__nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}"
 	ret="$?"
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index c79cb8ede17f..34b781a2ae74 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -72,6 +72,9 @@
 # Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
 # the selftest network.
 #
+# In addition, every router interface connecting rt-x to rt-y is assigned an
+# IPv6 link-local address fe80::x:y/64.
+#
 # Local SID/C-SID table
 # =====================
 #
@@ -287,10 +290,8 @@
 # packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46)
 # and sends it to the host hs-1.
 
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
 
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
 readonly DUMMY_DEVNAME="dum0"
 readonly VRF_TID=100
 readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -418,32 +419,18 @@ test_command_or_ksft_skip()
 	fi
 }
 
-get_nodename()
-{
-	local name="$1"
-
-	echo "${name}-${RDMSUFF}"
-}
-
 get_rtname()
 {
 	local rtid="$1"
 
-	get_nodename "rt-${rtid}"
+	echo "rt_${rtid}"
 }
 
 get_hsname()
 {
 	local hsid="$1"
 
-	get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
-	local name="$1"
-
-	ip netns add "${name}"
+	echo "hs_${hsid}"
 }
 
 create_router()
@@ -452,15 +439,12 @@ create_router()
 	local nsname
 
 	nsname="$(get_rtname "${rtid}")"
+	setup_ns "${nsname}"
 
-	__create_namespace "${nsname}"
-
+	eval nsname=\${$(get_rtname "${rtid}")}
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
 }
 
@@ -470,29 +454,12 @@ create_host()
 	local nsname
 
 	nsname="$(get_hsname "${hsid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 cleanup()
 {
-	local nsname
-	local i
-
-	# destroy routers
-	for i in ${ROUTERS}; do
-		nsname="$(get_rtname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
-	# destroy hosts
-	for i in ${HOSTS}; do
-		nsname="$(get_hsname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
+	cleanup_all_ns
 	# check whether the setup phase was completed successfully or not. In
 	# case of an error during the setup phase of the testing environment,
 	# the selftest is considered as "skipped".
@@ -512,10 +479,10 @@ add_link_rt_pairs()
 	local nsname
 	local neigh_nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
-		neigh_nsname="$(get_rtname "${neigh}")"
+		eval neigh_nsname=\${$(get_rtname "${neigh}")}
 
 		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
 			type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -547,7 +514,7 @@ setup_rt_networking()
 	local devname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -557,6 +524,9 @@ setup_rt_networking()
 		ip -netns "${nsname}" addr \
 			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
 
+		ip -netns "${nsname}" addr \
+			add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad
+
 		ip -netns "${nsname}" link set "${devname}" up
 	done
 
@@ -631,7 +601,7 @@ set_end_x_nextcsid()
 	local rt="$1"
 	local adj="$2"
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 	net_prefix="$(get_network_prefix "${rt}" "${adj}")"
 	lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
 
@@ -645,12 +615,33 @@ set_end_x_nextcsid()
 		nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
 }
 
+set_end_x_ll_nextcsid()
+{
+	local rt="$1"	# router being configured
+	local adj="$2"	# adjacent router used as nexthop
+
+	eval nsname=\${$(get_rtname "${rt}")}
+	lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+	nh6_ll_addr="fe80::${adj}:${rt}"	# rt-${adj} side of the link
+	oifname="veth-rt-${rt}-${adj}"
+
+	# enable NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop
+	# address (note that "dev" is the dummy dum0 device chosen for the sake
+	# of simplicity).
+	ip -netns "${nsname}" -6 route \
+		replace "${lcnode_func_prefix}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.X nh6 "${nh6_ll_addr}" \
+		oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \
+		nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
 set_underlay_sids_reachability()
 {
 	local rt="$1"
 	local rt_neighs="$2"
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -685,12 +676,12 @@ setup_rt_local_sids()
 	local lcnode_func_prefix
 	local lcblock_prefix
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
         set_underlay_sids_reachability "${rt}" "${rt_neighs}"
 
 	# all SIDs for VPNs start with a common locator. Routes and SRv6
-	# Endpoint behavior instaces are grouped together in the 'localsid'
+	# Endpoint behavior instances are grouped together in the 'localsid'
 	# table.
 	ip -netns "${nsname}" -6 rule \
 		add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -728,8 +719,8 @@ __setup_l3vpn()
 	local rtsrc_nsname
 	local rtdst_nsname
 
-	rtsrc_nsname="$(get_rtname "${src}")"
-	rtdst_nsname="$(get_rtname "${dst}")"
+	eval rtsrc_nsname=\${$(get_rtname "${src}")}
+	eval rtdst_nsname=\${$(get_rtname "${dst}")}
 
 	container="${LCBLOCK_ADDR}"
 
@@ -804,8 +795,8 @@ setup_hs()
 	local hsname
 	local rtname
 
-	hsname="$(get_hsname "${hs}")"
-	rtname="$(get_rtname "${rt}")"
+	eval hsname=\${$(get_hsname "${hs}")}
+	eval rtname=\${$(get_rtname "${rt}")}
 
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -851,11 +842,6 @@ setup_hs()
 	ip netns exec "${rtname}" \
 		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec "${rtname}" \
-		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
 	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
 }
 
@@ -947,7 +933,7 @@ check_rt_connectivity()
 	local prefix
 	local rtsrc_nsname
 
-	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+	eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
 
 	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
 
@@ -970,7 +956,7 @@ check_hs_ipv6_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -982,7 +968,7 @@ check_hs_ipv4_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1057,6 +1043,27 @@ host_vpn_tests()
 
 	check_and_log_hs_ipv4_connectivity 1 2
 	check_and_log_hs_ipv4_connectivity 2 1
+
+	# Setup the adjacencies in the SRv6 aware routers using IPv6 link-local
+	# addresses.
+	# - rt-3 SRv6 End.X adjacency with rt-4
+	# - rt-4 SRv6 End.X adjacency with rt-1
+	set_end_x_ll_nextcsid 3 4
+	set_end_x_ll_nextcsid 4 1
+
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local"
+
+	check_and_log_hs_ipv6_connectivity 1 2
+	check_and_log_hs_ipv6_connectivity 2 1
+
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local"
+
+	check_and_log_hs_ipv4_connectivity 1 2
+	check_and_log_hs_ipv4_connectivity 2 1
+
+	# Restore the previous adjacencies.
+	set_end_x_nextcsid 3 4
+	set_end_x_nextcsid 4 1
 }
 
 __nextcsid_end_x_behavior_test()
@@ -1093,7 +1100,7 @@ rt_x_nextcsid_end_x_behavior_test()
 	local nsname
 	local ret
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	__nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}"
 	ret="$?"
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 28a775654b92..cd7d061e21f8 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -166,10 +166,8 @@
 #  hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d)
 #
 
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
 
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
 readonly VRF_TID=100
 readonly VRF_DEVNAME="vrf-${VRF_TID}"
 readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
@@ -195,6 +193,8 @@ ret=${ksft_skip}
 nsuccess=0
 nfail=0
 
+HAS_TUNSRC=false
+
 log_test()
 {
 	local rc="$1"
@@ -248,32 +248,18 @@ test_command_or_ksft_skip()
 	fi
 }
 
-get_nodename()
-{
-	local name="$1"
-
-	echo "${name}-${RDMSUFF}"
-}
-
 get_rtname()
 {
 	local rtid="$1"
 
-	get_nodename "rt-${rtid}"
+	echo "rt_${rtid}"
 }
 
 get_hsname()
 {
 	local hsid="$1"
 
-	get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
-	local name="$1"
-
-	ip netns add "${name}"
+	echo "hs_${hsid}"
 }
 
 create_router()
@@ -282,8 +268,7 @@ create_router()
 	local nsname
 
 	nsname="$(get_rtname "${rtid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 create_host()
@@ -292,29 +277,12 @@ create_host()
 	local nsname
 
 	nsname="$(get_hsname "${hsid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 cleanup()
 {
-	local nsname
-	local i
-
-	# destroy routers
-	for i in ${ROUTERS}; do
-		nsname="$(get_rtname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
-	# destroy hosts
-	for i in ${HOSTS}; do
-		nsname="$(get_hsname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
+	cleanup_all_ns
 	# check whether the setup phase was completed successfully or not. In
 	# case of an error during the setup phase of the testing environment,
 	# the selftest is considered as "skipped".
@@ -334,10 +302,10 @@ add_link_rt_pairs()
 	local nsname
 	local neigh_nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
-		neigh_nsname="$(get_rtname "${neigh}")"
+		eval neigh_nsname=\${$(get_rtname "${neigh}")}
 
 		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
 			type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -369,7 +337,7 @@ setup_rt_networking()
 	local devname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -379,6 +347,17 @@ setup_rt_networking()
 		ip -netns "${nsname}" addr \
 			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
 
+		# A dedicated ::dead:<rt> address (with preferred_lft 0, i.e.,
+		# deprecated) is added when there is support for tunsrc. Because
+		# it is deprecated, the kernel should never auto-select it as
+		# source with current config. Only an explicit tunsrc can place
+		# it in the outer header.
+		if $HAS_TUNSRC; then
+			ip -netns "${nsname}" addr \
+				add "${net_prefix}::dead:${rt}/64" \
+				dev "${devname}" nodad preferred_lft 0
+		fi
+
 		ip -netns "${nsname}" link set "${devname}" up
 	done
 
@@ -387,9 +366,6 @@ setup_rt_networking()
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
 }
 
@@ -403,7 +379,7 @@ setup_rt_local_sids()
 	local nsname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -432,7 +408,7 @@ setup_rt_local_sids()
 		dev "${VRF_DEVNAME}"
 
 	# all SIDs for VPNs start with a common locator. Routes and SRv6
-	# Endpoint behavior instaces are grouped together in the 'localsid'
+	# Endpoint behavior instances are grouped together in the 'localsid'
 	# table.
 	ip -netns "${nsname}" -6 rule \
 		add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -457,6 +433,7 @@ setup_rt_local_sids()
 #       to the destination host)
 #  $5 - encap mode (full or red)
 #  $6 - traffic type (IPv6 or IPv4)
+#  $7 - force tunsrc (true or false)
 __setup_rt_policy()
 {
 	local dst="$1"
@@ -465,11 +442,47 @@ __setup_rt_policy()
 	local dec_rt="$4"
 	local mode="$5"
 	local traffic="$6"
+	local with_tunsrc="$7"
 	local nsname
 	local policy=''
+	local tunsrc=''
 	local n
 
-	nsname="$(get_rtname "${encap_rt}")"
+	# Verify the per-route tunnel source address ("tunsrc") feature.
+	# If it is not supported, fall back to the encap config without tunsrc.
+	if $with_tunsrc && $HAS_TUNSRC; then
+		local net_prefix
+		local drule
+		local nxt
+
+		eval nsname=\${$(get_rtname "${dec_rt}")}
+
+		# Next SRv6 hop: first End router if any, or the decap router
+		[ -z "${end_rts}" ] && nxt="${dec_rt}" || nxt="${end_rts%% *}"
+
+		# Use the right prefix for tunsrc depending on the next SRv6 hop
+		net_prefix="$(get_network_prefix "${encap_rt}" "${nxt}")"
+		tunsrc="tunsrc ${net_prefix}::dead:${encap_rt}"
+
+		# To verify that the outer source address matches the one
+		# configured with tunsrc, the decap router discards packets
+		# with any other source address.
+		ip netns exec "${nsname}" ip6tables -t raw -I PREROUTING 1 \
+			-s "${net_prefix}::dead:${encap_rt}" \
+			-d "${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" \
+			-j ACCEPT
+
+		drule="PREROUTING \
+		       -d ${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC} \
+		       -j DROP"
+
+		if ! ip netns exec "${nsname}" \
+				ip6tables -t raw -C ${drule} &>/dev/null; then
+			ip netns exec "${nsname}" ip6tables -t raw -A ${drule}
+		fi
+	fi
+
+	eval nsname=\${$(get_rtname "${encap_rt}")}
 
 	for n in ${end_rts}; do
 		policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -481,7 +494,7 @@ __setup_rt_policy()
 	if [ "${traffic}" -eq 6 ]; then
 		ip -netns "${nsname}" -6 route \
 			add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
-			encap seg6 mode "${mode}" segs "${policy}" \
+			encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \
 			dev "${VRF_DEVNAME}"
 
 		ip -netns "${nsname}" -6 neigh \
@@ -492,7 +505,7 @@ __setup_rt_policy()
 		# received, otherwise the proxy arp does not work.
 		ip -netns "${nsname}" -4 route \
 			add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
-			encap seg6 mode "${mode}" segs "${policy}" \
+			encap seg6 mode "${mode}" ${tunsrc} segs "${policy}" \
 			dev "${VRF_DEVNAME}"
 	fi
 }
@@ -500,13 +513,13 @@ __setup_rt_policy()
 # see __setup_rt_policy
 setup_rt_policy_ipv6()
 {
-	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 "$6"
 }
 
 #see __setup_rt_policy
 setup_rt_policy_ipv4()
 {
-	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
+	__setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 "$6"
 }
 
 setup_hs()
@@ -516,8 +529,8 @@ setup_hs()
 	local hsname
 	local rtname
 
-	hsname="$(get_hsname "${hs}")"
-	rtname="$(get_rtname "${rt}")"
+	eval hsname=\${$(get_hsname "${hs}")}
+	eval rtname=\${$(get_rtname "${rt}")}
 
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -555,11 +568,6 @@ setup_hs()
 	ip netns exec "${rtname}" \
 		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
 
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec "${rtname}" \
-		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
 	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
 }
 
@@ -609,41 +617,41 @@ setup()
 	# the network path between hs-1 and hs-2 traverses several routers
 	# depending on the direction of traffic.
 	#
-	# Direction hs-1 -> hs-2 (H.Encaps.Red)
+	# Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc)
 	#  - rt-3,rt-4 (SRv6 End behaviors)
 	#  - rt-2 (SRv6 End.DT46 behavior)
 	#
 	# Direction hs-2 -> hs-1 (H.Encaps.Red)
 	#  - rt-1 (SRv6 End.DT46 behavior)
-	setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red
-	setup_rt_policy_ipv6 1 2 "" 1 encap.red
+	setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red true
+	setup_rt_policy_ipv6 1 2 "" 1 encap.red false
 
 	# create an IPv4 VPN between hosts hs-1 and hs-2
 	# the network path between hs-1 and hs-2 traverses several routers
 	# depending on the direction of traffic.
 	#
-	# Direction hs-1 -> hs-2 (H.Encaps.Red)
+	# Direction hs-1 -> hs-2 (H.Encaps.Red + tunsrc)
 	# - rt-2 (SRv6 End.DT46 behavior)
 	#
 	# Direction hs-2 -> hs-1 (H.Encaps.Red)
 	#  - rt-4,rt-3 (SRv6 End behaviors)
 	#  - rt-1 (SRv6 End.DT46 behavior)
-	setup_rt_policy_ipv4 2 1 "" 2 encap.red
-	setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red
+	setup_rt_policy_ipv4 2 1 "" 2 encap.red true
+	setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red false
 
 	# create an IPv6 VPN between hosts hs-3 and hs-4
 	# the network path between hs-3 and hs-4 traverses several routers
 	# depending on the direction of traffic.
 	#
-	# Direction hs-3 -> hs-4 (H.Encaps.Red)
+	# Direction hs-3 -> hs-4 (H.Encaps.Red + tunsrc)
 	# - rt-2 (SRv6 End Behavior)
 	# - rt-4 (SRv6 End.DT46 behavior)
 	#
 	# Direction hs-4 -> hs-3 (H.Encaps.Red)
 	#  - rt-1 (SRv6 End behavior)
 	#  - rt-3 (SRv6 End.DT46 behavior)
-	setup_rt_policy_ipv6 4 3 "2" 4 encap.red
-	setup_rt_policy_ipv6 3 4 "1" 3 encap.red
+	setup_rt_policy_ipv6 4 3 "2" 4 encap.red true
+	setup_rt_policy_ipv6 3 4 "1" 3 encap.red false
 
 	# testing environment was set up successfully
 	SETUP_ERR=0
@@ -656,7 +664,7 @@ check_rt_connectivity()
 	local prefix
 	local rtsrc_nsname
 
-	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+	eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
 
 	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
 
@@ -679,7 +687,7 @@ check_hs_ipv6_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -691,7 +699,7 @@ check_hs_ipv4_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -851,6 +859,38 @@ test_vrf_or_ksft_skip()
 	fi
 }
 
+# Probe, in a scratch namespace, whether the tunsrc tests can be enabled: both
+# the seg6 "tunsrc" route attribute and the ip6tables raw table must work.
+# HAS_TUNSRC is set to true only if every probe succeeds.
+check_tunsrc_support()
+{
+	local probe_ok=false
+	local cleanup_rc
+
+	setup_ns tunsrc_ns
+	ip -netns "${tunsrc_ns}" link add veth0 type veth \
+		peer name veth1 netns "${tunsrc_ns}"
+	ip -netns "${tunsrc_ns}" link set veth0 up
+
+	# Probes run in order and stop at the first failure: the route with
+	# tunsrc is accepted, the attribute shows up when the routes are dumped,
+	# and a rule can be added to the ip6tables raw table.
+	if ip -netns "${tunsrc_ns}" -6 route add fc00::dead:beef/128 \
+			encap seg6 mode encap.red tunsrc fc00::1 segs fc00::2 \
+			dev veth0 &>/dev/null &&
+	   ip -netns "${tunsrc_ns}" -6 route show | grep -q "tunsrc" &&
+	   ip netns exec "${tunsrc_ns}" ip6tables -t raw -A PREROUTING \
+			-d fc00::dead:beef -j DROP &>/dev/null; then
+		probe_ok=true
+	fi
+
+	# The scratch namespace is removed whatever the outcome of the probes.
+	cleanup_ns "${tunsrc_ns}"
+	cleanup_rc=$?
+	${probe_ok} || return "${cleanup_rc}"
+	HAS_TUNSRC=true
+}
+
 if [ "$(id -u)" -ne 0 ]; then
 	echo "SKIP: Need root privileges"
 	exit "${ksft_skip}"
@@ -868,6 +908,7 @@ test_vrf_or_ksft_skip
 set -e
 trap cleanup EXIT
 
+check_tunsrc_support
 setup
 set +e
 
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cb4177d41b21..0979b5316fdf 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -116,10 +116,8 @@
 #  hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
 #
 
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
 
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
 readonly DUMMY_DEVNAME="dum0"
 readonly RT2HS_DEVNAME="veth-hs"
 readonly HS_VETH_NAME="veth0"
@@ -199,32 +197,18 @@ test_command_or_ksft_skip()
 	fi
 }
 
-get_nodename()
-{
-	local name="$1"
-
-	echo "${name}-${RDMSUFF}"
-}
-
 get_rtname()
 {
 	local rtid="$1"
 
-	get_nodename "rt-${rtid}"
+	echo "rt_${rtid}"
 }
 
 get_hsname()
 {
 	local hsid="$1"
 
-	get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
-	local name="$1"
-
-	ip netns add "${name}"
+	echo "hs_${hsid}"
 }
 
 create_router()
@@ -233,8 +217,7 @@ create_router()
 	local nsname
 
 	nsname="$(get_rtname "${rtid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 create_host()
@@ -243,28 +226,12 @@ create_host()
 	local nsname
 
 	nsname="$(get_hsname "${hsid}")"
-
-	__create_namespace "${nsname}"
+	setup_ns "${nsname}"
 }
 
 cleanup()
 {
-	local nsname
-	local i
-
-	# destroy routers
-	for i in ${ROUTERS}; do
-		nsname="$(get_rtname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
-
-	# destroy hosts
-	for i in ${HOSTS}; do
-		nsname="$(get_hsname "${i}")"
-
-		ip netns del "${nsname}" &>/dev/null || true
-	done
+	cleanup_all_ns
 
 	# check whether the setup phase was completed successfully or not. In
 	# case of an error during the setup phase of the testing environment,
@@ -285,10 +252,10 @@ add_link_rt_pairs()
 	local nsname
 	local neigh_nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
-		neigh_nsname="$(get_rtname "${neigh}")"
+		eval neigh_nsname=\${$(get_rtname "${neigh}")}
 
 		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
 			type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -320,7 +287,7 @@ setup_rt_networking()
 	local devname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -341,9 +308,6 @@ setup_rt_networking()
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
 	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
 }
 
@@ -357,7 +321,7 @@ setup_rt_local_sids()
 	local nsname
 	local neigh
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	for neigh in ${rt_neighs}; do
 		devname="veth-rt-${rt}-${neigh}"
@@ -379,7 +343,7 @@ setup_rt_local_sids()
 		encap seg6local action End dev "${DUMMY_DEVNAME}"
 
 	# all SIDs for VPNs start with a common locator. Routes and SRv6
-	# Endpoint behaviors instaces are grouped together in the 'localsid'
+	# Endpoint behaviors instances are grouped together in the 'localsid'
 	# table.
 	ip -netns "${nsname}" -6 rule add \
 		to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -407,7 +371,7 @@ __setup_rt_policy()
 	local policy=''
 	local n
 
-	nsname="$(get_rtname "${encap_rt}")"
+	eval nsname=\${$(get_rtname "${encap_rt}")}
 
 	for n in ${end_rts}; do
 		policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -446,7 +410,7 @@ setup_decap()
 	local rt="$1"
 	local nsname
 
-	nsname="$(get_rtname "${rt}")"
+	eval nsname=\${$(get_rtname "${rt}")}
 
 	# Local End.DX2 behavior
 	ip -netns "${nsname}" -6 route \
@@ -463,8 +427,8 @@ setup_hs()
 	local hsname
 	local rtname
 
-	hsname="$(get_hsname "${hs}")"
-	rtname="$(get_rtname "${rt}")"
+	eval hsname=\${$(get_hsname "${hs}")}
+	eval rtname=\${$(get_rtname "${rt}")}
 
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
 	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -486,11 +450,6 @@ setup_hs()
 		add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
 
 	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
-
-	# disable the rp_filter otherwise the kernel gets confused about how
-	# to route decap ipv4 packets.
-	ip netns exec "${rtname}" \
-		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
 }
 
 # set an auto-generated mac address
@@ -508,7 +467,7 @@ set_mac_address()
 	local ifname="$4"
 	local nsname
 
-	nsname=$(get_nodename "${nodename}")
+	eval nsname=\${${nodename}}
 
 	ip -netns "${nsname}" link set dev "${ifname}" down
 
@@ -532,7 +491,7 @@ set_host_l2peer()
 	local hssrc_name
 	local ipaddr
 
-	hssrc_name="$(get_hsname "${hssrc}")"
+	eval hssrc_name=\${$(get_hsname "${hssrc}")}
 
 	if [ "${proto}" -eq 6 ]; then
 		ipaddr="${ipprefix}::${hsdst}"
@@ -562,7 +521,7 @@ setup_l2vpn()
 	local rtdst="${hsdst}"
 
 	# set fixed mac for source node and the neigh MAC address
-	set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
+	set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
 	set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6
 	set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4
 
@@ -570,7 +529,7 @@ setup_l2vpn()
 	# to the mac address of the remote peer (L2 VPN destination host).
 	# Otherwise, traffic coming from the source host is dropped at the
 	# ingress router.
-	set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
+	set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
 
 	# set the SRv6 Policies at the ingress router
 	setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
@@ -647,7 +606,7 @@ check_rt_connectivity()
 	local prefix
 	local rtsrc_nsname
 
-	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+	eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
 
 	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
 
@@ -670,7 +629,7 @@ check_hs_ipv6_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -682,7 +641,7 @@ check_hs_ipv4_connectivity()
 	local hsdst="$2"
 	local hssrc_nsname
 
-	hssrc_nsname="$(get_hsname "${hssrc}")"
+	eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
 
 	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
 		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/srv6_iptunnel_cache.sh b/tools/testing/selftests/net/srv6_iptunnel_cache.sh
new file mode 100755
index 000000000000..62638ab679d9
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_iptunnel_cache.sh
@@ -0,0 +1,197 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+
+# This test verifies that the seg6 lwtunnel does not share the dst_cache
+# between the input (forwarding) and output (locally generated) paths.
+#
+# A shared dst_cache allows a forwarded packet to populate the cache and a
+# subsequent locally generated packet to silently reuse that entry, bypassing
+# its own route lookup. To expose this, the SID is made reachable only for
+# forwarded traffic (via an ip rule matching iif) and blackholed for everything
+# else. A local ping on ns_router must always hit the blackhole;
+# if it succeeds after a forwarded packet has populated the
+# cache, the bug is confirmed.
+#
+# Both forwarded and local packets are pinned to the same CPU with taskset,
+# since dst_cache is per-cpu.
+#
+#
+# +--------------------+                        +--------------------+
+# |      ns_src        |                        |      ns_dst        |
+# |                    |                        |                    |
+# |  veth-s0           |                        |           veth-d0  |
+# |  fd00::1/64        |                        |        fd01::2/64  |
+# +-------+------------+                        +----------+---------+
+#         |                                                |
+#         |            +--------------------+              |
+#         |            |     ns_router      |              |
+#         |            |                    |              |
+#         +------------+ veth-r0    veth-r1 +--------------+
+#                      | fd00::2    fd01::1 |
+#                      +--------------------+
+#
+#
+# ns_router: encap (main table)
+# +---------+---------------------------------------+
+# | dst     | action                                |
+# +---------+---------------------------------------+
+# | cafe::1 | encap seg6 mode encap segs fc00::100  |
+# +---------+---------------------------------------+
+#
+# ns_router: post-encap SID resolution
+# +-------+------------+----------------------------+
+# | table | dst        | action                     |
+# +-------+------------+----------------------------+
+# | 100   | fc00::100  | via fd01::2 dev veth-r1    |
+# +-------+------------+----------------------------+
+# | main  | fc00::100  | blackhole                  |
+# +-------+------------+----------------------------+
+#
+# ns_router: ip rule
+# +------------------+------------------------------+
+# | match            | action                       |
+# +------------------+------------------------------+
+# | iif veth-r0      | lookup 100                   |
+# +------------------+------------------------------+
+#
+# ns_dst: SRv6 decap (main table)
+# +--------------+----------------------------------+
+# | SID          | action                           |
+# +--------------+----------------------------------+
+# | fc00::100    | End.DT6 table 255 (local)        |
+# +--------------+----------------------------------+
+
+source lib.sh
+
+readonly SID="fc00::100"
+readonly DEST="cafe::1"
+
+readonly SRC_MAC="02:00:00:00:00:01"
+readonly RTR_R0_MAC="02:00:00:00:00:02"
+readonly RTR_R1_MAC="02:00:00:00:00:03"
+readonly DST_MAC="02:00:00:00:00:04"
+
+cleanup()
+{
+	cleanup_ns "${NS_SRC}" "${NS_RTR}" "${NS_DST}"
+}
+
+# Make sure that every external command the test relies on is installed.
+# Commands are probed in a fixed order (ip, ping, sysctl, taskset); at the
+# first missing one a SKIP message is printed and the script exits with the
+# kselftest skip code.
+check_prerequisites()
+{
+	local tool
+	local label
+
+	for tool in ip ping sysctl taskset; do
+		command -v "${tool}" &>/dev/null && continue
+
+		# "ip" is the only command reported with a "tool" suffix
+		case "${tool}" in
+		ip) label="ip tool" ;;
+		*) label="${tool}" ;;
+		esac
+
+		echo "SKIP: ${label} not found"
+		exit "${ksft_skip}"
+	done
+}
+
+setup()
+{
+	setup_ns NS_SRC NS_RTR NS_DST
+	# links: veth-s0 (ns_src) <-> veth-r0 and veth-r1 <-> veth-d0 (ns_dst)
+	ip link add veth-s0 netns "${NS_SRC}" type veth \
+		peer name veth-r0 netns "${NS_RTR}"
+	ip link add veth-r1 netns "${NS_RTR}" type veth \
+		peer name veth-d0 netns "${NS_DST}"
+	# fixed MACs, referenced by the static neighbor entries at the end
+	ip -n "${NS_SRC}" link set veth-s0 address "${SRC_MAC}"
+	ip -n "${NS_RTR}" link set veth-r0 address "${RTR_R0_MAC}"
+	ip -n "${NS_RTR}" link set veth-r1 address "${RTR_R1_MAC}"
+	ip -n "${NS_DST}" link set veth-d0 address "${DST_MAC}"
+
+	# ns_src: reaches DEST through ns_router (fd00::2)
+	ip -n "${NS_SRC}" link set veth-s0 up
+	ip -n "${NS_SRC}" addr add fd00::1/64 dev veth-s0 nodad
+	ip -n "${NS_SRC}" -6 route add "${DEST}"/128 via fd00::2
+
+	# ns_router: addresses on both links and IPv6 forwarding enabled
+	ip -n "${NS_RTR}" link set veth-r0 up
+	ip -n "${NS_RTR}" addr add fd00::2/64 dev veth-r0 nodad
+	ip -n "${NS_RTR}" link set veth-r1 up
+	ip -n "${NS_RTR}" addr add fd01::1/64 dev veth-r1 nodad
+	ip netns exec "${NS_RTR}" sysctl -qw net.ipv6.conf.all.forwarding=1
+	# DEST -> seg6 encap; SID: table 100 if iif veth-r0, blackhole otherwise
+	ip -n "${NS_RTR}" -6 route add "${DEST}"/128 \
+		encap seg6 mode encap segs "${SID}" dev veth-r0
+	ip -n "${NS_RTR}" -6 route add "${SID}"/128 table 100 \
+		via fd01::2 dev veth-r1
+	ip -n "${NS_RTR}" -6 route add blackhole "${SID}"/128
+	ip -n "${NS_RTR}" -6 rule add iif veth-r0 lookup 100
+
+	# ns_dst: owns DEST on lo and decapsulates SID traffic (End.DT6)
+	ip -n "${NS_DST}" link set veth-d0 up
+	ip -n "${NS_DST}" addr add fd01::2/64 dev veth-d0 nodad
+	ip -n "${NS_DST}" addr add "${DEST}"/128 dev lo nodad
+	ip -n "${NS_DST}" -6 route add "${SID}"/128 \
+		encap seg6local action End.DT6 table 255 dev veth-d0
+	ip -n "${NS_DST}" -6 route add fd00::/64 via fd01::1
+
+	# static (permanent) neighbor entries for the on-link next hops
+	ip -n "${NS_SRC}" -6 neigh add fd00::2 dev veth-s0 \
+		lladdr "${RTR_R0_MAC}" nud permanent
+	ip -n "${NS_RTR}" -6 neigh add fd00::1 dev veth-r0 \
+		lladdr "${SRC_MAC}" nud permanent
+	ip -n "${NS_RTR}" -6 neigh add fd01::2 dev veth-r1 \
+		lladdr "${DST_MAC}" nud permanent
+	ip -n "${NS_DST}" -6 neigh add fd01::1 dev veth-d0 \
+		lladdr "${RTR_R1_MAC}" nud permanent
+}
+
+test_cache_isolation()
+{
+	RET=0
+	# All pings run on CPU 0 (taskset -c 0). Step 1: local ping with empty
+	# cache; it must fail (SID is blackholed), otherwise the test is skipped
+	if ip netns exec "${NS_RTR}" taskset -c 0 \
+			ping -c 1 -W 2 "${DEST}" &>/dev/null; then
+		echo "SKIP: local ping succeeded, topology broken"
+		exit "${ksft_skip}"
+	fi
+
+	# Step 2: forward from ns_src to populate the input cache, or SKIP
+	if ! ip netns exec "${NS_SRC}" taskset -c 0 \
+			ping -c 1 -W 2 "${DEST}" &>/dev/null; then
+		echo "SKIP: forwarded ping failed, topology broken"
+		exit "${ksft_skip}"
+	fi
+
+	# Step 3: local ping again: must still fail; if the output path reuses
+	# the input cache, it bypasses the blackhole and the ping succeeds
+	if ip netns exec "${NS_RTR}" taskset -c 0 \
+			ping -c 1 -W 2 "${DEST}" &>/dev/null; then
+		echo "FAIL: output path used dst cached by input path"
+		RET="${ksft_fail}"
+	else
+		echo "PASS: output path dst_cache is independent"
+	fi
+
+	return "${RET}"
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges"
+	exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+# RET is set by test_cache_isolation and becomes the exit status
+check_prerequisites
+setup
+test_cache_isolation
+exit "${RET}"
diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c
index 247c3b3ac1c9..a0c9418132c8 100644
--- a/tools/testing/selftests/net/tap.c
+++ b/tools/testing/selftests/net/tap.c
@@ -17,7 +17,7 @@
 #include <linux/virtio_net.h>
 #include <netinet/ip.h>
 #include <netinet/udp.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 static const char param_dev_tap_name[] = "xmacvtap0";
 static const char param_dev_dummy_name[] = "xdummy0";
@@ -56,18 +56,12 @@ static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
 static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
 				     const char *s)
 {
-	struct rtattr *rta = rtattr_add(nh, type, strlen(s));
+	unsigned int strsz = strlen(s) + 1;
+	struct rtattr *rta;
 
-	memcpy(RTA_DATA(rta), s, strlen(s));
-	return rta;
-}
-
-static struct rtattr *rtattr_add_strsz(struct nlmsghdr *nh, unsigned short type,
-				       const char *s)
-{
-	struct rtattr *rta = rtattr_add(nh, type, strlen(s) + 1);
+	rta = rtattr_add(nh, type, strsz);
 
-	strcpy(RTA_DATA(rta), s);
+	memcpy(RTA_DATA(rta), s, strsz);
 	return rta;
 }
 
@@ -119,7 +113,7 @@ static int dev_create(const char *dev, const char *link_type,
 
 	link_info = rtattr_begin(&req.nh, IFLA_LINKINFO);
 
-	rtattr_add_strsz(&req.nh, IFLA_INFO_KIND, link_type);
+	rtattr_add_str(&req.nh, IFLA_INFO_KIND, link_type);
 
 	if (fill_info_data) {
 		info_data = rtattr_begin(&req.nh, IFLA_INFO_DATA);
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
index 3605e38711cb..971cb6fa2d63 100644
--- a/tools/testing/selftests/net/tcp_ao/config
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -1,8 +1,8 @@
 CONFIG_CRYPTO_HMAC=y
 CONFIG_CRYPTO_RMD160=y
 CONFIG_CRYPTO_SHA1=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_NET_L3_MASTER_DEV=y
 CONFIG_NET_VRF=y
 CONFIG_TCP_AO=y
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
index d418162d335f..93b61e9a36f1 100644
--- a/tools/testing/selftests/net/tcp_ao/connect-deny.c
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -4,6 +4,7 @@
 #include "aolib.h"
 
 #define fault(type)	(inj == FAULT_ ## type)
+static volatile int sk_pair;
 
 static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen,
 				      union tcp_addr in_addr, uint8_t prefix,
@@ -34,10 +35,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
 		       const char *cnt_name, test_cnt cnt_expected,
 		       fault_t inj)
 {
-	struct tcp_ao_counters ao_cnt1, ao_cnt2;
+	struct tcp_counters cnt1, cnt2;
 	uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+	test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
 	int lsk, err, sk = 0;
-	time_t timeout;
 
 	lsk = test_listen_socket(this_ip_addr, port, 1);
 
@@ -46,21 +47,24 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
 
 	if (cnt_name)
 		before_cnt = netstat_get_one(cnt_name, NULL);
-	if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1))
-		test_error("test_get_tcp_ao_counters()");
+	if (pwd && test_get_tcp_counters(lsk, &cnt1))
+		test_error("test_get_tcp_counters()");
 
 	synchronize_threads(); /* preparations done */
 
-	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
-	err = test_wait_fd(lsk, timeout, 0);
+	err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair);
 	if (err == -ETIMEDOUT) {
+		sk_pair = err;
 		if (!fault(TIMEOUT))
-			test_fail("timed out for accept()");
+			test_fail("%s: timed out for accept()", tst_name);
+	} else if (err == -EKEYREJECTED) {
+		if (!fault(KEYREJECT))
+			test_fail("%s: key was rejected", tst_name);
 	} else if (err < 0) {
-		test_error("test_wait_fd()");
+		test_error("test_skpair_wait_poll()");
 	} else {
 		if (fault(TIMEOUT))
-			test_fail("ready to accept");
+			test_fail("%s: ready to accept", tst_name);
 
 		sk = accept(lsk, NULL, NULL);
 		if (sk < 0) {
@@ -72,13 +76,13 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
 	}
 
 	synchronize_threads(); /* before counter checks */
-	if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
-		test_error("test_get_tcp_ao_counters()");
+	if (pwd && test_get_tcp_counters(lsk, &cnt2))
+		test_error("test_get_tcp_counters()");
 
 	close(lsk);
 
 	if (pwd)
-		test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+		test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
 
 	if (!cnt_name)
 		goto out;
@@ -109,7 +113,7 @@ static void *server_fn(void *arg)
 
 	try_accept("Non-AO server + AO client", port++, NULL,
 		   this_ip_dest, -1, 100, 100, 0,
-		   "TCPAOKeyNotFound", 0, FAULT_TIMEOUT);
+		   "TCPAOKeyNotFound", TEST_CNT_NS_KEY_NOT_FOUND, FAULT_TIMEOUT);
 
 	try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD,
 		   this_ip_dest, -1, 100, 100, 0,
@@ -135,8 +139,9 @@ static void *server_fn(void *arg)
 		   wrong_addr, -1, 100, 100, 0,
 		   "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
 
+	/* Key rejected by the other side, failing short through skpair */
 	try_accept("Client: Wrong addr", port++, NULL,
-		   this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT);
+		   this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_KEYREJECT);
 
 	try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
 		   this_ip_dest, -1, 200, 100, 0,
@@ -163,8 +168,7 @@ static void try_connect(const char *tst_name, unsigned int port,
 			uint8_t sndid, uint8_t rcvid,
 			test_cnt cnt_expected, fault_t inj)
 {
-	struct tcp_ao_counters ao_cnt1, ao_cnt2;
-	time_t timeout;
+	struct tcp_counters cnt1, cnt2;
 	int sk, ret;
 
 	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
@@ -174,16 +178,15 @@ static void try_connect(const char *tst_name, unsigned int port,
 	if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid))
 		test_error("setsockopt(TCP_AO_ADD_KEY)");
 
-	if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1))
-		test_error("test_get_tcp_ao_counters()");
+	if (pwd && test_get_tcp_counters(sk, &cnt1))
+		test_error("test_get_tcp_counters()");
 
 	synchronize_threads(); /* preparations done */
 
-	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
-	ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
-
+	ret = test_skpair_connect_poll(sk, this_ip_dest, port, cnt_expected, &sk_pair);
 	synchronize_threads(); /* before counter checks */
 	if (ret < 0) {
+		sk_pair = ret;
 		if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
 			test_ok("%s: connect() was prevented", tst_name);
 		} else if (ret == -ETIMEDOUT && fault(TIMEOUT)) {
@@ -202,9 +205,11 @@ static void try_connect(const char *tst_name, unsigned int port,
 	else
 		test_ok("%s: connected", tst_name);
 	if (pwd && ret > 0) {
-		if (test_get_tcp_ao_counters(sk, &ao_cnt2))
-			test_error("test_get_tcp_ao_counters()");
-		test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+		if (test_get_tcp_counters(sk, &cnt2))
+			test_error("test_get_tcp_counters()");
+		test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
+	} else if (pwd) {
+		test_tcp_counters_free(&cnt1);
 	}
 out:
 	synchronize_threads(); /* close() */
@@ -241,6 +246,11 @@ static void *client_fn(void *arg)
 	try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
 			this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
 
+	/*
+	 * XXX: The test doesn't increase any counters, see tcp_make_synack().
+	 * Potentially, it can be sped up by setting sk_pair = -ETIMEDOUT
+	 * but the price would be increased complexity of the tracer thread.
+	 */
 	trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any,
 				 port, 0, 100, 100);
 	try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
index f1d8d29e393f..340f00e979ea 100644
--- a/tools/testing/selftests/net/tcp_ao/connect.c
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -35,7 +35,7 @@ static void *client_fn(void *arg)
 	uint64_t before_aogood, after_aogood;
 	const size_t nr_packets = 20;
 	struct netstat *ns_before, *ns_after;
-	struct tcp_ao_counters ao1, ao2;
+	struct tcp_counters ao1, ao2;
 
 	if (sk < 0)
 		test_error("socket()");
@@ -50,18 +50,18 @@ static void *client_fn(void *arg)
 
 	ns_before = netstat_read();
 	before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
-	if (test_get_tcp_ao_counters(sk, &ao1))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &ao1))
+		test_error("test_get_tcp_counters()");
 
-	if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+	if (test_client_verify(sk, 100, nr_packets)) {
 		test_fail("verify failed");
 		return NULL;
 	}
 
 	ns_after = netstat_read();
 	after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
-	if (test_get_tcp_ao_counters(sk, &ao2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &ao2))
+		test_error("test_get_tcp_counters()");
 	netstat_print_diff(ns_before, ns_after);
 	netstat_free(ns_before);
 	netstat_free(ns_after);
@@ -71,14 +71,14 @@ static void *client_fn(void *arg)
 				nr_packets, after_aogood, before_aogood);
 		return NULL;
 	}
-	if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
+	if (test_assert_counters("connect", &ao1, &ao2, TEST_CNT_GOOD))
 		return NULL;
 
 	test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu",
-			before_aogood, ao1.ao_info_pkt_good,
-			ao1.key_cnts[0].pkt_good,
-			after_aogood, ao2.ao_info_pkt_good,
-			ao2.key_cnts[0].pkt_good,
+			before_aogood, ao1.ao.ao_info_pkt_good,
+			ao1.ao.key_cnts[0].pkt_good,
+			after_aogood, ao2.ao.ao_info_pkt_good,
+			ao2.ao.key_cnts[0].pkt_good,
 			nr_packets);
 	return NULL;
 }
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
index a1614f0d8c44..85c1a1e958c6 100644
--- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -53,7 +53,7 @@ static void serve_interfered(int sk)
 	ssize_t test_quota = packet_size * packets_nr * 10;
 	uint64_t dest_unreach_a, dest_unreach_b;
 	uint64_t icmp_ignored_a, icmp_ignored_b;
-	struct tcp_ao_counters ao_cnt1, ao_cnt2;
+	struct tcp_counters cnt1, cnt2;
 	bool counter_not_found;
 	struct netstat *ns_after, *ns_before;
 	ssize_t bytes;
@@ -61,16 +61,16 @@ static void serve_interfered(int sk)
 	ns_before = netstat_read();
 	dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL);
 	icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL);
-	if (test_get_tcp_ao_counters(sk, &ao_cnt1))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt1))
+		test_error("test_get_tcp_counters()");
 	bytes = test_server_run(sk, test_quota, 0);
 	ns_after = netstat_read();
 	netstat_print_diff(ns_before, ns_after);
 	dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL);
 	icmp_ignored_b = netstat_get(ns_after, tcpao_icmps,
 					&counter_not_found);
-	if (test_get_tcp_ao_counters(sk, &ao_cnt2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt2))
+		test_error("test_get_tcp_counters()");
 
 	netstat_free(ns_before);
 	netstat_free(ns_after);
@@ -91,9 +91,9 @@ static void serve_interfered(int sk)
 		return;
 	}
 #ifdef TEST_ICMPS_ACCEPT
-	test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD);
+	test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
 #else
-	test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
+	test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
 #endif
 	if (icmp_ignored_a >= icmp_ignored_b) {
 		test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
@@ -395,7 +395,6 @@ static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *d
 
 static void send_interfered(int sk)
 {
-	const unsigned int timeout = TEST_TIMEOUT_SEC;
 	struct sockaddr_in6 src, dst;
 	socklen_t addr_sz;
 
@@ -409,7 +408,7 @@ static void send_interfered(int sk)
 	while (1) {
 		uint32_t rcv_nxt;
 
-		if (test_client_verify(sk, packet_size, packets_nr, timeout)) {
+		if (test_client_verify(sk, packet_size, packets_nr)) {
 			test_fail("client: connection is broken");
 			return;
 		}
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
index d4385b52c10b..69d9a7a05d5c 100644
--- a/tools/testing/selftests/net/tcp_ao/key-management.c
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -629,11 +629,11 @@ static int key_collection_socket(bool server, unsigned int port)
 }
 
 static void verify_counters(const char *tst_name, bool is_listen_sk, bool server,
-			    struct tcp_ao_counters *a, struct tcp_ao_counters *b)
+			    struct tcp_counters *a, struct tcp_counters *b)
 {
 	unsigned int i;
 
-	__test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD);
+	test_assert_counters_sk(tst_name, a, b, TEST_CNT_GOOD);
 
 	for (i = 0; i < collection.nr_keys; i++) {
 		struct test_key *key = &collection.keys[i];
@@ -652,12 +652,12 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server
 			rx_cnt_expected = key->used_on_server_tx;
 		}
 
-		test_tcp_ao_key_counters_cmp(tst_name, a, b,
-					     rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
-					     sndid, rcvid);
+		test_assert_counters_key(tst_name, &a->ao, &b->ao,
+					 rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
+					 sndid, rcvid);
 	}
-	test_tcp_ao_counters_free(a);
-	test_tcp_ao_counters_free(b);
+	test_tcp_counters_free(a);
+	test_tcp_counters_free(b);
 	test_ok("%s: passed counters checks", tst_name);
 }
 
@@ -791,17 +791,17 @@ out:
 }
 
 static int start_server(const char *tst_name, unsigned int port, size_t quota,
-			struct tcp_ao_counters *begin,
+			struct tcp_counters *begin,
 			unsigned int current_index, unsigned int rnext_index)
 {
-	struct tcp_ao_counters lsk_c1, lsk_c2;
+	struct tcp_counters lsk_c1, lsk_c2;
 	ssize_t bytes;
 	int sk, lsk;
 
 	synchronize_threads(); /* 1: key collection initialized */
 	lsk = key_collection_socket(true, port);
-	if (test_get_tcp_ao_counters(lsk, &lsk_c1))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(lsk, &lsk_c1))
+		test_error("test_get_tcp_counters()");
 	synchronize_threads(); /* 2: MKTs added => connect() */
 	if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
 		test_error("test_wait_fd()");
@@ -809,12 +809,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota,
 	sk = accept(lsk, NULL, NULL);
 	if (sk < 0)
 		test_error("accept()");
-	if (test_get_tcp_ao_counters(sk, begin))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, begin))
+		test_error("test_get_tcp_counters()");
 
 	synchronize_threads(); /* 3: accepted => send data */
-	if (test_get_tcp_ao_counters(lsk, &lsk_c2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(lsk, &lsk_c2))
+		test_error("test_get_tcp_counters()");
 	verify_keys(tst_name, lsk, true, true);
 	close(lsk);
 
@@ -830,12 +830,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota,
 }
 
 static void end_server(const char *tst_name, int sk,
-		       struct tcp_ao_counters *begin)
+		       struct tcp_counters *begin)
 {
-	struct tcp_ao_counters end;
+	struct tcp_counters end;
 
-	if (test_get_tcp_ao_counters(sk, &end))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &end))
+		test_error("test_get_tcp_counters()");
 	verify_keys(tst_name, sk, false, true);
 
 	synchronize_threads(); /* 4: verified => closed */
@@ -848,7 +848,7 @@ static void end_server(const char *tst_name, int sk,
 static void try_server_run(const char *tst_name, unsigned int port, size_t quota,
 			   unsigned int current_index, unsigned int rnext_index)
 {
-	struct tcp_ao_counters tmp;
+	struct tcp_counters tmp;
 	int sk;
 
 	sk = start_server(tst_name, port, quota, &tmp,
@@ -860,7 +860,7 @@ static void server_rotations(const char *tst_name, unsigned int port,
 			     size_t quota, unsigned int rotations,
 			     unsigned int current_index, unsigned int rnext_index)
 {
-	struct tcp_ao_counters tmp;
+	struct tcp_counters tmp;
 	unsigned int i;
 	int sk;
 
@@ -886,7 +886,7 @@ static void server_rotations(const char *tst_name, unsigned int port,
 
 static int run_client(const char *tst_name, unsigned int port,
 		      unsigned int nr_keys, int current_index, int rnext_index,
-		      struct tcp_ao_counters *before,
+		      struct tcp_counters *before,
 		      const size_t msg_sz, const size_t msg_nr)
 {
 	int sk;
@@ -904,8 +904,8 @@ static int run_client(const char *tst_name, unsigned int port,
 		if (test_set_key(sk, sndid, rcvid))
 			test_error("failed to set current/rnext keys");
 	}
-	if (before && test_get_tcp_ao_counters(sk, before))
-		test_error("test_get_tcp_ao_counters()");
+	if (before && test_get_tcp_counters(sk, before))
+		test_error("test_get_tcp_counters()");
 
 	synchronize_threads(); /* 2: MKTs added => connect() */
 	if (test_connect_socket(sk, this_ip_dest, port++) <= 0)
@@ -918,11 +918,11 @@ static int run_client(const char *tst_name, unsigned int port,
 	collection.keys[rnext_index].used_on_server_tx = 1;
 
 	synchronize_threads(); /* 3: accepted => send data */
-	if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) {
+	if (test_client_verify(sk, msg_sz, msg_nr)) {
 		test_fail("verify failed");
 		close(sk);
 		if (before)
-			test_tcp_ao_counters_free(before);
+			test_tcp_counters_free(before);
 		return -1;
 	}
 
@@ -931,7 +931,7 @@ static int run_client(const char *tst_name, unsigned int port,
 
 static int start_client(const char *tst_name, unsigned int port,
 			unsigned int nr_keys, int current_index, int rnext_index,
-			struct tcp_ao_counters *before,
+			struct tcp_counters *before,
 			const size_t msg_sz, const size_t msg_nr)
 {
 	if (init_default_key_collection(nr_keys, true))
@@ -943,9 +943,9 @@ static int start_client(const char *tst_name, unsigned int port,
 
 static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
 		       int current_index, int rnext_index,
-		       struct tcp_ao_counters *start)
+		       struct tcp_counters *start)
 {
-	struct tcp_ao_counters end;
+	struct tcp_counters end;
 
 	/* Some application may become dependent on this kernel choice */
 	if (current_index < 0)
@@ -955,8 +955,8 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
 	verify_current_rnext(tst_name, sk,
 			     collection.keys[current_index].client_keyid,
 			     collection.keys[rnext_index].server_keyid);
-	if (start && test_get_tcp_ao_counters(sk, &end))
-		test_error("test_get_tcp_ao_counters()");
+	if (start && test_get_tcp_counters(sk, &end))
+		test_error("test_get_tcp_counters()");
 	verify_keys(tst_name, sk, false, false);
 	synchronize_threads(); /* 4: verify => closed */
 	close(sk);
@@ -1016,7 +1016,7 @@ static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port)
 	trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest,
 			      -1, port, 0, -1, -1, -1, -1, -1,
 			      -1, key->server_keyid, -1);
-	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+	if (test_client_verify(sk, msg_len, nr_packets))
 		test_fail("verify failed");
 	*rnext_index = i;
 }
@@ -1048,7 +1048,7 @@ static void check_current_back(const char *tst_name, unsigned int port,
 			       unsigned int current_index, unsigned int rnext_index,
 			       unsigned int rotate_to_index)
 {
-	struct tcp_ao_counters tmp;
+	struct tcp_counters tmp;
 	int sk;
 
 	sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
@@ -1061,7 +1061,7 @@ static void check_current_back(const char *tst_name, unsigned int port,
 			      port, -1, 0, -1, -1, -1, -1, -1,
 			      collection.keys[rotate_to_index].client_keyid,
 			      collection.keys[current_index].client_keyid, -1);
-	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+	if (test_client_verify(sk, msg_len, nr_packets))
 		test_fail("verify failed");
 	/* There is a race here: between setting the current_key with
 	 * setsockopt(TCP_AO_INFO) and starting to send some data - there
@@ -1081,7 +1081,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
 			   unsigned int nr_keys, unsigned int rotations,
 			   unsigned int current_index, unsigned int rnext_index)
 {
-	struct tcp_ao_counters tmp;
+	struct tcp_counters tmp;
 	unsigned int i;
 	int sk;
 
@@ -1099,10 +1099,10 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
 				collection.keys[i].server_keyid, -1);
 		if (test_set_key(sk, -1, collection.keys[i].server_keyid))
 			test_error("Can't change the Rnext key");
-		if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+		if (test_client_verify(sk, msg_len, nr_packets)) {
 			test_fail("verify failed");
 			close(sk);
-			test_tcp_ao_counters_free(&tmp);
+			test_tcp_counters_free(&tmp);
 			return;
 		}
 		verify_current_rnext(tst_name, sk, -1,
@@ -1116,7 +1116,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
 static void try_client_run(const char *tst_name, unsigned int port,
 			   unsigned int nr_keys, int current_index, int rnext_index)
 {
-	struct tcp_ao_counters tmp;
+	struct tcp_counters tmp;
 	int sk;
 
 	sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
index 5db2f65cddc4..ebb2899c12fe 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -289,7 +289,7 @@ extern int link_set_up(const char *intf);
 extern const unsigned int test_server_port;
 extern int test_wait_fd(int sk, time_t sec, bool write);
 extern int __test_connect_socket(int sk, const char *device,
-				 void *addr, size_t addr_sz, time_t timeout);
+				 void *addr, size_t addr_sz, bool async);
 extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz);
 
 static inline int test_listen_socket(const union tcp_addr taddr,
@@ -331,25 +331,26 @@ static inline int test_listen_socket(const union tcp_addr taddr,
  * If set to 0 - kernel will try to retransmit SYN number of times, set in
  * /proc/sys/net/ipv4/tcp_syn_retries
  * By default set to 1 to make tests pass faster on non-busy machine.
+ * [in process of removal, don't use in new tests]
  */
 #ifndef TEST_RETRANSMIT_SEC
 #define TEST_RETRANSMIT_SEC	1
 #endif
 
 static inline int _test_connect_socket(int sk, const union tcp_addr taddr,
-				       unsigned int port, time_t timeout)
+				       unsigned int port, bool async)
 {
 	sockaddr_af addr;
 
 	tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
 	return __test_connect_socket(sk, veth_name,
-				     (void *)&addr, sizeof(addr), timeout);
+				     (void *)&addr, sizeof(addr), async);
 }
 
 static inline int test_connect_socket(int sk, const union tcp_addr taddr,
 				      unsigned int port)
 {
-	return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC);
+	return _test_connect_socket(sk, taddr, port, false);
 }
 
 extern int __test_set_md5(int sk, void *addr, size_t addr_sz,
@@ -483,10 +484,7 @@ static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps)
 }
 
 extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec);
-extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
-				const size_t msg_len, time_t timeout_sec);
-extern int test_client_verify(int sk, const size_t msg_len, const size_t nr,
-			      time_t timeout_sec);
+extern int test_client_verify(int sk, const size_t msg_len, const size_t nr);
 
 struct tcp_ao_key_counters {
 	uint8_t sndid;
@@ -512,7 +510,15 @@ struct tcp_ao_counters {
 	size_t nr_keys;
 	struct tcp_ao_key_counters *key_cnts;
 };
-extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
+
+struct tcp_counters {
+	struct tcp_ao_counters ao;
+	uint64_t netns_md5_notfound;
+	uint64_t netns_md5_unexpected;
+	uint64_t netns_md5_failure;
+};
+
+extern int test_get_tcp_counters(int sk, struct tcp_counters *out);
 
 #define TEST_CNT_KEY_GOOD		BIT(0)
 #define TEST_CNT_KEY_BAD		BIT(1)
@@ -526,8 +532,31 @@ extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
 #define TEST_CNT_NS_KEY_NOT_FOUND	BIT(9)
 #define TEST_CNT_NS_AO_REQUIRED		BIT(10)
 #define TEST_CNT_NS_DROPPED_ICMP	BIT(11)
+#define TEST_CNT_NS_MD5_NOT_FOUND	BIT(12)
+#define TEST_CNT_NS_MD5_UNEXPECTED	BIT(13)
+#define TEST_CNT_NS_MD5_FAILURE		BIT(14)
 typedef uint16_t test_cnt;
 
+#define _for_each_counter(f)						\
+do {									\
+	/* per-netns */							\
+	f(ao.netns_ao_good,		TEST_CNT_NS_GOOD);		\
+	f(ao.netns_ao_bad,		TEST_CNT_NS_BAD);		\
+	f(ao.netns_ao_key_not_found,	TEST_CNT_NS_KEY_NOT_FOUND);	\
+	f(ao.netns_ao_required,		TEST_CNT_NS_AO_REQUIRED);	\
+	f(ao.netns_ao_dropped_icmp,	TEST_CNT_NS_DROPPED_ICMP);	\
+	/* per-socket */						\
+	f(ao.ao_info_pkt_good,		TEST_CNT_SOCK_GOOD);		\
+	f(ao.ao_info_pkt_bad,		TEST_CNT_SOCK_BAD);		\
+	f(ao.ao_info_pkt_key_not_found,	TEST_CNT_SOCK_KEY_NOT_FOUND);	\
+	f(ao.ao_info_pkt_ao_required,	TEST_CNT_SOCK_AO_REQUIRED);	\
+	f(ao.ao_info_pkt_dropped_icmp,	TEST_CNT_SOCK_DROPPED_ICMP);	\
+	/* non-AO */							\
+	f(netns_md5_notfound,		TEST_CNT_NS_MD5_NOT_FOUND);	\
+	f(netns_md5_unexpected,		TEST_CNT_NS_MD5_UNEXPECTED);	\
+	f(netns_md5_failure,		TEST_CNT_NS_MD5_FAILURE);	\
+} while (0)
+
 #define TEST_CNT_AO_GOOD		(TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD)
 #define TEST_CNT_AO_BAD			(TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD)
 #define TEST_CNT_AO_KEY_NOT_FOUND	(TEST_CNT_SOCK_KEY_NOT_FOUND | \
@@ -539,34 +568,71 @@ typedef uint16_t test_cnt;
 #define TEST_CNT_GOOD			(TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD)
 #define TEST_CNT_BAD			(TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD)
 
-extern int __test_tcp_ao_counters_cmp(const char *tst_name,
-		struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+extern test_cnt test_cmp_counters(struct tcp_counters *before,
+				  struct tcp_counters *after);
+extern int test_assert_counters_sk(const char *tst_name,
+		struct tcp_counters *before, struct tcp_counters *after,
 		test_cnt expected);
-extern int test_tcp_ao_key_counters_cmp(const char *tst_name,
+extern int test_assert_counters_key(const char *tst_name,
 		struct tcp_ao_counters *before, struct tcp_ao_counters *after,
 		test_cnt expected, int sndid, int rcvid);
-extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts);
+extern void test_tcp_counters_free(struct tcp_counters *cnts);
+
+/*
+ * Polling for netns and socket counters during select()/connect() and also
+ * client/server messaging. Instead of a constant timeout on the underlying
+ * select(), check the counters and return early. This lets tests that expect
+ * a timeout pass without waiting out the full fixed timeout (test speed-up).
+ * Previously, shorter timeouts were used for tests expecting to time out,
+ * but that led to sporadic false-positive failures of the counter checks,
+ * as one-second timeouts aren't enough for a TCP retransmit.
+ *
+ * Two sides of the socketpair (client/server) should synchronize failures
+ * using a shared variable *err, so that they can detect the other side's
+ * failure.
+ */
+extern int test_skpair_wait_poll(int sk, bool write, test_cnt cond,
+				 volatile int *err);
+extern int _test_skpair_connect_poll(int sk, const char *device,
+				     void *addr, size_t addr_sz,
+				     test_cnt cond, volatile int *err);
+static inline int test_skpair_connect_poll(int sk, const union tcp_addr taddr,
+					   unsigned int port,
+					   test_cnt cond, volatile int *err)
+{
+	sockaddr_af addr;
+
+	tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+	return _test_skpair_connect_poll(sk, veth_name,
+					 (void *)&addr, sizeof(addr), cond, err);
+}
+
+extern int test_skpair_client(int sk, const size_t msg_len, const size_t nr,
+			      test_cnt cond, volatile int *err);
+extern int test_skpair_server(int sk, ssize_t quota,
+			      test_cnt cond, volatile int *err);
+
 /*
- * Frees buffers allocated in test_get_tcp_ao_counters().
+ * Frees buffers allocated in test_get_tcp_counters().
  * The function doesn't expect new keys or keys removed between calls
- * to test_get_tcp_ao_counters(). Check key counters manually if they
+ * to test_get_tcp_counters(). Check key counters manually if they
  * may change.
  */
-static inline int test_tcp_ao_counters_cmp(const char *tst_name,
-					   struct tcp_ao_counters *before,
-					   struct tcp_ao_counters *after,
-					   test_cnt expected)
+static inline int test_assert_counters(const char *tst_name,
+				       struct tcp_counters *before,
+				       struct tcp_counters *after,
+				       test_cnt expected)
 {
 	int ret;
 
-	ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected);
+	ret = test_assert_counters_sk(tst_name, before, after, expected);
 	if (ret)
 		goto out;
-	ret = test_tcp_ao_key_counters_cmp(tst_name, before, after,
-					   expected, -1, -1);
+	ret = test_assert_counters_key(tst_name, &before->ao, &after->ao,
+				       expected, -1, -1);
 out:
-	test_tcp_ao_counters_free(before);
-	test_tcp_ao_counters_free(after);
+	test_tcp_counters_free(before);
+	test_tcp_counters_free(after);
 	return ret;
 }
 
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
index 24380c68fec6..27403f875054 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
@@ -427,11 +427,8 @@ static void dump_trace_event(struct expected_trace_point *e)
 	test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu",
 		   trace_event_names[e->type],
 		   src, e->src_port, dst, e->dst_port, e->L3index,
-		   (e->fin > 0) ? "F" : (e->fin == 0) ? "!F" : "",
-		   (e->syn > 0) ? "S" : (e->syn == 0) ? "!S" : "",
-		   (e->rst > 0) ? "R" : (e->rst == 0) ? "!R" : "",
-		   (e->psh > 0) ? "P" : (e->psh == 0) ? "!P" : "",
-		   (e->ack > 0) ? "." : (e->ack == 0) ? "!." : "",
+		   e->fin ? "F" : "", e->syn ? "S" : "", e->rst ? "R" : "",
+		   e->psh ? "P" : "", e->ack ? "." : "",
 		   e->keyid, e->rnext, e->maclen, e->sne, e->matched);
 }
 
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
index a27cc03c9fbd..49aec2922a31 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -9,7 +9,7 @@
  * Can't be included in the header: it defines static variables which
  * will be unique to every object. Let's include it only once here.
  */
-#include "../../../kselftest.h"
+#include "kselftest.h"
 
 /* Prevent overriding of one thread's output by another */
 static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
index 0ffda966c677..ef8e9031d47a 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/sock.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -34,10 +34,8 @@ int __test_listen_socket(int backlog, void *addr, size_t addr_sz)
 	return sk;
 }
 
-int test_wait_fd(int sk, time_t sec, bool write)
+static int __test_wait_fd(int sk, struct timeval *tv, bool write)
 {
-	struct timeval tv = { .tv_sec = sec };
-	struct timeval *ptv = NULL;
 	fd_set fds, efds;
 	int ret;
 	socklen_t slen = sizeof(ret);
@@ -47,14 +45,11 @@ int test_wait_fd(int sk, time_t sec, bool write)
 	FD_ZERO(&efds);
 	FD_SET(sk, &efds);
 
-	if (sec)
-		ptv = &tv;
-
 	errno = 0;
 	if (write)
-		ret = select(sk + 1, NULL, &fds, &efds, ptv);
+		ret = select(sk + 1, NULL, &fds, &efds, tv);
 	else
-		ret = select(sk + 1, &fds, NULL, &efds, ptv);
+		ret = select(sk + 1, &fds, NULL, &efds, tv);
 	if (ret < 0)
 		return -errno;
 	if (ret == 0) {
@@ -69,8 +64,54 @@ int test_wait_fd(int sk, time_t sec, bool write)
 	return 0;
 }
 
+int test_wait_fd(int sk, time_t sec, bool write)
+{
+	struct timeval tv = { .tv_sec = sec, };
+
+	return __test_wait_fd(sk, sec ? &tv : NULL, write);
+}
+
+static bool __skpair_poll_should_stop(int sk, struct tcp_counters *c,
+				      test_cnt condition)
+{
+	struct tcp_counters c2;
+	test_cnt diff;
+
+	if (test_get_tcp_counters(sk, &c2))
+		test_error("test_get_tcp_counters()");
+
+	diff = test_cmp_counters(c, &c2);
+	test_tcp_counters_free(&c2);
+	return (diff & condition) == condition;
+}
+
+/* How often to wake up and check netns counters & the paired side's (*err) */
+#define POLL_USEC	150
+static int __test_skpair_poll(int sk, bool write, uint64_t timeout,
+			      struct tcp_counters *c, test_cnt cond,
+			      volatile int *err)
+{
+	uint64_t t;
+
+	for (t = 0; t <= timeout * 1000000; t += POLL_USEC) {
+		struct timeval tv = { .tv_usec = POLL_USEC, };
+		int ret;
+
+		ret = __test_wait_fd(sk, &tv, write);
+		if (ret != -ETIMEDOUT)
+			return ret;
+		if (c && cond && __skpair_poll_should_stop(sk, c, cond))
+			break;
+		if (err && *err)
+			return *err;
+	}
+	if (err)
+		*err = -ETIMEDOUT;
+	return -ETIMEDOUT;
+}
+
 int __test_connect_socket(int sk, const char *device,
-			  void *addr, size_t addr_sz, time_t timeout)
+			  void *addr, size_t addr_sz, bool async)
 {
 	long flags;
 	int err;
@@ -82,15 +123,6 @@ int __test_connect_socket(int sk, const char *device,
 			test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
 	}
 
-	if (!timeout) {
-		err = connect(sk, addr, addr_sz);
-		if (err) {
-			err = -errno;
-			goto out;
-		}
-		return 0;
-	}
-
 	flags = fcntl(sk, F_GETFL);
 	if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
 		test_error("fcntl()");
@@ -100,9 +132,9 @@ int __test_connect_socket(int sk, const char *device,
 			err = -errno;
 			goto out;
 		}
-		if (timeout < 0)
+		if (async)
 			return sk;
-		err = test_wait_fd(sk, timeout, 1);
+		err = test_wait_fd(sk, TEST_TIMEOUT_SEC, 1);
 		if (err)
 			goto out;
 	}
@@ -113,6 +145,45 @@ out:
 	return err;
 }
 
+int test_skpair_wait_poll(int sk, bool write,
+			  test_cnt cond, volatile int *err)
+{
+	struct tcp_counters c;
+	int ret;
+
+	*err = 0;
+	if (test_get_tcp_counters(sk, &c))
+		test_error("test_get_tcp_counters()");
+	synchronize_threads(); /* 1: init skpair & read nscounters */
+
+	ret = __test_skpair_poll(sk, write, TEST_TIMEOUT_SEC, &c, cond, err);
+	test_tcp_counters_free(&c);
+	return ret;
+}
+
+int _test_skpair_connect_poll(int sk, const char *device,
+			      void *addr, size_t addr_sz,
+			      test_cnt condition, volatile int *err)
+{
+	struct tcp_counters c;
+	int ret;
+
+	*err = 0;
+	if (test_get_tcp_counters(sk, &c))
+		test_error("test_get_tcp_counters()");
+	synchronize_threads(); /* 1: init skpair & read nscounters */
+	ret = __test_connect_socket(sk, device, addr, addr_sz, true);
+	if (ret < 0) {
+		test_tcp_counters_free(&c);
+		return (*err = ret);
+	}
+	ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, &c, condition, err);
+	if (ret < 0)
+		close(sk);
+	test_tcp_counters_free(&c);
+	return ret;
+}
+
 int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix,
 		   int vrf, const char *password)
 {
@@ -333,12 +404,12 @@ do {									\
 	return 0;
 }
 
-int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
+int test_get_tcp_counters(int sk, struct tcp_counters *out)
 {
 	struct tcp_ao_getsockopt *key_dump;
 	socklen_t key_dump_sz = sizeof(*key_dump);
 	struct tcp_ao_info_opt info = {};
-	bool c1, c2, c3, c4, c5;
+	bool c1, c2, c3, c4, c5, c6, c7, c8;
 	struct netstat *ns;
 	int err, nr_keys;
 
@@ -346,25 +417,30 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
 
 	/* per-netns */
 	ns = netstat_read();
-	out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
-	out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
-	out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
-	out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
-	out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+	out->ao.netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
+	out->ao.netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
+	out->ao.netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
+	out->ao.netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
+	out->ao.netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+	out->netns_md5_notfound = netstat_get(ns, "TCPMD5NotFound", &c6);
+	out->netns_md5_unexpected = netstat_get(ns, "TCPMD5Unexpected", &c7);
+	out->netns_md5_failure = netstat_get(ns, "TCPMD5Failure", &c8);
 	netstat_free(ns);
-	if (c1 || c2 || c3 || c4 || c5)
+	if (c1 || c2 || c3 || c4 || c5 || c6 || c7 || c8)
 		return -EOPNOTSUPP;
 
 	err = test_get_ao_info(sk, &info);
+	if (err == -ENOENT)
+		return 0;
 	if (err)
 		return err;
 
 	/* per-socket */
-	out->ao_info_pkt_good		= info.pkt_good;
-	out->ao_info_pkt_bad		= info.pkt_bad;
-	out->ao_info_pkt_key_not_found	= info.pkt_key_not_found;
-	out->ao_info_pkt_ao_required	= info.pkt_ao_required;
-	out->ao_info_pkt_dropped_icmp	= info.pkt_dropped_icmp;
+	out->ao.ao_info_pkt_good = info.pkt_good;
+	out->ao.ao_info_pkt_bad = info.pkt_bad;
+	out->ao.ao_info_pkt_key_not_found = info.pkt_key_not_found;
+	out->ao.ao_info_pkt_ao_required = info.pkt_ao_required;
+	out->ao.ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp;
 
 	/* per-key */
 	nr_keys = test_get_ao_keys_nr(sk);
@@ -372,7 +448,7 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
 		return nr_keys;
 	if (nr_keys == 0)
 		test_error("test_get_ao_keys_nr() == 0");
-	out->nr_keys = (size_t)nr_keys;
+	out->ao.nr_keys = (size_t)nr_keys;
 	key_dump = calloc(nr_keys, key_dump_sz);
 	if (!key_dump)
 		return -errno;
@@ -386,72 +462,84 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
 		return -errno;
 	}
 
-	out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0]));
-	if (!out->key_cnts) {
+	out->ao.key_cnts = calloc(nr_keys, sizeof(out->ao.key_cnts[0]));
+	if (!out->ao.key_cnts) {
 		free(key_dump);
 		return -errno;
 	}
 
 	while (nr_keys--) {
-		out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
-		out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
-		out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
-		out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
+		out->ao.key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
+		out->ao.key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
+		out->ao.key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
+		out->ao.key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
 	}
 	free(key_dump);
 
 	return 0;
 }
 
-int __test_tcp_ao_counters_cmp(const char *tst_name,
-			       struct tcp_ao_counters *before,
-			       struct tcp_ao_counters *after,
-			       test_cnt expected)
+test_cnt test_cmp_counters(struct tcp_counters *before,
+			   struct tcp_counters *after)
+{
+#define __cmp(cnt, e_cnt)						\
+do {									\
+	if (before->cnt > after->cnt)					\
+		test_error("counter " __stringify(cnt) " decreased");	\
+	if (before->cnt != after->cnt)					\
+		ret |= e_cnt;						\
+} while (0)
+
+	test_cnt ret = 0;
+	size_t i;
+
+	if (before->ao.nr_keys != after->ao.nr_keys)
+		test_error("the number of keys has changed");
+
+	_for_each_counter(__cmp);
+
+	i = before->ao.nr_keys;
+	while (i--) {
+		__cmp(ao.key_cnts[i].pkt_good, TEST_CNT_KEY_GOOD);
+		__cmp(ao.key_cnts[i].pkt_bad, TEST_CNT_KEY_BAD);
+	}
+#undef __cmp
+	return ret;
+}
+
+int test_assert_counters_sk(const char *tst_name,
+			    struct tcp_counters *before,
+			    struct tcp_counters *after,
+			    test_cnt expected)
 {
-#define __cmp_ao(cnt, expecting_inc)					\
+#define __cmp_ao(cnt, e_cnt)						\
 do {									\
 	if (before->cnt > after->cnt) {					\
 		test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \
-			  tst_name ?: "", before->cnt, after->cnt);		\
+			  tst_name ?: "", before->cnt, after->cnt);	\
 		return -1;						\
 	}								\
-	if ((before->cnt != after->cnt) != (expecting_inc)) {		\
+	if ((before->cnt != after->cnt) != !!(expected & e_cnt)) {	\
 		test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \
-			  tst_name ?: "", (expecting_inc) ? "" : "not ",	\
+			  tst_name ?: "", (expected & e_cnt) ? "" : "not ",	\
 			  before->cnt, after->cnt);			\
 		return -1;						\
 	}								\
-} while(0)
+} while (0)
 
 	errno = 0;
-	/* per-netns */
-	__cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD));
-	__cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD));
-	__cmp_ao(netns_ao_key_not_found,
-		 !!(expected & TEST_CNT_NS_KEY_NOT_FOUND));
-	__cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED));
-	__cmp_ao(netns_ao_dropped_icmp,
-		 !!(expected & TEST_CNT_NS_DROPPED_ICMP));
-	/* per-socket */
-	__cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD));
-	__cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD));
-	__cmp_ao(ao_info_pkt_key_not_found,
-		 !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND));
-	__cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED));
-	__cmp_ao(ao_info_pkt_dropped_icmp,
-		 !!(expected & TEST_CNT_SOCK_DROPPED_ICMP));
+	_for_each_counter(__cmp_ao);
 	return 0;
 #undef __cmp_ao
 }
 
-int test_tcp_ao_key_counters_cmp(const char *tst_name,
-				 struct tcp_ao_counters *before,
-				 struct tcp_ao_counters *after,
-				 test_cnt expected,
-				 int sndid, int rcvid)
+int test_assert_counters_key(const char *tst_name,
+			     struct tcp_ao_counters *before,
+			     struct tcp_ao_counters *after,
+			     test_cnt expected, int sndid, int rcvid)
 {
 	size_t i;
-#define __cmp_ao(i, cnt, expecting_inc)					\
+#define __cmp_ao(i, cnt, e_cnt)					\
 do {									\
 	if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) {		\
 		test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \
@@ -461,16 +549,16 @@ do {									\
 			  before->key_cnts[i].rcvid);			\
 		return -1;						\
 	}								\
-	if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) {		\
+	if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != !!(expected & e_cnt)) {		\
 		test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \
-			  tst_name ?: "", (expecting_inc) ? "" : "not ",\
+			  tst_name ?: "", (expected & e_cnt) ? "" : "not ",\
 			  before->key_cnts[i].cnt,			\
 			  after->key_cnts[i].cnt,			\
 			  before->key_cnts[i].sndid,			\
 			  before->key_cnts[i].rcvid);			\
 		return -1;						\
 	}								\
-} while(0)
+} while (0)
 
 	if (before->nr_keys != after->nr_keys) {
 		test_fail("%s: Keys changed on the socket %zu != %zu",
@@ -485,20 +573,22 @@ do {									\
 			continue;
 		if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid)
 			continue;
-		__cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD));
-		__cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD));
+		__cmp_ao(i, pkt_good, TEST_CNT_KEY_GOOD);
+		__cmp_ao(i, pkt_bad, TEST_CNT_KEY_BAD);
 	}
 	return 0;
 #undef __cmp_ao
 }
 
-void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts)
+void test_tcp_counters_free(struct tcp_counters *cnts)
 {
-	free(cnts->key_cnts);
+	free(cnts->ao.key_cnts);
 }
 
 #define TEST_BUF_SIZE 4096
-ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+static ssize_t _test_server_run(int sk, ssize_t quota, struct tcp_counters *c,
+				test_cnt cond, volatile int *err,
+				time_t timeout_sec)
 {
 	ssize_t total = 0;
 
@@ -507,7 +597,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
 		ssize_t bytes, sent;
 		int ret;
 
-		ret = test_wait_fd(sk, timeout_sec, 0);
+		ret = __test_skpair_poll(sk, 0, timeout_sec, c, cond, err);
 		if (ret)
 			return ret;
 
@@ -518,7 +608,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
 		if (bytes == 0)
 			break;
 
-		ret = test_wait_fd(sk, timeout_sec, 1);
+		ret = __test_skpair_poll(sk, 1, timeout_sec, c, cond, err);
 		if (ret)
 			return ret;
 
@@ -533,13 +623,41 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
 	return total;
 }
 
-ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
-			 const size_t msg_len, time_t timeout_sec)
+ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+{
+	return _test_server_run(sk, quota, NULL, 0, NULL,
+				timeout_sec ?: TEST_TIMEOUT_SEC);
+}
+
+int test_skpair_server(int sk, ssize_t quota, test_cnt cond, volatile int *err)
+{
+	struct tcp_counters c;
+	ssize_t ret;
+
+	*err = 0;
+	if (test_get_tcp_counters(sk, &c))
+		test_error("test_get_tcp_counters()");
+	synchronize_threads(); /* 1: init skpair & read nscounters */
+
+	ret = _test_server_run(sk, quota, &c, cond, err, TEST_TIMEOUT_SEC);
+	test_tcp_counters_free(&c);
+	return ret;
+}
+
+static ssize_t test_client_loop(int sk, size_t buf_sz, const size_t msg_len,
+				struct tcp_counters *c, test_cnt cond,
+				volatile int *err)
 {
 	char msg[msg_len];
 	int nodelay = 1;
+	char *buf;
 	size_t i;
 
+	buf = alloca(buf_sz);
+	if (!buf)
+		return -ENOMEM;
+	randomize_buffer(buf, buf_sz);
+
 	if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay)))
 		test_error("setsockopt(TCP_NODELAY)");
 
@@ -547,7 +665,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
 		size_t sent, bytes = min(msg_len, buf_sz - i);
 		int ret;
 
-		ret = test_wait_fd(sk, timeout_sec, 1);
+		ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, c, cond, err);
 		if (ret)
 			return ret;
 
@@ -561,7 +679,8 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
 		do {
 			ssize_t got;
 
-			ret = test_wait_fd(sk, timeout_sec, 0);
+			ret = __test_skpair_poll(sk, 0, TEST_TIMEOUT_SEC,
+						 c, cond, err);
 			if (ret)
 				return ret;
 
@@ -580,15 +699,31 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
 	return i;
 }
 
-int test_client_verify(int sk, const size_t msg_len, const size_t nr,
-		       time_t timeout_sec)
+int test_client_verify(int sk, const size_t msg_len, const size_t nr)
 {
 	size_t buf_sz = msg_len * nr;
-	char *buf = alloca(buf_sz);
 	ssize_t ret;
 
-	randomize_buffer(buf, buf_sz);
-	ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec);
+	ret = test_client_loop(sk, buf_sz, msg_len, NULL, 0, NULL);
+	if (ret < 0)
+		return (int)ret;
+	return ret != buf_sz ? -1 : 0;
+}
+
+int test_skpair_client(int sk, const size_t msg_len, const size_t nr,
+		       test_cnt cond, volatile int *err)
+{
+	struct tcp_counters c;
+	size_t buf_sz = msg_len * nr;
+	ssize_t ret;
+
+	*err = 0;
+	if (test_get_tcp_counters(sk, &c))
+		test_error("test_get_tcp_counters()");
+	synchronize_threads(); /* 1: init skpair & read nscounters */
+
+	ret = test_client_loop(sk, buf_sz, msg_len, &c, cond, err);
+	test_tcp_counters_free(&c);
 	if (ret < 0)
 		return (int)ret;
 	return ret != buf_sz ? -1 : 0;
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
index ecc6f1e3a414..9a059b6c4523 100644
--- a/tools/testing/selftests/net/tcp_ao/restore.c
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -16,11 +16,11 @@ const size_t quota = nr_packets * msg_len;
 static void try_server_run(const char *tst_name, unsigned int port,
 			   fault_t inj, test_cnt cnt_expected)
 {
+	test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
 	const char *cnt_name = "TCPAOGood";
-	struct tcp_ao_counters ao1, ao2;
+	struct tcp_counters cnt1, cnt2;
 	uint64_t before_cnt, after_cnt;
-	int sk, lsk;
-	time_t timeout;
+	int sk, lsk, dummy;
 	ssize_t bytes;
 
 	if (fault(TIMEOUT))
@@ -48,11 +48,10 @@ static void try_server_run(const char *tst_name, unsigned int port,
 	}
 
 	before_cnt = netstat_get_one(cnt_name, NULL);
-	if (test_get_tcp_ao_counters(sk, &ao1))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt1))
+		test_error("test_get_tcp_counters()");
 
-	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
-	bytes = test_server_run(sk, quota, timeout);
+	bytes = test_skpair_server(sk, quota, poll_cnt, &dummy);
 	if (fault(TIMEOUT)) {
 		if (bytes > 0)
 			test_fail("%s: server served: %zd", tst_name, bytes);
@@ -65,17 +64,17 @@ static void try_server_run(const char *tst_name, unsigned int port,
 			test_ok("%s: server alive", tst_name);
 	}
 	synchronize_threads(); /* 3: counters checks */
-	if (test_get_tcp_ao_counters(sk, &ao2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt2))
+		test_error("test_get_tcp_counters()");
 	after_cnt = netstat_get_one(cnt_name, NULL);
 
-	test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+	test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
 
 	if (after_cnt <= before_cnt) {
-		test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
-				tst_name, cnt_name, after_cnt, before_cnt);
+		test_fail("%s(server): %s counter did not increase: %" PRIu64 " <= %" PRIu64,
+			  tst_name, cnt_name, after_cnt, before_cnt);
 	} else {
-		test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
+		test_ok("%s(server): counter %s increased %" PRIu64 " => %" PRIu64,
 			tst_name, cnt_name, before_cnt, after_cnt);
 	}
 
@@ -92,16 +91,16 @@ static void *server_fn(void *arg)
 {
 	unsigned int port = test_server_port;
 
-	try_server_run("TCP-AO migrate to another socket", port++,
+	try_server_run("TCP-AO migrate to another socket (server)", port++,
 		       0, TEST_CNT_GOOD);
-	try_server_run("TCP-AO with wrong send ISN", port++,
+	try_server_run("TCP-AO with wrong send ISN (server)", port++,
 		       FAULT_TIMEOUT, TEST_CNT_BAD);
-	try_server_run("TCP-AO with wrong receive ISN", port++,
+	try_server_run("TCP-AO with wrong receive ISN (server)", port++,
 		       FAULT_TIMEOUT, TEST_CNT_BAD);
-	try_server_run("TCP-AO with wrong send SEQ ext number", port++,
+	try_server_run("TCP-AO with wrong send SEQ ext number (server)", port++,
 		       FAULT_TIMEOUT, TEST_CNT_BAD);
-	try_server_run("TCP-AO with wrong receive SEQ ext number", port++,
-		       FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+	try_server_run("TCP-AO with wrong receive SEQ ext number (server)",
+		       port++, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
 
 	synchronize_threads(); /* don't race to exit: client exits */
 	return NULL;
@@ -125,7 +124,7 @@ static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr,
 		test_error("failed to connect()");
 
 	synchronize_threads(); /* 2: accepted => send data */
-	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+	if (test_client_verify(sk, msg_len, nr_packets))
 		test_fail("pre-migrate verify failed");
 
 	test_enable_repair(sk);
@@ -139,11 +138,11 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
 			    struct tcp_ao_repair *ao_img,
 			    fault_t inj, test_cnt cnt_expected)
 {
+	test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
 	const char *cnt_name = "TCPAOGood";
-	struct tcp_ao_counters ao1, ao2;
+	struct tcp_counters cnt1, cnt2;
 	uint64_t before_cnt, after_cnt;
-	time_t timeout;
-	int sk;
+	int sk, dummy;
 
 	if (fault(TIMEOUT))
 		cnt_name = "TCPAOBad";
@@ -159,30 +158,30 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
 		test_error("setsockopt(TCP_AO_ADD_KEY)");
 	test_ao_restore(sk, ao_img);
 
-	if (test_get_tcp_ao_counters(sk, &ao1))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt1))
+		test_error("test_get_tcp_counters()");
 
 	test_disable_repair(sk);
 	test_sock_state_free(img);
 
-	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
-	if (test_client_verify(sk, msg_len, nr_packets, timeout)) {
+	if (test_skpair_client(sk, msg_len, nr_packets, poll_cnt, &dummy)) {
 		if (fault(TIMEOUT))
 			test_ok("%s: post-migrate connection is broken", tst_name);
 		else
 			test_fail("%s: post-migrate connection is working", tst_name);
 	} else {
 		if (fault(TIMEOUT))
-			test_fail("%s: post-migrate connection still working", tst_name);
+			test_fail("%s: post-migrate connection is working", tst_name);
 		else
 			test_ok("%s: post-migrate connection is alive", tst_name);
 	}
+
 	synchronize_threads(); /* 3: counters checks */
-	if (test_get_tcp_ao_counters(sk, &ao2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt2))
+		test_error("test_get_tcp_counters()");
 	after_cnt = netstat_get_one(cnt_name, NULL);
 
-	test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+	test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
 
 	if (after_cnt <= before_cnt) {
 		test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
@@ -203,7 +202,7 @@ static void *client_fn(void *arg)
 	sockaddr_af saddr;
 
 	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
-	test_sk_restore("TCP-AO migrate to another socket", port++,
+	test_sk_restore("TCP-AO migrate to another socket (client)", port++,
 			&saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD);
 
 	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -212,7 +211,7 @@ static void *client_fn(void *arg)
 			      -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
 	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
 			      port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
-	test_sk_restore("TCP-AO with wrong send ISN", port++,
+	test_sk_restore("TCP-AO with wrong send ISN (client)", port++,
 			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
 
 	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -221,7 +220,7 @@ static void *client_fn(void *arg)
 			      -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
 	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
 			      port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
-	test_sk_restore("TCP-AO with wrong receive ISN", port++,
+	test_sk_restore("TCP-AO with wrong receive ISN (client)", port++,
 			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
 
 	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -229,8 +228,8 @@ static void *client_fn(void *arg)
 	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
 			      -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
 	/* not expecting server => client mismatches as only snd sne is broken */
-	test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
-			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+	test_sk_restore("TCP-AO with wrong send SEQ ext number (client)",
+			port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
 			TEST_CNT_NS_BAD | TEST_CNT_GOOD);
 
 	test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -238,8 +237,8 @@ static void *client_fn(void *arg)
 	/* not expecting client => server mismatches as only rcv sne is broken */
 	trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
 			      port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
-	test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
-			&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+	test_sk_restore("TCP-AO with wrong receive SEQ ext number (client)",
+			port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
 			TEST_CNT_NS_GOOD | TEST_CNT_BAD);
 
 	return NULL;
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index 6364facaa63e..883cddf377cf 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -84,15 +84,15 @@ static void close_forced(int sk)
 
 static void test_server_active_rst(unsigned int port)
 {
-	struct tcp_ao_counters cnt1, cnt2;
+	struct tcp_counters cnt1, cnt2;
 	ssize_t bytes;
 	int sk, lsk;
 
 	lsk = test_listen_socket(this_ip_addr, port, backlog);
 	if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
 		test_error("setsockopt(TCP_AO_ADD_KEY)");
-	if (test_get_tcp_ao_counters(lsk, &cnt1))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(lsk, &cnt1))
+		test_error("test_get_tcp_counters()");
 
 	synchronize_threads(); /* 1: MKT added */
 	if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
@@ -103,8 +103,8 @@ static void test_server_active_rst(unsigned int port)
 		test_error("accept()");
 
 	synchronize_threads(); /* 2: connection accept()ed, another queued */
-	if (test_get_tcp_ao_counters(lsk, &cnt2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(lsk, &cnt2))
+		test_error("test_get_tcp_counters()");
 
 	synchronize_threads(); /* 3: close listen socket */
 	close(lsk);
@@ -120,7 +120,7 @@ static void test_server_active_rst(unsigned int port)
 	synchronize_threads(); /* 5: closed active sk */
 
 	synchronize_threads(); /* 6: counters checks */
-	if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
+	if (test_assert_counters("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
 		test_fail("MKT counters (server) have not only good packets");
 	else
 		test_ok("MKT counters are good on server");
@@ -128,7 +128,7 @@ static void test_server_active_rst(unsigned int port)
 
 static void test_server_passive_rst(unsigned int port)
 {
-	struct tcp_ao_counters ao1, ao2;
+	struct tcp_counters cnt1, cnt2;
 	int sk, lsk;
 	ssize_t bytes;
 
@@ -147,8 +147,8 @@ static void test_server_passive_rst(unsigned int port)
 
 	synchronize_threads(); /* 2: accepted => send data */
 	close(lsk);
-	if (test_get_tcp_ao_counters(sk, &ao1))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt1))
+		test_error("test_get_tcp_counters()");
 
 	bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
 	if (bytes != quota) {
@@ -160,12 +160,12 @@ static void test_server_passive_rst(unsigned int port)
 
 	synchronize_threads(); /* 3: checkpoint the client */
 	synchronize_threads(); /* 4: close the server, creating twsk */
-	if (test_get_tcp_ao_counters(sk, &ao2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt2))
+		test_error("test_get_tcp_counters()");
 	close(sk);
 
 	synchronize_threads(); /* 5: restore the socket, send more data */
-	test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD);
+	test_assert_counters("passive RST server", &cnt1, &cnt2, TEST_CNT_GOOD);
 
 	synchronize_threads(); /* 6: server exits */
 }
@@ -271,8 +271,7 @@ static void test_client_active_rst(unsigned int port)
 
 	synchronize_threads(); /* 1: MKT added */
 	for (i = 0; i < last; i++) {
-		err = _test_connect_socket(sk[i], this_ip_dest, port,
-					       (i == 0) ? TEST_TIMEOUT_SEC : -1);
+		err = _test_connect_socket(sk[i], this_ip_dest, port, i != 0);
 		if (err < 0)
 			test_error("failed to connect()");
 	}
@@ -283,12 +282,12 @@ static void test_client_active_rst(unsigned int port)
 		test_error("test_wait_fds(): %d", err);
 
 	/* async connect() with third sk to get into request_sock_queue */
-	err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+	err = _test_connect_socket(sk[last], this_ip_dest, port, 1);
 	if (err < 0)
 		test_error("failed to connect()");
 
 	synchronize_threads(); /* 3: close listen socket */
-	if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+	if (test_client_verify(sk[0], packet_sz, quota / packet_sz))
 		test_fail("Failed to send data on connected socket");
 	else
 		test_ok("Verified established tcp connection");
@@ -323,7 +322,7 @@ static void test_client_active_rst(unsigned int port)
 
 static void test_client_passive_rst(unsigned int port)
 {
-	struct tcp_ao_counters ao1, ao2;
+	struct tcp_counters cnt1, cnt2;
 	struct tcp_ao_repair ao_img;
 	struct tcp_sock_state img;
 	sockaddr_af saddr;
@@ -341,7 +340,7 @@ static void test_client_passive_rst(unsigned int port)
 		test_error("failed to connect()");
 
 	synchronize_threads(); /* 2: accepted => send data */
-	if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+	if (test_client_verify(sk, packet_sz, quota / packet_sz))
 		test_fail("Failed to send data on connected socket");
 	else
 		test_ok("Verified established tcp connection");
@@ -397,8 +396,8 @@ static void test_client_passive_rst(unsigned int port)
 		test_error("setsockopt(TCP_AO_ADD_KEY)");
 	test_ao_restore(sk, &ao_img);
 
-	if (test_get_tcp_ao_counters(sk, &ao1))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt1))
+		test_error("test_get_tcp_counters()");
 
 	test_disable_repair(sk);
 	test_sock_state_free(&img);
@@ -417,7 +416,7 @@ static void test_client_passive_rst(unsigned int port)
 	 * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0,
 	 *    options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0
 	 */
-	err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC);
+	err = test_client_verify(sk, packet_sz, quota / packet_sz);
 	/* Make sure that the connection was reset, not timeouted */
 	if (err && err == -ECONNRESET)
 		test_ok("client sock was passively reset post-seq-adjust");
@@ -426,12 +425,12 @@ static void test_client_passive_rst(unsigned int port)
 	else
 		test_fail("client sock is yet connected post-seq-adjust");
 
-	if (test_get_tcp_ao_counters(sk, &ao2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt2))
+		test_error("test_get_tcp_counters()");
 
 	synchronize_threads(); /* 6: server exits */
 	close(sk);
-	test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD);
+	test_assert_counters("client passive RST", &cnt1, &cnt2, TEST_CNT_GOOD);
 }
 
 static void *client_fn(void *arg)
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
index 3ecd2b58de6a..2c73bea698a6 100644
--- a/tools/testing/selftests/net/tcp_ao/self-connect.c
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf,
 
 	if (link_set_up(lo_intf))
 		test_error("Failed to bring %s up", lo_intf);
+
+	if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr))
+		test_error("Failed to add a local route %s", lo_intf);
 }
 
 static void setup_lo_intf(const char *lo_intf)
@@ -30,7 +33,7 @@ static void setup_lo_intf(const char *lo_intf)
 static void tcp_self_connect(const char *tst, unsigned int port,
 			     bool different_keyids, bool check_restore)
 {
-	struct tcp_ao_counters before_ao, after_ao;
+	struct tcp_counters before, after;
 	uint64_t before_aogood, after_aogood;
 	struct netstat *ns_before, *ns_after;
 	const size_t nr_packets = 20;
@@ -60,17 +63,17 @@ static void tcp_self_connect(const char *tst, unsigned int port,
 
 	ns_before = netstat_read();
 	before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
-	if (test_get_tcp_ao_counters(sk, &before_ao))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &before))
+		test_error("test_get_tcp_counters()");
 
 	if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr,
-				  sizeof(addr), TEST_TIMEOUT_SEC) < 0) {
+				  sizeof(addr), 0) < 0) {
 		ns_after = netstat_read();
 		netstat_print_diff(ns_before, ns_after);
 		test_error("failed to connect()");
 	}
 
-	if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+	if (test_client_verify(sk, 100, nr_packets)) {
 		test_fail("%s: tcp connection verify failed", tst);
 		close(sk);
 		return;
@@ -78,8 +81,8 @@ static void tcp_self_connect(const char *tst, unsigned int port,
 
 	ns_after = netstat_read();
 	after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
-	if (test_get_tcp_ao_counters(sk, &after_ao))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &after))
+		test_error("test_get_tcp_counters()");
 	if (!check_restore) {
 		/* to debug: netstat_print_diff(ns_before, ns_after); */
 		netstat_free(ns_before);
@@ -93,7 +96,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
 		return;
 	}
 
-	if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) {
+	if (test_assert_counters(tst, &before, &after, TEST_CNT_GOOD)) {
 		close(sk);
 		return;
 	}
@@ -136,7 +139,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
 	test_ao_restore(sk, &ao_img);
 	test_disable_repair(sk);
 	test_sock_state_free(&img);
-	if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+	if (test_client_verify(sk, 100, nr_packets)) {
 		test_fail("%s: tcp connection verify failed", tst);
 		close(sk);
 		return;
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index 8901a6785dc8..6478da6a71c3 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Check that after SEQ number wrap-around:
  * 1. SEQ-extension has upper bytes set
- * 2. TCP conneciton is alive and no TCPAOBad segments
+ * 2. TCP connection is alive and no TCPAOBad segments
  * In order to test (2), the test doesn't just adjust seq number for a queue
  * on a connected socket, but migrates it to another sk+port number, so
  * that there won't be any delayed packets that will fail to verify
@@ -40,7 +40,7 @@ static void test_adjust_seqs(struct tcp_sock_state *img,
 static int test_sk_restore(struct tcp_sock_state *img,
 			   struct tcp_ao_repair *ao_img, sockaddr_af *saddr,
 			   const union tcp_addr daddr, unsigned int dport,
-			   struct tcp_ao_counters *cnt)
+			   struct tcp_counters *cnt)
 {
 	int sk;
 
@@ -54,8 +54,8 @@ static int test_sk_restore(struct tcp_sock_state *img,
 		test_error("setsockopt(TCP_AO_ADD_KEY)");
 	test_ao_restore(sk, ao_img);
 
-	if (test_get_tcp_ao_counters(sk, cnt))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, cnt))
+		test_error("test_get_tcp_counters()");
 
 	test_disable_repair(sk);
 	test_sock_state_free(img);
@@ -65,7 +65,7 @@ static int test_sk_restore(struct tcp_sock_state *img,
 static void *server_fn(void *arg)
 {
 	uint64_t before_good, after_good, after_bad;
-	struct tcp_ao_counters ao1, ao2;
+	struct tcp_counters cnt1, cnt2;
 	struct tcp_sock_state img;
 	struct tcp_ao_repair ao_img;
 	sockaddr_af saddr;
@@ -114,7 +114,7 @@ static void *server_fn(void *arg)
 	test_adjust_seqs(&img, &ao_img, true);
 	synchronize_threads(); /* 4: dump finished */
 	sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
-			     client_new_port, &ao1);
+			     client_new_port, &cnt1);
 
 	trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr,
 			this_ip_dest, test_server_port + 1, client_new_port, 1);
@@ -136,11 +136,11 @@ static void *server_fn(void *arg)
 	}
 
 	synchronize_threads(); /* 6: verify counters after SEQ-number rollover */
-	if (test_get_tcp_ao_counters(sk, &ao2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt2))
+		test_error("test_get_tcp_counters()");
 	after_good = netstat_get_one("TCPAOGood", NULL);
 
-	test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+	test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
 
 	if (after_good <= before_good) {
 		test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
@@ -173,7 +173,7 @@ out:
 static void *client_fn(void *arg)
 {
 	uint64_t before_good, after_good, after_bad;
-	struct tcp_ao_counters ao1, ao2;
+	struct tcp_counters cnt1, cnt2;
 	struct tcp_sock_state img;
 	struct tcp_ao_repair ao_img;
 	sockaddr_af saddr;
@@ -191,7 +191,7 @@ static void *client_fn(void *arg)
 		test_error("failed to connect()");
 
 	synchronize_threads(); /* 2: accepted => send data */
-	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+	if (test_client_verify(sk, msg_len, nr_packets)) {
 		test_fail("pre-migrate verify failed");
 		return NULL;
 	}
@@ -213,20 +213,20 @@ static void *client_fn(void *arg)
 	test_adjust_seqs(&img, &ao_img, false);
 	synchronize_threads(); /* 4: dump finished */
 	sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
-			     test_server_port + 1, &ao1);
+			     test_server_port + 1, &cnt1);
 
 	synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
-	if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+	if (test_client_verify(sk, msg_len, nr_packets))
 		test_fail("post-migrate verify failed");
 	else
 		test_ok("post-migrate connection alive");
 
 	synchronize_threads(); /* 5: verify counters after SEQ-number rollover */
-	if (test_get_tcp_ao_counters(sk, &ao2))
-		test_error("test_get_tcp_ao_counters()");
+	if (test_get_tcp_counters(sk, &cnt2))
+		test_error("test_get_tcp_counters()");
 	after_good = netstat_get_one("TCPAOGood", NULL);
 
-	test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+	test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
 
 	if (after_good <= before_good) {
 		test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
index f779e5892bc1..a1467b64390a 100644
--- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -6,6 +6,7 @@
 #define fault(type)	(inj == FAULT_ ## type)
 static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver.";
 static const char *ao_password = DEFAULT_TEST_PASSWORD;
+static volatile int sk_pair;
 
 static union tcp_addr client2;
 static union tcp_addr client3;
@@ -41,10 +42,10 @@ static void try_accept(const char *tst_name, unsigned int port,
 		       const char *cnt_name, test_cnt cnt_expected,
 		       int needs_tcp_md5, fault_t inj)
 {
-	struct tcp_ao_counters ao_cnt1, ao_cnt2;
+	struct tcp_counters cnt1, cnt2;
 	uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
-	int lsk, err, sk = 0;
-	time_t timeout;
+	test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
+	int lsk, err, sk = -1;
 
 	if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
 		return;
@@ -63,22 +64,25 @@ static void try_accept(const char *tst_name, unsigned int port,
 
 	if (cnt_name)
 		before_cnt = netstat_get_one(cnt_name, NULL);
-	if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1))
-		test_error("test_get_tcp_ao_counters()");
+	if (ao_addr && test_get_tcp_counters(lsk, &cnt1))
+		test_error("test_get_tcp_counters()");
 
 	synchronize_threads(); /* preparations done */
 
-	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
-	err = test_wait_fd(lsk, timeout, 0);
+	err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair);
 	synchronize_threads(); /* connect()/accept() timeouts */
 	if (err == -ETIMEDOUT) {
+		sk_pair = err;
 		if (!fault(TIMEOUT))
-			test_fail("timed out for accept()");
+			test_fail("%s: timed out for accept()", tst_name);
+	} else if (err == -EKEYREJECTED) {
+		if (!fault(KEYREJECT))
+			test_fail("%s: key was rejected", tst_name);
 	} else if (err < 0) {
-		test_error("test_wait_fd()");
+		test_error("test_skpair_wait_poll()");
 	} else {
 		if (fault(TIMEOUT))
-			test_fail("ready to accept");
+			test_fail("%s: ready to accept", tst_name);
 
 		sk = accept(lsk, NULL, NULL);
 		if (sk < 0) {
@@ -89,8 +93,8 @@ static void try_accept(const char *tst_name, unsigned int port,
 		}
 	}
 
-	if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2))
-		test_error("test_get_tcp_ao_counters()");
+	if (ao_addr && test_get_tcp_counters(lsk, &cnt2))
+		test_error("test_get_tcp_counters()");
 	close(lsk);
 
 	if (!cnt_name) {
@@ -108,11 +112,11 @@ static void try_accept(const char *tst_name, unsigned int port,
 			tst_name, cnt_name, before_cnt, after_cnt);
 	}
 	if (ao_addr)
-		test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+		test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
 
 out:
 	synchronize_threads(); /* test_kill_sk() */
-	if (sk > 0)
+	if (sk >= 0)
 		test_kill_sk(sk);
 }
 
@@ -153,78 +157,82 @@ static void *server_fn(void *arg)
 
 	server_add_routes();
 
-	try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+	try_accept("[server] AO server (INADDR_ANY): AO client", port++, NULL, 0,
 		   &addr_any, 0, 0, 100, 100, 0, "TCPAOGood",
 		   TEST_CNT_GOOD, 0, 0);
-	try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
+	try_accept("[server] AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
 		   &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected",
-		   0, 1, FAULT_TIMEOUT);
-	try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0,
+		   TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+	try_accept("[server] AO server (INADDR_ANY): no sign client", port++, NULL, 0,
 		   &addr_any, 0, 0, 100, 100, 0, "TCPAORequired",
 		   TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
-	try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+	try_accept("[server] AO server (AO_REQUIRED): AO client", port++, NULL, 0,
 		   &this_ip_dest, TEST_PREFIX, true,
 		   100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0);
-	try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+	try_accept("[server] AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
 		   &this_ip_dest, TEST_PREFIX, true,
 		   100, 100, 0, "TCPAORequired",
 		   TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
 
-	try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
+	try_accept("[server] MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
 		   NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
-		   0, 1, FAULT_TIMEOUT);
-	try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+		   TEST_CNT_NS_KEY_NOT_FOUND, 1, FAULT_TIMEOUT);
+	try_accept("[server] MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
 		   NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
-	try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
+	try_accept("[server] MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
 		   0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound",
-		   0, 1, FAULT_TIMEOUT);
+		   TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT);
 
-	try_accept("no sign server: AO client", port++, NULL, 0,
+	try_accept("[server] no sign server: AO client", port++, NULL, 0,
 		   NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
-		   TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
-	try_accept("no sign server: MD5 client", port++, NULL, 0,
+		   TEST_CNT_NS_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
+	try_accept("[server] no sign server: MD5 client", port++, NULL, 0,
 		   NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected",
-		   0, 1, FAULT_TIMEOUT);
-	try_accept("no sign server: no sign client", port++, NULL, 0,
+		   TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+	try_accept("[server] no sign server: no sign client", port++, NULL, 0,
 		   NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
 
-	try_accept("AO+MD5 server: AO client (matching)", port++,
+	try_accept("[server] AO+MD5 server: AO client (matching)", port++,
 		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
 		   100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0);
-	try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++,
+	try_accept("[server] AO+MD5 server: AO client (misconfig, matching MD5)", port++,
 		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
 		   100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
 		   1, FAULT_TIMEOUT);
-	try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++,
+	try_accept("[server] AO+MD5 server: AO client (misconfig, non-matching)", port++,
 		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
 		   100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
 		   1, FAULT_TIMEOUT);
-	try_accept("AO+MD5 server: MD5 client (matching)", port++,
+	try_accept("[server] AO+MD5 server: MD5 client (matching)", port++,
 		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
 		   100, 100, 0, NULL, 0, 1, 0);
-	try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
+	try_accept("[server] AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
 		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
-		   100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
-	try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
+		   100, 100, 0, "TCPMD5Unexpected",
+		   TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+	try_accept("[server] AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
 		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
-		   100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
-	try_accept("AO+MD5 server: no sign client (unmatched)", port++,
+		   100, 100, 0, "TCPMD5Unexpected",
+		   TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+	try_accept("[server] AO+MD5 server: no sign client (unmatched)", port++,
 		   &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
 		   100, 100, 0, "CurrEstab", 0, 1, 0);
-	try_accept("AO+MD5 server: no sign client (misconfig, matching AO)",
+	try_accept("[server] AO+MD5 server: no sign client (misconfig, matching AO)",
 		   port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
 		   100, 100, 0, "TCPAORequired",
 		   TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT);
-	try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)",
+	try_accept("[server] AO+MD5 server: no sign client (misconfig, matching MD5)",
 		   port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
-		   100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT);
+		   100, 100, 0, "TCPMD5NotFound",
+		   TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT);
 
-	try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+	/* Key rejected by the other side, failing short through skpair */
+	try_accept("[server] AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
 		   port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
-		   100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
-	try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+		   100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT);
+	try_accept("[server] AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
 		   port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
-		   100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+		   100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT);
 
 	server_add_fail_tests(&port);
 
@@ -259,7 +267,6 @@ static void try_connect(const char *tst_name, unsigned int port,
 		       uint8_t sndid, uint8_t rcvid, uint8_t vrf,
 		       fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr)
 {
-	time_t timeout;
 	int sk, ret;
 
 	if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
@@ -281,11 +288,10 @@ static void try_connect(const char *tst_name, unsigned int port,
 
 	synchronize_threads(); /* preparations done */
 
-	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
-	ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
-
+	ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair);
 	synchronize_threads(); /* connect()/accept() timeouts */
 	if (ret < 0) {
+		sk_pair = ret;
 		if (fault(KEYREJECT) && ret == -EKEYREJECTED)
 			test_ok("%s: connect() was prevented", tst_name);
 		else if (ret == -ETIMEDOUT && fault(TIMEOUT))
@@ -305,8 +311,7 @@ static void try_connect(const char *tst_name, unsigned int port,
 
 out:
 	synchronize_threads(); /* test_kill_sk() */
-	/* _test_connect_socket() cleans up on failure */
-	if (ret > 0)
+	if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */
 		test_kill_sk(sk);
 }
 
@@ -437,7 +442,6 @@ static void try_to_add(const char *tst_name, unsigned int port,
 		       int ao_vrf, uint8_t sndid, uint8_t rcvid,
 		       int needs_tcp_md5, fault_t inj)
 {
-	time_t timeout;
 	int sk, ret;
 
 	if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
@@ -450,11 +454,10 @@ static void try_to_add(const char *tst_name, unsigned int port,
 
 	synchronize_threads(); /* preparations done */
 
-	timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
-	ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+	ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair);
 
 	synchronize_threads(); /* connect()/accept() timeouts */
-	if (ret <= 0) {
+	if (ret < 0) {
 		test_error("%s: connect() returned %d", tst_name, ret);
 		goto out;
 	}
@@ -490,8 +493,7 @@ static void try_to_add(const char *tst_name, unsigned int port,
 
 out:
 	synchronize_threads(); /* test_kill_sk() */
-	/* _test_connect_socket() cleans up on failure */
-	if (ret > 0)
+	if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */
 		test_kill_sk(sk);
 }
 
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
index c1cb0c75156a..4b3f9b5e50fe 100644
--- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -26,7 +26,7 @@
 #include <fcntl.h>
 #include <time.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 #ifndef TCP_FASTOPEN_KEY
 #define TCP_FASTOPEN_KEY 33
diff --git a/tools/testing/selftests/net/tcp_port_share.c b/tools/testing/selftests/net/tcp_port_share.c
new file mode 100644
index 000000000000..6146b62610df
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_port_share.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2025 Cloudflare, Inc.
+
+/* Tests for TCP port sharing (bind bucket reuse). */
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdlib.h>
+
+#include "kselftest_harness.h"
+
+#define DST_PORT 30000
+#define SRC_PORT 40000
+
+struct sockaddr_inet { /* socket address + its length + printable form */
+	union {
+		struct sockaddr_storage ss;
+		struct sockaddr_in6 v6;
+		struct sockaddr_in v4;
+		struct sockaddr sa;
+	};
+	socklen_t len; /* size of the address member in use */
+	char str[INET6_ADDRSTRLEN + __builtin_strlen("[]:65535") + 1]; /* "ip:port" text */
+};
+
+const int one = 1; /* option value handed to the setsockopt() calls in the tests */
+
+static int disconnect(int fd) /* connect() to AF_UNSPEC; returns connect()'s result */
+{
+	return connect(fd, &(struct sockaddr){ AF_UNSPEC }, sizeof(struct sockaddr));
+}
+
+/* Return the local port of @fd in host byte order, or -1 with errno set. */
+static int getsockname_port(int fd)
+{
+	struct sockaddr_inet addr = {};
+
+	/* Advertise only the address storage, not the trailing len/str fields */
+	addr.len = sizeof(addr.ss);
+	if (getsockname(fd, &addr.sa, &addr.len))
+		return -1;
+
+	switch (addr.sa.sa_family) {
+	case AF_INET:
+		return ntohs(addr.v4.sin_port);
+	case AF_INET6:
+		return ntohs(addr.v6.sin6_port);
+	default:
+		errno = EAFNOSUPPORT;
+		return -1;
+	}
+}
+
+/* Fill @addr (sockaddr, len, printable str) from @af/@ip/@port; @ip is not validated. */
+static void make_inet_addr(int af, const char *ip, __u16 port,
+			   struct sockaddr_inet *addr)
+{
+	const char *fmt;
+
+	memset(addr, 0, sizeof(*addr));
+
+	if (af == AF_INET) {
+		fmt = "%s:%hu";
+		addr->v4.sin_family = af;
+		addr->v4.sin_port = htons(port);
+		addr->len = sizeof(addr->v4);
+		inet_pton(af, ip, &addr->v4.sin_addr);
+	} else if (af == AF_INET6) {
+		fmt = "[%s]:%hu";
+		addr->v6.sin6_family = af;
+		addr->v6.sin6_port = htons(port);
+		addr->len = sizeof(addr->v6);
+		inet_pton(af, ip, &addr->v6.sin6_addr);
+	} else {
+		fmt = ""; /* other families: @addr stays zeroed, str stays empty */
+	}
+
+	snprintf(addr->str, sizeof(addr->str), fmt, ip, port);
+}
+
+FIXTURE(tcp_port_share) {}; /* the fixture carries no data members */
+
+FIXTURE_VARIANT(tcp_port_share) {
+	int domain; /* address family, passed to socket() by the tests */
+	/* IP to listen on and connect to */
+	const char *dst_ip;
+	/* Primary IP to connect from */
+	const char *src1_ip;
+	/* Secondary IP to connect from */
+	const char *src2_ip;
+	/* IP to bind to in order to block the source port */
+	const char *bind_ip;
+};
+
+FIXTURE_VARIANT_ADD(tcp_port_share, ipv4) {
+	.domain = AF_INET,
+	.dst_ip = "127.0.0.1", /* all four IPv4 addresses lie in 127.0.0.0/8 */
+	.src1_ip = "127.1.1.1",
+	.src2_ip = "127.2.2.2",
+	.bind_ip = "127.3.3.3",
+};
+
+FIXTURE_VARIANT_ADD(tcp_port_share, ipv6) {
+	.domain = AF_INET6,
+	.dst_ip = "::1",
+	.src1_ip = "2001:db8::1", /* the 2001:db8::N addresses are added to lo in FIXTURE_SETUP */
+	.src2_ip = "2001:db8::2",
+	.bind_ip = "2001:db8::3",
+};
+
+FIXTURE_SETUP(tcp_port_share)
+{
+	int sc;
+
+	ASSERT_EQ(unshare(CLONE_NEWNET), 0); /* move into a new network namespace */
+	ASSERT_EQ(system("ip link set dev lo up"), 0);
+	ASSERT_EQ(system("ip addr add dev lo 2001:db8::1/32 nodad"), 0);
+	ASSERT_EQ(system("ip addr add dev lo 2001:db8::2/32 nodad"), 0);
+	ASSERT_EQ(system("ip addr add dev lo 2001:db8::3/32 nodad"), 0);
+	/* Shrink the local port range to the single port SRC_PORT */
+	sc = open("/proc/sys/net/ipv4/ip_local_port_range", O_WRONLY);
+	ASSERT_GE(sc, 0);
+	ASSERT_GT(dprintf(sc, "%hu %hu\n", SRC_PORT, SRC_PORT), 0);
+	ASSERT_EQ(close(sc), 0);
+}
+
+FIXTURE_TEARDOWN(tcp_port_share) {} /* no explicit cleanup is performed */
+
+/* Verify that an ephemeral port becomes available again after the socket
+ * bound to it and blocking it from reuse is closed.
+ */
+TEST_F(tcp_port_share, can_reuse_port_after_bind_and_close)
+{
+	const typeof(variant) v = variant;
+	struct sockaddr_inet addr;
+	int c1, c2, ln, pb;
+
+	/* Listen on <dst_ip>:<DST_PORT> */
+	ln = socket(v->domain, SOCK_STREAM, 0);
+	ASSERT_GE(ln, 0) TH_LOG("socket(): %m");
+	ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+	make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+	ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+	ASSERT_EQ(listen(ln, 2), 0);
+
+	/* Connect from <src1_ip>:<SRC_PORT> */
+	c1 = socket(v->domain, SOCK_STREAM, 0);
+	ASSERT_GE(c1, 0) TH_LOG("socket(): %m");
+	ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+	make_inet_addr(v->domain, v->src1_ip, 0, &addr);
+	ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+	make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+	ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str);
+	ASSERT_EQ(getsockname_port(c1), SRC_PORT);
+
+	/* Bind to <bind_ip>:<SRC_PORT>. Block the port from reuse. */
+	pb = socket(v->domain, SOCK_STREAM, 0);
+	ASSERT_GE(pb, 0) TH_LOG("socket(): %m");
+	ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+	make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr);
+	ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+	/* Try to connect from <src2_ip>:<SRC_PORT>. Expect failure. */
+	c2 = socket(v->domain, SOCK_STREAM, 0);
+	ASSERT_GE(c2, 0) TH_LOG("socket(): %m");
+	ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+	make_inet_addr(v->domain, v->src2_ip, 0, &addr);
+	ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+	make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+	ASSERT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str);
+	ASSERT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m");
+
+	/* Unbind from <bind_ip>:<SRC_PORT>. Unblock the port for reuse. */
+	ASSERT_EQ(close(pb), 0);
+
+	/* Connect again from <src2_ip>:<SRC_PORT> */
+	EXPECT_EQ(connect(c2, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str);
+	EXPECT_EQ(getsockname_port(c2), SRC_PORT);
+
+	ASSERT_EQ(close(c2), 0);
+	ASSERT_EQ(close(c1), 0);
+	ASSERT_EQ(close(ln), 0);
+}
+
+/* Verify that a socket auto-bound during connect() blocks port reuse after
+ * disconnect (connect(AF_UNSPEC)) followed by an explicit port bind().
+ */
+TEST_F(tcp_port_share, port_block_after_disconnect)
+{
+	const typeof(variant) v = variant;
+	struct sockaddr_inet addr;
+	int c1, c2, ln, pb;
+
+	/* Listen on <dst_ip>:<DST_PORT> */
+	ln = socket(v->domain, SOCK_STREAM, 0);
+	ASSERT_GE(ln, 0) TH_LOG("socket(): %m");
+	ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+	make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+	ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+	ASSERT_EQ(listen(ln, 2), 0);
+
+	/* Connect from <src1_ip>:<SRC_PORT> */
+	c1 = socket(v->domain, SOCK_STREAM, 0);
+	ASSERT_GE(c1, 0) TH_LOG("socket(): %m");
+	ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+	make_inet_addr(v->domain, v->src1_ip, 0, &addr);
+	ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+	make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+	ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str);
+	ASSERT_EQ(getsockname_port(c1), SRC_PORT);
+
+	/* Disconnect the socket and bind it to <bind_ip>:<SRC_PORT> to block the port */
+	ASSERT_EQ(disconnect(c1), 0) TH_LOG("disconnect: %m");
+	ASSERT_EQ(setsockopt(c1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+	make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr);
+	ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+	/* Trigger port-addr bucket state update with another bind() and close() */
+	pb = socket(v->domain, SOCK_STREAM, 0);
+	ASSERT_GE(pb, 0) TH_LOG("socket(): %m");
+	ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+	make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr);
+	ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+	ASSERT_EQ(close(pb), 0);
+
+	/* Connect from <src2_ip>:<SRC_PORT>. Expect failure: c1 is still bound to the port. */
+	c2 = socket(v->domain, SOCK_STREAM, 0);
+	ASSERT_GE(c2, 0) TH_LOG("socket: %m");
+	ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+	make_inet_addr(v->domain, v->src2_ip, 0, &addr);
+	ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+	make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+	EXPECT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str);
+	EXPECT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m");
+
+	ASSERT_EQ(close(c2), 0);
+	ASSERT_EQ(close(c1), 0);
+	ASSERT_EQ(close(ln), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh
deleted file mode 100755
index 3119b80e711f..000000000000
--- a/tools/testing/selftests/net/test_blackhole_dev.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Runs blackhole-dev test using blackhole-dev kernel module
-
-if /sbin/modprobe -q test_blackhole_dev ; then
-	/sbin/modprobe -q -r test_blackhole_dev;
-	echo "test_blackhole_dev: ok";
-else
-	echo "test_blackhole_dev: [FAIL]";
-	exit 1;
-fi
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
index 1b3f89e2b86e..2a7224fe74f2 100755
--- a/tools/testing/selftests/net/test_bridge_backup_port.sh
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -315,6 +315,29 @@ backup_port()
 	tc_check_packets $sw1 "dev vx0 egress" 101 1
 	log_test $? 0 "No forwarding out of vx0"
 
+	# Check that packets are forwarded out of vx0 when swp1 is
+	# administratively down and out of swp1 when it is administratively up
+	# again.
+	run_cmd "ip -n $sw1 link set dev swp1 down"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+	log_test $? 0 "swp1 administratively down"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 3
+	log_test $? 0 "No forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "Forwarding out of vx0"
+
+	run_cmd "ip -n $sw1 link set dev swp1 up"
+	busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+	log_test $? 0 "swp1 administratively up"
+
+	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	log_test $? 0 "Forwarding out of swp1"
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
+	log_test $? 0 "No forwarding out of vx0"
+
 	# Remove vx0 as the backup port of swp1 and check that packets are no
 	# longer forwarded out of vx0 when swp1 does not have a carrier.
 	run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port"
@@ -322,9 +345,9 @@ backup_port()
 	log_test $? 1 "vx0 not configured as backup port of swp1"
 
 	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
-	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	tc_check_packets $sw1 "dev swp1 egress" 101 5
 	log_test $? 0 "Forwarding out of swp1"
-	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
 	log_test $? 0 "No forwarding out of vx0"
 
 	run_cmd "ip -n $sw1 link set dev swp1 carrier off"
@@ -332,9 +355,9 @@ backup_port()
 	log_test $? 0 "swp1 carrier off"
 
 	run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
-	tc_check_packets $sw1 "dev swp1 egress" 101 4
+	tc_check_packets $sw1 "dev swp1 egress" 101 5
 	log_test $? 0 "No forwarding out of swp1"
-	tc_check_packets $sw1 "dev vx0 egress" 101 1
+	tc_check_packets $sw1 "dev vx0 egress" 101 2
 	log_test $? 0 "No forwarding out of vx0"
 }
 
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index 02b986c9c247..9067197c9055 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -51,7 +51,9 @@ ret=0
 # All tests in this script. Can be overridden with -t option.
 TESTS="
 	neigh_suppress_arp
+	neigh_suppress_uc_arp
 	neigh_suppress_ns
+	neigh_suppress_uc_ns
 	neigh_vlan_suppress_arp
 	neigh_vlan_suppress_ns
 "
@@ -388,6 +390,52 @@ neigh_suppress_arp()
 	neigh_suppress_arp_common $vid $sip $tip
 }
 
+neigh_suppress_uc_arp_common()
+{
+	local vid=$1; shift
+	local sip=$1; shift
+	local tip=$1; shift
+	local tmac
+	# Args: VLAN ID, sender IPv4 used by h1, target IPv4 (mapped to h2's MAC below)
+	echo
+	echo "Unicast ARP, per-port ARP suppression - VLAN $vid"
+	echo "-----------------------------------------------"
+
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+	# Place h2's eth0.$vid MAC behind vx0 and resolve $tip to it on br0.$vid
+	tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+	run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+	run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid"
+	# Filter 101 on h1 ingress: ARP replies whose sender IP is $tip
+	run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+	run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass"
+	# Filter 101 on h2 egress: ARP replies whose target IP is $sip
+	run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+	run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass"
+	# Send one ARP request addressed to $tmac; each filter is expected to count 1
+	run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q"
+	tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+	log_test $? 0 "Unicast ARP, suppression on, h1 filter"
+	tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+	log_test $? 0 "Unicast ARP, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_arp()
+{
+	local vid=10
+	local sip=192.0.2.1
+	local tip=192.0.2.2
+	# Run the unicast ARP check on VLAN 10 ...
+	neigh_suppress_uc_arp_common $vid $sip $tip
+	# ... and again on VLAN 20 with a different address pair
+	vid=20
+	sip=192.0.2.17
+	tip=192.0.2.18
+	neigh_suppress_uc_arp_common $vid $sip $tip
+}
+
 neigh_suppress_ns_common()
 {
 	local vid=$1; shift
@@ -494,6 +542,78 @@ neigh_suppress_ns()
 	neigh_suppress_ns_common $vid $saddr $daddr $maddr
 }
 
+icmpv6_header_get()
+{
+	# Print an ICMPv6 header followed by the target address as one string
+	# of colon-separated hex bytes; every field ends with a ":".
+	local csum=$1 tip=$2
+	local type p
+
+	# Type 135 (Neighbor Solicitation), hex format
+	type="87"
+
+	p="$type:"			# ICMPv6.type
+	p="${p}00:"			# ICMPv6.code
+	p="${p}${csum}:"		# ICMPv6.checksum
+	p="${p}00:00:00:00:"		# Reserved
+	p="${p}${tip}:"			# Target Address
+
+	echo $p
+}
+
+neigh_suppress_uc_ns_common()
+{
+	local vid=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local full_dip=$1; shift
+	local csum=$1; shift
+	local tmac
+	# Args: VLAN ID, source IPv6, dest IPv6, dest as hex bytes, ICMPv6 checksum
+	echo
+	echo "Unicast NS, per-port NS suppression - VLAN $vid"
+	echo "---------------------------------------------"
+
+	run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+	run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+	log_test $? 0 "\"neigh_suppress\" is on"
+	# Place h2's eth0.$vid MAC behind vx0 and resolve $dip to it on br0.$vid
+	tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+	run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+	run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid"
+	# Filter 101 on h1 ingress: ICMPv6 type 136 code 0 sourced from $dip
+	run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+	run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass"
+	# Filter 101 on h2 egress: ICMPv6 type 136 code 0 destined to $sip
+	run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+	run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass"
+	# Send one hand-built NS addressed to $tmac; each filter is expected to count 1
+	run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q"
+	tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+	log_test $? 0 "Unicast NS, suppression on, h1 filter"
+	tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+	log_test $? 0 "Unicast NS, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_ns()
+{
+	local vid=10
+	local saddr=2001:db8:1::1
+	local daddr=2001:db8:1::2
+	local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02
+	local csum="ef:79"
+	# full_daddr is daddr spelled out as 16 hex bytes; csum fills ICMPv6.checksum
+	neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+	# Same check on VLAN 20 with its own addresses and checksum
+	vid=20
+	saddr=2001:db8:2::1
+	daddr=2001:db8:2::2
+	full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02
+	csum="ef:76"
+
+	neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+}
+
 neigh_vlan_suppress_arp()
 {
 	local vid1=10
@@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then
 	exit $ksft_skip
 fi
 
+if [ ! -x "$(command -v mausezahn)" ]; then
+	echo "SKIP: Could not run test without mausezahn tool"
+	exit $ksft_skip
+fi
+
 bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
 if [ $? -ne 0 ]; then
    echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh
new file mode 100755
index 000000000000..7c594bf6ead0
--- /dev/null
+++ b/tools/testing/selftests/net/test_neigh.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+	extern_valid_ipv4
+	extern_valid_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+# Run "$1" via eval; hide its stderr unless verbose, echo stdout when verbose.
+run_cmd()
+{
+	local cmd="$1" out rc
+	local stderr="2>/dev/null"
+
+	# "-v" increments VERBOSE, so treat any positive level as verbose.
+	if [ "$VERBOSE" -ge 1 ]; then
+		echo "COMMAND: $cmd"
+		stderr=
+	fi
+	out=$(eval "$cmd" "$stderr")
+	rc=$?
+	if [ "$VERBOSE" -ge 1 ] && [ -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+	set -e
+
+	setup_ns ns1 ns2
+
+	ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2"
+	ip -n "$ns1" link set dev veth0 up
+	ip -n "$ns2" link set dev veth1 up
+
+	ip -n "$ns1" address add 192.0.2.1/24 dev veth0
+	ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad
+	ip -n "$ns2" address add 192.0.2.2/24 dev veth1
+	ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad
+
+	ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+	ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+
+	sleep 5
+
+	set +e
+}
+
+exit_cleanup_all()
+{
+	cleanup_all_ns
+	exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+extern_valid_common()
+{
+	local af_str=$1; shift
+	local ip_addr=$1; shift
+	local tbl_name=$1; shift
+	local subnet=$1; shift
+	local mac
+
+	mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]')
+
+	RET=0
+
+	# Check that simple addition works.
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+	check_err $? "No \"extern_valid\" flag after addition"
+
+	log_test "$af_str \"extern_valid\" flag: Add entry"
+
+	RET=0
+
+	# Check that an entry cannot be added with "extern_valid" flag and an
+	# invalid state.
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid"
+	check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state"
+
+	log_test "$af_str \"extern_valid\" flag: Add with an invalid state"
+
+	RET=0
+
+	# Check that entry cannot be added with both "extern_valid" flag and
+	# "use" / "managed" flag.
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+	check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag"
+
+	log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag"
+
+	RET=0
+
+	# Check that "extern_valid" flag can be toggled using replace.
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0"
+	run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+	check_err $? "Did not manage to set \"extern_valid\" flag with replace"
+	run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+	check_fail $? "Did not manage to clear \"extern_valid\" flag with replace"
+
+	log_test "$af_str \"extern_valid\" flag: Replace entry"
+
+	RET=0
+
+	# Check that an existing "extern_valid" entry can be marked as
+	# "managed".
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed"
+	check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry"
+
+	log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag"
+
+	RET=0
+
+	# Check that entry cannot be replaced with "extern_valid" flag and an
+	# invalid state.
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid"
+	check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state"
+
+	log_test "$af_str \"extern_valid\" flag: Replace with an invalid state"
+
+	RET=0
+
+	# Check that an "extern_valid" entry is flushed when the interface is
+	# put administratively down.
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns1 link set dev veth0 down"
+	run_cmd "ip -n $ns1 link set dev veth0 up"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+	check_fail $? "\"extern_valid\" entry not flushed upon interface down"
+
+	log_test "$af_str \"extern_valid\" flag: Interface down"
+
+	RET=0
+
+	# Check that an "extern_valid" entry is not flushed when the interface
+	# loses its carrier.
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns2 link set dev veth1 down"
+	run_cmd "ip -n $ns2 link set dev veth1 up"
+	run_cmd "sleep 2"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+	check_err $? "\"extern_valid\" entry flushed upon carrier down"
+
+	log_test "$af_str \"extern_valid\" flag: Carrier down"
+
+	RET=0
+
+	# Check that when entry transitions to "reachable" state it maintains
+	# the "extern_valid" flag. Wait "delay_probe" seconds for ARP request /
+	# NS to be sent.
+	local delay_probe
+
+	delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]')
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+	run_cmd "sleep $((delay_probe / 1000 + 2))"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\""
+	check_err $? "Entry did not transition to \"reachable\" state"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+	check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state"
+
+	log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state"
+
+	RET=0
+
+	# Drop all packets, trigger resolution and check that entry goes back
+	# to "stale" state instead of "failed".
+	local mcast_reprobes
+	local retrans_time
+	local ucast_probes
+	local app_probes
+	local probes
+	local delay
+
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	run_cmd "tc -n $ns2 qdisc add dev veth1 clsact"
+	run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+	retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]')
+	ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]')
+	app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]')
+	mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]')
+	delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time))
+	run_cmd "sleep $((delay / 1000 + 2))"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\""
+	check_err $? "Entry did not return to \"stale\" state"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+	check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state"
+	probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]')
+	if [[ $probes -eq 0 ]]; then
+		check_err 1 "No probes were sent"
+	fi
+
+	log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state"
+
+	run_cmd "tc -n $ns2 qdisc del dev veth1 clsact"
+
+	RET=0
+
+	# Forced garbage collection runs whenever the number of entries is
+	# larger than "thresh3" and deletes stale entries that have not been
+	# updated in the last 5 seconds.
+	#
+	# Check that an "extern_valid" entry survives a forced garbage
+	# collection. Add an entry, wait 5 seconds and add more entries than
+	# "thresh3" so that forced garbage collection will run.
+	#
+	# Note that the garbage collection thresholds are global resources and
+	# that changes in the initial namespace affect all the namespaces.
+	local forced_gc_runs_t0
+	local forced_gc_runs_t1
+	local orig_thresh1
+	local orig_thresh2
+	local orig_thresh3
+
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+	orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]')
+	orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]')
+	run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+	run_cmd "sleep 5"
+	forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+	for i in {1..20}; do
+		run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+	done
+	forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+	if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then
+		check_err 1 "Forced garbage collection did not run"
+	fi
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+	check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection"
+	run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+	check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection"
+
+	log_test "$af_str \"extern_valid\" flag: Forced garbage collection"
+
+	run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1"
+
+	RET=0
+
+	# Periodic garbage collection runs every "base_reachable"/2 seconds and
+	# if the number of entries is larger than "thresh1", then it deletes
+	# stale entries that have not been used in the last "gc_stale" seconds.
+	#
+	# Check that an "extern_valid" entry survives a periodic garbage
+	# collection. Add an "extern_valid" entry, add more than "thresh1"
+	# regular entries, wait "base_reachable" (longer than "gc_stale")
+	# seconds and check that the "extern_valid" entry was not deleted.
+	#
+	# Note that the garbage collection thresholds and "base_reachable" are
+	# global resources and that changes in the initial namespace affect all
+	# the namespaces.
+	local periodic_gc_runs_t0
+	local periodic_gc_runs_t1
+	local orig_base_reachable
+	local orig_gc_stale
+
+	run_cmd "ip -n $ns1 neigh flush dev veth0"
+	orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+	orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]')
+	run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000"
+	orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]')
+	run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 1000"
+	run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+	run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+	# Wait orig_base_reachable/2 for the new interval to take effect.
+	run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))"
+	for i in {1..20}; do
+		run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+	done
+	periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+	run_cmd "sleep 10"
+	periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+	[[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]]
+	check_err $? "Periodic garbage collection did not run"
+	run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+	check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection"
+	run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+	check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection"
+
+	log_test "$af_str \"extern_valid\" flag: Periodic garbage collection"
+
+	run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale"
+	run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable"
+}
+
+extern_valid_ipv4()
+{
+	extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2.
+}
+
+extern_valid_ipv6()
+{
+	extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1::
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -v          Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=$((VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+require_command jq
+
+if ! ip neigh help 2>&1 | grep -q "extern_valid"; then
+	echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support"
+	exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+	setup; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh
new file mode 100755
index 000000000000..d8aa4362879d
--- /dev/null
+++ b/tools/testing/selftests/net/test_so_rcv.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+HOSTS=("127.0.0.1" "::1")
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+declare -A TESTS=(
+	["SO_RCVPRIORITY"]="-P 2"
+	["SO_RCVMARK"]="-M 3"
+)
+
+check_result() {
+	((TOTAL_TESTS++))
+	if [ "$1" -ne 0 ]; then
+		((FAILED_TESTS++))
+	fi
+}
+
+cleanup()
+{
+	cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+for HOST in "${HOSTS[@]}"; do
+	PROTOCOL="IPv4"
+	if [[ "$HOST" == "::1" ]]; then
+		PROTOCOL="IPv6"
+	fi
+	for test_name in "${!TESTS[@]}"; do
+		echo "Running $test_name test, $PROTOCOL"
+		arg=${TESTS[$test_name]}
+
+		ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT &
+		LISTENER_PID=$!
+
+		sleep 0.5
+
+		if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then
+			echo "Sender failed for $test_name, $PROTOCOL"
+			kill "$LISTENER_PID" 2>/dev/null
+			wait "$LISTENER_PID"
+			check_result 1
+			continue
+		fi
+
+		wait "$LISTENER_PID"
+		LISTENER_EXIT_CODE=$?
+
+		if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then
+			echo "Rcv test OK for $test_name, $PROTOCOL"
+			check_result 0
+		else
+			echo "Rcv test FAILED for $test_name, $PROTOCOL"
+			check_result 1
+		fi
+	done
+done
+
+# Default the exit codes: an unset KSFT_* would turn this into "exit" == 0.
+if [ "$FAILED_TESTS" -ne 0 ]; then
+	echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+	exit "${KSFT_FAIL:-1}"
+fi
+echo "OK - All $TOTAL_TESTS tests passed"
+exit "${KSFT_PASS:-0}"
diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
index 2d442cdab11e..8b414d0edada 100755
--- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
+++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
@@ -1,29 +1,114 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-# Check FDB default-remote handling across "ip link set".
+ALL_TESTS="
+	test_set_remote
+	test_change_mc_remote
+"
+source lib.sh
 
 check_remotes()
 {
 	local what=$1; shift
 	local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l)
 
-	echo -ne "expected two remotes after $what\t"
-	if [[ $N != 2 ]]; then
-		echo "[FAIL]"
-		EXIT_STATUS=1
+	((N == 2))
+	check_err $? "expected 2 remotes after $what, got $N"
+}
+
+# Check FDB default-remote handling across "ip link set".
+test_set_remote()
+{
+	RET=0
+
+	adf_ip_link_add vx up type vxlan id 2000 dstport 4789
+	bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
+	bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
+	check_remotes "fdb append"
+
+	ip link set dev vx type vxlan remote 192.0.2.30
+	check_remotes "link set"
+
+	log_test 'FDB default-remote handling across "ip link set"'
+}
+
+fmt_remote()
+{
+	local addr=$1; shift
+
+	if [[ $addr == 224.* ]]; then
+		echo "group $addr"
 	else
-		echo "[ OK ]"
+		echo "remote $addr"
 	fi
 }
 
-ip link add name vx up type vxlan id 2000 dstport 4789
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
-check_remotes "fdb append"
+change_remote()
+{
+	local remote=$1; shift
+
+	ip link set dev vx type vxlan $(fmt_remote $remote) dev v1
+}
+
+check_membership()
+{
+	local check_vec=("$@")
+
+	local memberships
+	memberships=$(
+	    netstat -n --groups |
+		sed -n '/^v1\b/p' |
+		grep -o '[^ ]*$'
+	)
+	check_err $? "Couldn't obtain group memberships"
+
+	local item
+	for item in "${check_vec[@]}"; do
+		eval "local $item"
+		echo "$memberships" | grep -q "\b$group\b"
+		check_err_fail $fail $? "$group is_ex reported in IGMP query response"
+	done
+}
+
+test_change_mc_remote()
+{
+	check_command netstat || return
+
+	adf_ip_link_add v1 up type veth peer name v2
+	adf_ip_link_set_up v2
+
+	RET=0
+
+	adf_ip_link_add vx up type vxlan dstport 4789 \
+		local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000
+
+	check_membership "group=224.1.1.1 fail=0" \
+			 "group=224.1.1.2 fail=1" \
+			 "group=224.1.1.3 fail=1"
+
+	log_test "MC group report after VXLAN creation"
+
+	RET=0
+
+	change_remote 224.1.1.2
+	check_membership "group=224.1.1.1 fail=1" \
+			 "group=224.1.1.2 fail=0" \
+			 "group=224.1.1.3 fail=1"
+
+	log_test "MC group report after changing VXLAN remote MC->MC"
+
+	RET=0
+
+	change_remote 192.0.2.2
+	check_membership "group=224.1.1.1 fail=1" \
+			 "group=224.1.1.2 fail=1" \
+			 "group=224.1.1.3 fail=1"
+
+	log_test "MC group report after changing VXLAN remote MC->UC"
+}
+
+trap defer_scopes_cleanup EXIT
 
-ip link set dev vx type vxlan remote 192.0.2.30
-check_remotes "link set"
+tests_run
 
-ip link del dev vx
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh
new file mode 100755
index 000000000000..20f3369f776b
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nh.sh
@@ -0,0 +1,223 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+	basic_tx_ipv4
+	basic_tx_ipv6
+	learning
+	proxy_ipv4
+	proxy_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+# Run "$1" via eval; hide its stderr unless verbose, echo stdout when verbose.
+run_cmd()
+{
+	local cmd="$1" out rc
+	local stderr="2>/dev/null"
+
+	# "-v" increments VERBOSE, so treat any positive level as verbose.
+	if [ "$VERBOSE" -ge 1 ]; then
+		echo "COMMAND: $cmd"
+		stderr=
+	fi
+	out=$(eval "$cmd" "$stderr")
+	rc=$?
+	if [ "$VERBOSE" -ge 1 ] && [ -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	return $rc
+}
+
+################################################################################
+# Cleanup
+
+exit_cleanup_all()
+{
+	cleanup_all_ns
+	exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+nh_stats_get()
+{
+	ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]"
+}
+
+tc_stats_get()
+{
+	tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1"
+}
+
+basic_tx_common()
+{
+	local af_str=$1; shift
+	local proto=$1; shift
+	local local_addr=$1; shift
+	local plen=$1; shift
+	local remote_addr=$1; shift
+
+	RET=0
+
+	# Test basic Tx functionality. Check that stats are incremented on
+	# both the FDB nexthop group and the egress device.
+
+	run_cmd "ip -n $ns1 link add name dummy1 up type dummy"
+	run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1"
+	run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact"
+	run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass"
+
+	run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+	run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+	run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+	run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789"
+	run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+	run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q"
+
+	busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null
+	check_err $? "FDB nexthop group stats did not increase"
+
+	busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null
+	check_err $? "tc filter stats did not increase"
+
+	log_test "VXLAN FDB nexthop: $af_str basic Tx"
+}
+
+basic_tx_ipv4()
+{
+	basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2
+}
+
+basic_tx_ipv6()
+{
+	basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2
+}
+
+learning()
+{
+	RET=0
+
+	# When learning is enabled on the VXLAN device, an incoming packet
+	# might try to refresh an FDB entry that points to an FDB nexthop group
+	# instead of an ordinary remote destination. Check that the kernel does
+	# not crash in this situation.
+
+	run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo"
+	run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo"
+
+	run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb"
+	run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+	run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass"
+	run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning"
+
+	run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020"
+	run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10"
+
+	run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q"
+
+	log_test "VXLAN FDB nexthop: learning"
+}
+
+proxy_common()
+{
+	local af_str=$1; shift
+	local local_addr=$1; shift
+	local plen=$1; shift
+	local remote_addr=$1; shift
+	local neigh_addr=$1; shift
+	local ping_cmd=$1; shift
+
+	RET=0
+
+	# When the "proxy" option is enabled on the VXLAN device, the device
+	# will suppress ARP requests and IPv6 Neighbor Solicitation messages if
+	# it is able to reply on behalf of the remote host. That is, if a
+	# matching and valid neighbor entry is configured on the VXLAN device
+	# whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The
+	# FDB entry for the neighbor's MAC address might point to an FDB
+	# nexthop group instead of an ordinary remote destination. Check that
+	# the kernel does not crash in this situation.
+
+	run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+	run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+	run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+	run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy"
+
+	run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0"
+
+	run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+	run_cmd "ip netns exec $ns1 $ping_cmd"
+
+	log_test "VXLAN FDB nexthop: $af_str proxy"
+}
+
+proxy_ipv4()
+{
+	proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \
+		"arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3"
+}
+
+proxy_ipv6()
+{
+	proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \
+		"ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -v          Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=$((VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+require_command mausezahn
+require_command arping
+require_command ndisc6
+require_command jq
+
+if ! ip nexthop help 2>&1 | grep -q "stats"; then
+	echo "SKIP: iproute2 ip too old, missing nexthop stats support"
+	exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+	setup_ns ns1; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 6127a78ee988..8deacc565afa 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -146,18 +146,17 @@ run_cmd()
 }
 
 check_hv_connectivity() {
-	ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
-	sleep 1
-	ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+	slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+	slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
 
 	return $?
 }
 
 check_vm_connectivity() {
-	run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+	slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
 	log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
 
-	run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+	slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
 	log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
 }
 
diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
new file mode 100644
index 000000000000..3b1ee2d3d417
--- /dev/null
+++ b/tools/testing/selftests/net/tfo.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <errno.h>
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static struct sockaddr_in6 cfg_addr;
+static char *cfg_outfile;
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+	int ret;
+
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_port = htons(port);
+
+	ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+	if (ret != 1) {
+		/* fallback to plain IPv4 */
+		ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+		if (ret != 1)
+			return -1;
+
+		/* add ::ffff prefix */
+		sin6->sin6_addr.s6_addr32[0] = 0;
+		sin6->sin6_addr.s6_addr32[1] = 0;
+		sin6->sin6_addr.s6_addr16[4] = 0;
+		sin6->sin6_addr.s6_addr16[5] = 0xffff;
+	}
+
+	return 0;
+}
+
+/* Accept one connection and log its SO_INCOMING_NAPI_ID to cfg_outfile. */
+static void run_server(void)
+{
+	int qlen = 32;		/* TCP_FASTOPEN takes an int, not a long */
+	int fd, opt, connfd;
+	socklen_t len;
+	char buf[64];
+	FILE *outfile;
+
+	outfile = fopen(cfg_outfile, "w");
+	if (!outfile)
+		error(1, errno, "fopen() outfile");
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket()");
+
+	opt = 1;
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0)
+		error(1, errno, "setsockopt(SO_REUSEADDR)");
+	if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
+		error(1, errno, "setsockopt(TCP_FASTOPEN)");
+
+	if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0)
+		error(1, errno, "bind()");
+
+	if (listen(fd, 5) < 0)
+		error(1, errno, "listen()");
+
+	len = sizeof(cfg_addr);
+	connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len);
+	if (connfd < 0)
+		error(1, errno, "accept()");
+
+	len = sizeof(opt);	/* opt is reused to hold the NAPI ID */
+	if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0)
+		error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
+
+	if (read(connfd, buf, sizeof(buf)) < 0)
+		error(1, errno, "read()");
+
+	if (fprintf(outfile, "%d\n", opt) < 0)
+		error(1, errno, "fprintf()");
+
+	fclose(outfile);
+	close(connfd);
+	close(fd);
+}
+
+static void run_client(void)
+{
+	int fd, ret;
+	char *msg = "Hello, world!";
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket()");
+
+	ret = sendto(fd, msg, strlen(msg), MSG_FASTOPEN,
+		     (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+	if (ret < 0)
+		error(1, errno, "sendto()");
+
+	close(fd);
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile> ", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+	char *addr = NULL;
+	int ret;
+	int c;
+
+	if (argc <= 1)
+		usage(argv[0]);
+
+	while ((c = getopt(argc, argv, "sch:p:o:")) != -1) {
+		switch (c) {
+		case 's':
+			if (cfg_client)
+				error(1, 0, "Pass one of -s or -c");
+			cfg_server = 1;
+			break;
+		case 'c':
+			if (cfg_server)
+				error(1, 0, "Pass one of -s or -c");
+			cfg_client = 1;
+			break;
+		case 'h':
+			addr = optarg;
+			break;
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 'o':
+			cfg_outfile = strdup(optarg);
+			if (!cfg_outfile)
+				error(1, 0, "outfile invalid");
+			break;
+		}
+	}
+
+	if (cfg_server && addr)
+		error(1, 0, "Server cannot have -h specified");
+
+	memset(addr6, 0, sizeof(*addr6));
+	addr6->sin6_family = AF_INET6;
+	addr6->sin6_port = htons(cfg_port);
+	addr6->sin6_addr = in6addr_any;
+	if (addr) {
+		ret = parse_address(addr, cfg_port, addr6);
+		if (ret)
+			error(1, 0, "Client address parse error: %s", addr);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (cfg_server)
+		run_server();
+	else if (cfg_client)
+		run_client();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh
new file mode 100755
index 000000000000..f116f888b794
--- /dev/null
+++ b/tools/testing/selftests/net/tfo_passive.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source lib.sh
+
+NSIM_SV_ID=$((256 + RANDOM % 256))
+NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
+NSIM_CL_ID=$((512 + RANDOM % 256))
+NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+SERVER_IP=192.168.1.1
+CLIENT_IP=192.168.1.2
+SERVER_PORT=48675
+
+setup_ns()
+{
+	set -e
+	ip netns add nssv
+	ip netns add nscl
+
+	NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_SV_SYS/net -exec basename {} \;)
+	NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_CL_SYS/net -exec basename {} \;)
+
+	ip link set $NSIM_SV_NAME netns nssv
+	ip link set $NSIM_CL_NAME netns nscl
+
+	ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME
+	ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME
+
+	ip netns exec nssv ip link set dev $NSIM_SV_NAME up
+	ip netns exec nscl ip link set dev $NSIM_CL_NAME up
+
+	# Enable passive TFO
+	ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null
+
+	set +e
+}
+
+cleanup_ns()
+{
+	ip netns del nscl
+	ip netns del nssv
+}
+
+###
+### Code start
+###
+
+modprobe netdevsim
+
+# Create the server and client netdevsim devices, then link them as peers
+
+echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+# "exec {VAR}<file" makes bash allocate the fd itself and store it in VAR
+exec {NSIM_SV_FD}</var/run/netns/nssv
+NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex)
+
+# Same for the client netns; the link request below is "fd:ifindex fd:ifindex"
+exec {NSIM_CL_FD}</var/run/netns/nscl
+NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex)
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
+     $NSIM_DEV_SYS_LINK
+
+if [ $? -ne 0 ]; then
+	echo "linking netdevsim1 with netdevsim2 should succeed"
+	cleanup_ns
+	exit 1
+fi
+
+out_file=$(mktemp)
+
+timeout -k 1s 30s ip netns exec nssv ./tfo        \
+				-s                \
+				-p ${SERVER_PORT} \
+				-o ${out_file}&
+server_pid="$!"
+
+wait_local_port_listen nssv ${SERVER_PORT} tcp
+
+ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT}
+client_exit_status="$?"
+
+wait "$server_pid"
+server_exit_status="$?"
+
+res=$(cat "$out_file")
+rm "$out_file"
+
+rc=0
+if [ "$res" = "0" ]; then
+	echo "got invalid NAPI ID from passive TFO socket"
+	rc=1
+fi
+
+if [ "$client_exit_status" -ne 0 ] || [ "$server_exit_status" -ne 0 ]; then
+	# Note: timeout(1) exits with 124 if it timed out
+	echo "client exited with ${client_exit_status}"
+	echo "server exited with ${server_exit_status}"
+	rc=1
+fi
+
+# Tear everything down on failure too, so no netdevsim state is left behind
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit $rc
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 9a85f93c33d8..9e2ccea13d70 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -21,7 +21,7 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
 
 #define TLS_PAYLOAD_MAX_LEN 16384
 #define SOL_TLS 282
@@ -181,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type,
 	return sendmsg(fd, &msg, flags);
 }
 
-static int tls_recv_cmsg(struct __test_metadata *_metadata,
-			 int fd, unsigned char record_type,
-			 void *data, size_t len, int flags)
+static int __tls_recv_cmsg(struct __test_metadata *_metadata,
+			   int fd, unsigned char *ctype,
+			   void *data, size_t len, int flags)
 {
 	char cbuf[CMSG_SPACE(sizeof(char))];
 	struct cmsghdr *cmsg;
-	unsigned char ctype;
 	struct msghdr msg;
 	struct iovec vec;
 	int n;
@@ -206,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata,
 	EXPECT_NE(cmsg, NULL);
 	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
 	EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
-	ctype = *((unsigned char *)CMSG_DATA(cmsg));
+	if (ctype)
+		*ctype = *((unsigned char *)CMSG_DATA(cmsg));
+
+	return n;
+}
+
+static int tls_recv_cmsg(struct __test_metadata *_metadata,
+			 int fd, unsigned char record_type,
+			 void *data, size_t len, int flags)
+{
+	unsigned char ctype;
+	int n;
+
+	n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags);
 	EXPECT_EQ(ctype, record_type);
 
 	return n;
@@ -427,6 +439,8 @@ TEST_F(tls, sendfile)
 	EXPECT_GE(filefd, 0);
 	fstat(filefd, &st);
 	EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+
+	close(filefd);
 }
 
 TEST_F(tls, send_then_sendfile)
@@ -448,6 +462,9 @@ TEST_F(tls, send_then_sendfile)
 
 	EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
 	EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
+
+	free(buf);
+	close(filefd);
 }
 
 static void chunked_sendfile(struct __test_metadata *_metadata,
@@ -547,6 +564,40 @@ TEST_F(tls, msg_more)
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
+TEST_F(tls, cmsg_msg_more)
+{
+	char *test_str =  "test_read";
+	char record_type = 100;
+	int send_len = 10;
+
+	/* we don't allow MSG_MORE with non-DATA records */
+	EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len,
+				MSG_MORE), -1);
+	EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_F(tls, msg_more_then_cmsg)
+{
+	char *test_str = "test_read";
+	char record_type = 100;
+	int send_len = 10;
+	char buf[10 * 2];
+	int ret;
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+
+	ret = tls_send_cmsg(self->fd, record_type, test_str, send_len, 0);
+	EXPECT_EQ(ret, send_len);
+
+	/* initial DATA record didn't get merged with the non-DATA record */
+	EXPECT_EQ(recv(self->cfd, buf, send_len * 2, 0), send_len);
+
+	EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+				buf, sizeof(buf), MSG_WAITALL),
+		  send_len);
+}
+
 TEST_F(tls, msg_more_unsent)
 {
 	char const *test_str = "test_read";
@@ -895,6 +946,37 @@ TEST_F(tls, peek_and_splice)
 	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
 }
 
+#define MAX_FRAGS 48
+TEST_F(tls, splice_short)
+{
+	char sendchar = 'S';
+	struct iovec sendchar_iov = { .iov_base = &sendchar, .iov_len = 1 };
+	char read_buf[0x10000];
+	char sendbuf[0x100];
+	int pipefds[2];
+	int i;
+
+	memset(sendbuf, 's', sizeof(sendbuf));
+
+	ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0);
+	ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0);
+
+	for (i = 0; i < MAX_FRAGS; i++)
+		ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0);
+
+	ASSERT_EQ(write(pipefds[1], sendbuf, sizeof(sendbuf)), sizeof(sendbuf));
+
+	EXPECT_EQ(splice(pipefds[0], NULL, self->fd, NULL, MAX_FRAGS + 0x1000, 0),
+		  MAX_FRAGS + sizeof(sendbuf));
+	EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), 0), MAX_FRAGS + sizeof(sendbuf));
+	EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), MSG_DONTWAIT), -1);
+	EXPECT_EQ(errno, EAGAIN);
+
+	close(pipefds[0]);
+	close(pipefds[1]);
+}
+#undef MAX_FRAGS
+
 TEST_F(tls, recvmsg_single)
 {
 	char const *test_str = "test_recvmsg_single";
@@ -1753,6 +1835,42 @@ TEST_F(tls_basic, rekey_tx)
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
+TEST_F(tls_basic, disconnect)
+{
+	char const *test_str = "test_message";
+	int send_len = strlen(test_str) + 1;
+	struct tls_crypto_info_keys key;
+	struct sockaddr_in addr;
+	char buf[20];
+	int ret;
+
+	if (self->notls)
+		return;
+
+	tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+			     &key, 0);
+
+	ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len);
+	ASSERT_EQ(ret, 0);
+
+	/* Pre-queue the data so that setsockopt parses it but doesn't
+	 * dequeue it from the TCP socket. recvmsg would dequeue.
+	 */
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+	ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len);
+	ASSERT_EQ(ret, 0);
+
+	addr.sin_family = AF_UNSPEC;
+	addr.sin_addr.s_addr = htonl(INADDR_ANY);
+	addr.sin_port = 0;
+	ret = connect(self->cfd, &addr, sizeof(addr));
+	EXPECT_EQ(ret, -1);
+	EXPECT_EQ(errno, EOPNOTSUPP);
+
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+}
+
 TEST_F(tls, rekey)
 {
 	char const *test_str_1 = "test_message_before_rekey";
@@ -2128,6 +2246,284 @@ TEST_F(tls, rekey_poll_delay)
 	}
 }
 
+struct raw_rec {
+	unsigned int plain_len;
+	unsigned char plain_data[100];
+	unsigned int cipher_len;
+	unsigned char cipher_data[128];
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */
+static const struct raw_rec id0_data_l11 = {
+	.plain_len = 11,
+	.plain_data = {
+		0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+		0x72, 0x6c, 0x64,
+	},
+	.cipher_len = 40,
+	.cipher_data = {
+		0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33,
+		0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf,
+		0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd,
+		0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42,
+	},
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */
+static const struct raw_rec id0_ctrl_l0 = {
+	.plain_len = 0,
+	.plain_data = {
+	},
+	.cipher_len = 29,
+	.cipher_data = {
+		0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b,
+		0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae,
+		0x88, 0xe1, 0xd2, 0x08, 0x4f,
+	},
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */
+static const struct raw_rec id0_data_l0 = {
+	.plain_len = 0,
+	.plain_data = {
+	},
+	.cipher_len = 29,
+	.cipher_data = {
+		0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90,
+		0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03,
+		0x68, 0x80, 0xd3, 0xd8, 0xcc,
+	},
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */
+static const struct raw_rec id1_data_l11 = {
+	.plain_len = 11,
+	.plain_data = {
+		0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+		0x72, 0x6c, 0x64,
+	},
+	.cipher_len = 40,
+	.cipher_data = {
+		0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c,
+		0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3,
+		0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff,
+		0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba,
+	},
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */
+static const struct raw_rec id1_ctrl_l0 = {
+	.plain_len = 0,
+	.plain_data = {
+	},
+	.cipher_len = 29,
+	.cipher_data = {
+		0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe,
+		0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6,
+		0xb4, 0x7e, 0xef, 0x40, 0x2b,
+	},
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */
+static const struct raw_rec id1_data_l0 = {
+	.plain_len = 0,
+	.plain_data = {
+	},
+	.cipher_len = 29,
+	.cipher_data = {
+		0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86,
+		0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc,
+		0xc9, 0xbf, 0xfe, 0x5b, 0xb1,
+	},
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */
+static const struct raw_rec id2_ctrl_l11 = {
+	.plain_len = 11,
+	.plain_data = {
+		0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+		0x72, 0x6c, 0x64,
+	},
+	.cipher_len = 40,
+	.cipher_data = {
+		0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19,
+		0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87,
+		0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36,
+		0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00,
+	},
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */
+static const struct raw_rec id2_data_l11 = {
+	.plain_len = 11,
+	.plain_data = {
+		0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+		0x72, 0x6c, 0x64,
+	},
+	.cipher_len = 40,
+	.cipher_data = {
+		0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19,
+		0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87,
+		0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b,
+		0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11,
+	},
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */
+static const struct raw_rec id2_ctrl_l0 = {
+	.plain_len = 0,
+	.plain_data = {
+	},
+	.cipher_len = 29,
+	.cipher_data = {
+		0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e,
+		0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9,
+		0x06, 0xdb, 0x79, 0xe5, 0x5d,
+	},
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */
+static const struct raw_rec id2_data_l0 = {
+	.plain_len = 0,
+	.plain_data = {
+	},
+	.cipher_len = 29,
+	.cipher_data = {
+		0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26,
+		0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83,
+		0x30, 0x48, 0x69, 0x1a, 0x47,
+	},
+};
+
+FIXTURE(zero_len)
+{
+	int fd, cfd;
+	bool notls;
+};
+
+FIXTURE_VARIANT(zero_len)
+{
+	const struct raw_rec *recs[4];
+	ssize_t recv_ret[4];
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_data_data)
+{
+	.recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, },
+	.recv_ret = { 33, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data)
+{
+	.recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, },
+	.recv_ret = { 11, 0, 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data)
+{
+	.recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, },
+	.recv_ret = { -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl)
+{
+	.recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, },
+	.recv_ret = { 0, 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl)
+{
+	.recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, },
+	.recv_ret = { 0, 0, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl)
+{
+	.recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, },
+	.recv_ret = { 0, 0, 0, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data)
+{
+	.recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, },
+	.recv_ret = { 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_0data_0data)
+{
+	.recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, },
+	.recv_ret = { 11, -EAGAIN, },
+};
+
+FIXTURE_SETUP(zero_len)
+{
+	struct tls_crypto_info_keys tls12;
+	int ret;
+
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+			     &tls12, 0);
+
+	ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+	if (self->notls)
+		return;
+
+	/* Don't install keys on fd, we'll send raw records */
+	ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(zero_len)
+{
+	close(self->fd);
+	close(self->cfd);
+}
+
+/* Replay the variant's canned records and check every recv() against it */
+TEST_F(zero_len, test)
+{
+	const struct raw_rec *const *rec = &variant->recs[0];
+	unsigned int rec_off = 0;
+	unsigned char buf[128];
+	int i, j, ret;
+
+	if (self->notls)
+		SKIP(return, "no TLS support");
+
+	for (i = 0; i < 4 && variant->recs[i]; i++)
+		EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data,
+			       variant->recs[i]->cipher_len, 0),
+			  variant->recs[i]->cipher_len);
+
+	for (i = 0; i < 4; i++) {
+		ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1;
+		EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL,
+					  buf, sizeof(buf), MSG_DONTWAIT), ret);
+		if (ret == -1)
+			EXPECT_EQ(errno, -variant->recv_ret[i]);
+		if (variant->recv_ret[i] == -EAGAIN)
+			break;
+
+		for (j = 0; j < ret; j++) {
+			while (rec_off == (*rec)->plain_len) {
+				rec++;
+				rec_off = 0;
+			}
+			EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]);
+			rec_off++;
+		}
+	}
+}
+
 FIXTURE(tls_err)
 {
 	int fd, cfd;
@@ -2390,10 +2786,10 @@ TEST_F(tls_err, epoll_partial_rec)
 TEST_F(tls_err, poll_partial_rec_async)
 {
 	struct pollfd pfd = { };
+	char token = '\0';
 	ssize_t rec_len;
 	char rec[256];
 	char buf[128];
-	char token;
 	int p[2];
 	int ret;
 
@@ -2444,6 +2840,163 @@ TEST_F(tls_err, poll_partial_rec_async)
 	}
 }
 
+/* Use OOB+large send to trigger copy mode due to memory pressure.
+ * OOB causes a short read.
+ */
+TEST_F(tls_err, oob_pressure)
+{
+	char buf[1<<16];
+	int i;
+
+	memrnd(buf, sizeof(buf));
+
+	EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+	EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+	for (i = 0; i < 64; i++)
+		EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+}
+
+/* Walk @rx_len bytes of TLS records, asserting each is complete and carries
+ * at most @max_payload_len bytes of plaintext; return the total plaintext.
+ */
+static size_t parse_tls_records(struct __test_metadata *_metadata,
+				const __u8 *rx_buf, int rx_len, int overhead,
+				__u16 max_payload_len)
+{
+	const __u8 *rec = rx_buf;
+	size_t total_plaintext_rx = 0;
+	const __u8 rec_header_len = 5;
+
+	while (rec < rx_buf + rx_len) {
+		__u16 record_payload_len, plaintext_len;
+
+		/* Need a complete TLS header, and it must be application data */
+		ASSERT_GE(rx_buf + rx_len - rec, rec_header_len);
+		ASSERT_EQ(rec[0], 23);
+		ASSERT_EQ(rec[1], 0x3);
+		ASSERT_EQ(rec[2], 0x3);
+
+		memcpy(&record_payload_len, rec + 3, 2);
+		record_payload_len = ntohs(record_payload_len);
+		ASSERT_GE(record_payload_len, overhead);
+		ASSERT_LE(rec_header_len + record_payload_len, rx_buf + rx_len - rec);
+
+		plaintext_len = record_payload_len - overhead;
+		total_plaintext_rx += plaintext_len;
+
+		/* Plaintext must not exceed the specified limit */
+		ASSERT_LE(plaintext_len, max_payload_len);
+		rec += rec_header_len + record_payload_len;
+	}
+
+	return total_plaintext_rx;
+}
+
+TEST(tls_12_tx_max_payload_len)
+{
+	struct tls_crypto_info_keys tls12;
+	int cfd, ret, fd, overhead;
+	size_t total_plaintext_rx = 0;
+	__u8 tx[1024], rx[2000];
+	__u16 limit = 128, opt = 0;
+	unsigned int optlen = sizeof(opt);
+	bool notls;
+
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+			     &tls12, 0);
+
+	ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+	if (notls)
+		exit(KSFT_SKIP);
+
+	/* Don't install keys on fd, we'll parse raw records */
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+			 sizeof(limit));
+	ASSERT_EQ(ret, 0);
+
+	ret = getsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen);
+	EXPECT_EQ(ret, 0);
+	EXPECT_EQ(limit, opt);
+	EXPECT_EQ(optlen, sizeof(limit));
+
+	memset(tx, 0, sizeof(tx));
+	ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx));
+	close(cfd);
+
+	/* The sender is closed, so MSG_WAITALL gathers everything up to EOF */
+	ret = recv(fd, rx, sizeof(rx), MSG_WAITALL);
+	ASSERT_GT(ret, 0);
+
+	/*
+	 * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+	 * need it to walk the record stream
+	 */
+	overhead = 16 + 8;
+	total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+					       limit);
+
+	ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+	close(fd);
+}
+
+TEST(tls_12_tx_max_payload_len_open_rec)
+{
+	struct tls_crypto_info_keys tls12;
+	int cfd, ret, fd, overhead;
+	size_t total_plaintext_rx = 0;
+	__u8 tx[1024], rx[2000];
+	__u16 tx_partial = 256, og_limit = 512, limit = 128;
+	bool notls;
+
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+			     &tls12, 0);
+
+	ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+	if (notls)
+		exit(KSFT_SKIP);
+
+	/* Don't install keys on fd, we'll parse raw records */
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit,
+			 sizeof(og_limit));
+	ASSERT_EQ(ret, 0);
+
+	memset(tx, 0, sizeof(tx));
+	ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial);
+
+	/*
+	 * Changing the payload limit with a pending open record should
+	 * not be allowed.
+	 */
+	ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+			 sizeof(limit));
+	ASSERT_EQ(ret, -1);
+	ASSERT_EQ(errno, EBUSY);
+
+	ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR),
+		  sizeof(tx) - tx_partial);
+	close(cfd);
+
+	/* The sender is closed, so MSG_WAITALL gathers everything up to EOF */
+	ret = recv(fd, rx, sizeof(rx), MSG_WAITALL);
+	ASSERT_GT(ret, 0);
+
+	/*
+	 * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+	 * need it to walk the record stream
+	 */
+	overhead = 16 + 8;
+	total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+					       og_limit);
+	ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+	close(fd);
+}
+
 TEST(non_established) {
 	struct tls12_crypto_info_aes_gcm_256 tls12;
 	struct sockaddr_in addr;
@@ -2672,6 +3225,75 @@ TEST(prequeue) {
 	close(cfd);
 }
 
+TEST(data_steal) {
+	struct tls_crypto_info_keys tls;
+	char buf[20000], buf2[20000];
+	struct sockaddr_in addr;
+	int sfd, cfd, ret, fd;
+	int pid, status;
+	socklen_t len;
+
+	len = sizeof(addr);
+	memrnd(buf, sizeof(buf));
+
+	tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0);
+
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(INADDR_ANY);
+	addr.sin_port = 0;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+	ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0);
+	ASSERT_EQ(listen(sfd, 10), 0);
+	ASSERT_EQ(getsockname(sfd, &addr, &len), 0);
+	ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0);
+	ASSERT_GE(cfd = accept(sfd, &addr, &len), 0);
+	close(sfd);
+
+	ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	if (ret) {
+		ASSERT_EQ(errno, ENOENT);
+		SKIP(return, "no TLS support");
+	}
+	ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0);
+
+	/* Spawn a child and get it into the read wait path of the underlying
+	 * TCP socket (before kernel .recvmsg is replaced with the TLS one).
+	 */
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (!pid) {
+		EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2 + 1, MSG_WAITALL),
+			  sizeof(buf) / 2 + 1);
+		exit(!__test_passed(_metadata));
+	}
+
+	/* Send a sync byte and poll until it's consumed to ensure
+	 * the child is in recv() before we proceed to install TLS.
+	 */
+	ASSERT_EQ(send(fd, buf, 1, 0), 1);
+	do {
+		usleep(500);
+	} while (recv(cfd, buf, 1, MSG_PEEK | MSG_DONTWAIT) == 1);
+	EXPECT_EQ(errno, EAGAIN);
+
+	ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0);
+	ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0);
+
+	EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf));
+	EXPECT_EQ(wait(&status), pid);
+	EXPECT_EQ(status, 0);
+	EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1);
+	/* Don't check errno, the error will be different depending
+	 * on what random bytes TLS interpreted as the record length.
+	 */
+
+	close(fd);
+	close(cfd);
+}
+
 static void __attribute__((constructor)) fips_check(void) {
 	int res;
 	FILE *f;
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
deleted file mode 100644
index 9ba03164d73a..000000000000
--- a/tools/testing/selftests/net/toeplitz.c
+++ /dev/null
@@ -1,589 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Toeplitz test
- *
- * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
- * 2. Compute the rx_hash in software based on the packet contents
- * 3. Compare the two
- *
- * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
- *
- * If '-C $rx_irq_cpu_list' is given, also
- *
- * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
- * 5. Compute the rxqueue that RSS would select based on this rx_hash
- * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
- * 7. Compare the cpus from 4 and 6
- *
- * Else if '-r $rps_bitmap' is given, also
- *
- * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
- * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
- * 6. Compare the cpus from 4 and 5
- */
-
-#define _GNU_SOURCE
-
-#include <arpa/inet.h>
-#include <errno.h>
-#include <error.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <linux/filter.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <net/if.h>
-#include <netdb.h>
-#include <netinet/ip.h>
-#include <netinet/ip6.h>
-#include <netinet/tcp.h>
-#include <netinet/udp.h>
-#include <poll.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "../kselftest.h"
-
-#define TOEPLITZ_KEY_MIN_LEN	40
-#define TOEPLITZ_KEY_MAX_LEN	60
-
-#define TOEPLITZ_STR_LEN(K)	(((K) * 3) - 1)	/* hex encoded: AA:BB:CC:...:ZZ */
-#define TOEPLITZ_STR_MIN_LEN	TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
-#define TOEPLITZ_STR_MAX_LEN	TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
-
-#define FOUR_TUPLE_MAX_LEN	((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
-
-#define RSS_MAX_CPUS (1 << 16)	/* real constraint is PACKET_FANOUT_MAX */
-
-#define RPS_MAX_CPUS 16UL	/* must be a power of 2 */
-
-/* configuration options (cmdline arguments) */
-static uint16_t cfg_dport =	8000;
-static int cfg_family =		AF_INET6;
-static char *cfg_ifname =	"eth0";
-static int cfg_num_queues;
-static int cfg_num_rps_cpus;
-static bool cfg_sink;
-static int cfg_type =		SOCK_STREAM;
-static int cfg_timeout_msec =	1000;
-static bool cfg_verbose;
-
-/* global vars */
-static int num_cpus;
-static int ring_block_nr;
-static int ring_block_sz;
-
-/* stats */
-static int frames_received;
-static int frames_nohash;
-static int frames_error;
-
-#define log_verbose(args...)	do { if (cfg_verbose) fprintf(stderr, args); } while (0)
-
-/* tpacket ring */
-struct ring_state {
-	int fd;
-	char *mmap;
-	int idx;
-	int cpu;
-};
-
-static unsigned int rx_irq_cpus[RSS_MAX_CPUS];	/* map from rxq to cpu */
-static int rps_silo_to_cpu[RPS_MAX_CPUS];
-static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
-static struct ring_state rings[RSS_MAX_CPUS];
-
-static inline uint32_t toeplitz(const unsigned char *four_tuple,
-				const unsigned char *key)
-{
-	int i, bit, ret = 0;
-	uint32_t key32;
-
-	key32 = ntohl(*((uint32_t *)key));
-	key += 4;
-
-	for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
-		for (bit = 7; bit >= 0; bit--) {
-			if (four_tuple[i] & (1 << bit))
-				ret ^= key32;
-
-			key32 <<= 1;
-			key32 |= !!(key[0] & (1 << bit));
-		}
-		key++;
-	}
-
-	return ret;
-}
-
-/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
-static void verify_rss(uint32_t rx_hash, int cpu)
-{
-	int queue = rx_hash % cfg_num_queues;
-
-	log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
-	if (rx_irq_cpus[queue] != cpu) {
-		log_verbose(". error: rss cpu mismatch (%d)", cpu);
-		frames_error++;
-	}
-}
-
-static void verify_rps(uint64_t rx_hash, int cpu)
-{
-	int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
-
-	log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
-	if (rps_silo_to_cpu[silo] != cpu) {
-		log_verbose(". error: rps cpu mismatch (%d)", cpu);
-		frames_error++;
-	}
-}
-
-static void log_rxhash(int cpu, uint32_t rx_hash,
-		       const char *addrs, int addr_len)
-{
-	char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
-	uint16_t *ports;
-
-	if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
-	    !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
-		error(1, 0, "address parse error");
-
-	ports = (void *)addrs + (addr_len * 2);
-	log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
-		    cpu, rx_hash, saddr, daddr,
-		    ntohs(ports[0]), ntohs(ports[1]));
-}
-
-/* Compare computed rxhash with rxhash received from tpacket_v3 */
-static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
-{
-	unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
-	uint32_t rx_hash_sw;
-	const char *addrs;
-	int addr_len;
-
-	if (cfg_family == AF_INET) {
-		addr_len = sizeof(struct in_addr);
-		addrs = pkt + offsetof(struct iphdr, saddr);
-	} else {
-		addr_len = sizeof(struct in6_addr);
-		addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
-	}
-
-	memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
-	rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
-
-	if (cfg_verbose)
-		log_rxhash(cpu, rx_hash, addrs, addr_len);
-
-	if (rx_hash != rx_hash_sw) {
-		log_verbose(" != expected 0x%x\n", rx_hash_sw);
-		frames_error++;
-		return;
-	}
-
-	log_verbose(" OK");
-	if (cfg_num_queues)
-		verify_rss(rx_hash, cpu);
-	else if (cfg_num_rps_cpus)
-		verify_rps(rx_hash, cpu);
-	log_verbose("\n");
-}
-
-static char *recv_frame(const struct ring_state *ring, char *frame)
-{
-	struct tpacket3_hdr *hdr = (void *)frame;
-
-	if (hdr->hv1.tp_rxhash)
-		verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
-			      ring->cpu);
-	else
-		frames_nohash++;
-
-	return frame + hdr->tp_next_offset;
-}
-
-/* A single TPACKET_V3 block can hold multiple frames */
-static bool recv_block(struct ring_state *ring)
-{
-	struct tpacket_block_desc *block;
-	char *frame;
-	int i;
-
-	block = (void *)(ring->mmap + ring->idx * ring_block_sz);
-	if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
-		return false;
-
-	frame = (char *)block;
-	frame += block->hdr.bh1.offset_to_first_pkt;
-
-	for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
-		frame = recv_frame(ring, frame);
-		frames_received++;
-	}
-
-	block->hdr.bh1.block_status = TP_STATUS_KERNEL;
-	ring->idx = (ring->idx + 1) % ring_block_nr;
-
-	return true;
-}
-
-/* simple test: sleep once unconditionally and then process all rings */
-static void process_rings(void)
-{
-	int i;
-
-	usleep(1000 * cfg_timeout_msec);
-
-	for (i = 0; i < num_cpus; i++)
-		do {} while (recv_block(&rings[i]));
-
-	fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
-		frames_received - frames_nohash - frames_error,
-		frames_nohash, frames_error);
-}
-
-static char *setup_ring(int fd)
-{
-	struct tpacket_req3 req3 = {0};
-	void *ring;
-
-	req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
-	req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
-
-	req3.tp_frame_size = 2048;
-	req3.tp_frame_nr = 1 << 10;
-	req3.tp_block_nr = 16;
-
-	req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
-	req3.tp_block_size /= req3.tp_block_nr;
-
-	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
-		error(1, errno, "setsockopt PACKET_RX_RING");
-
-	ring_block_sz = req3.tp_block_size;
-	ring_block_nr = req3.tp_block_nr;
-
-	ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
-		    PROT_READ | PROT_WRITE,
-		    MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
-	if (ring == MAP_FAILED)
-		error(1, 0, "mmap failed");
-
-	return ring;
-}
-
-static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
-{
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
-		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
-		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, off_proto),
-		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
-		BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, off_dport),
-		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
-		BPF_STMT(BPF_RET + BPF_K, 0),
-		BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
-	};
-	struct sock_fprog prog = {};
-
-	prog.filter = filter;
-	prog.len = ARRAY_SIZE(filter);
-	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
-		error(1, errno, "setsockopt filter");
-}
-
-/* filter on transport protocol and destination port */
-static void set_filter(int fd)
-{
-	const int off_dport = offsetof(struct tcphdr, dest);	/* same for udp */
-	uint8_t proto;
-
-	proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
-	if (cfg_family == AF_INET)
-		__set_filter(fd, offsetof(struct iphdr, protocol), proto,
-			     sizeof(struct iphdr) + off_dport);
-	else
-		__set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
-			     sizeof(struct ip6_hdr) + off_dport);
-}
-
-/* drop everything: used temporarily during setup */
-static void set_filter_null(int fd)
-{
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_RET + BPF_K, 0),
-	};
-	struct sock_fprog prog = {};
-
-	prog.filter = filter;
-	prog.len = ARRAY_SIZE(filter);
-	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
-		error(1, errno, "setsockopt filter");
-}
-
-static int create_ring(char **ring)
-{
-	struct fanout_args args = {
-		.id = 1,
-		.type_flags = PACKET_FANOUT_CPU,
-		.max_num_members = RSS_MAX_CPUS
-	};
-	struct sockaddr_ll ll = { 0 };
-	int fd, val;
-
-	fd = socket(PF_PACKET, SOCK_DGRAM, 0);
-	if (fd == -1)
-		error(1, errno, "socket creation failed");
-
-	val = TPACKET_V3;
-	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
-		error(1, errno, "setsockopt PACKET_VERSION");
-	*ring = setup_ring(fd);
-
-	/* block packets until all rings are added to the fanout group:
-	 * else packets can arrive during setup and get misclassified
-	 */
-	set_filter_null(fd);
-
-	ll.sll_family = AF_PACKET;
-	ll.sll_ifindex = if_nametoindex(cfg_ifname);
-	ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
-						  htons(ETH_P_IPV6);
-	if (bind(fd, (void *)&ll, sizeof(ll)))
-		error(1, errno, "bind");
-
-	/* must come after bind: verifies all programs in group match */
-	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
-		/* on failure, retry using old API if that is sufficient:
-		 * it has a hard limit of 256 sockets, so only try if
-		 * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
-		 * in this API, the third argument is left implicit.
-		 */
-		if (cfg_num_queues || num_cpus > 256 ||
-		    setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
-			       &args, sizeof(uint32_t)))
-			error(1, errno, "setsockopt PACKET_FANOUT cpu");
-	}
-
-	return fd;
-}
-
-/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
-static int setup_sink(void)
-{
-	int fd, val;
-
-	fd = socket(cfg_family, cfg_type, 0);
-	if (fd == -1)
-		error(1, errno, "socket %d.%d", cfg_family, cfg_type);
-
-	val = 1 << 20;
-	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
-		error(1, errno, "setsockopt rcvbuf");
-
-	return fd;
-}
-
-static void setup_rings(void)
-{
-	int i;
-
-	for (i = 0; i < num_cpus; i++) {
-		rings[i].cpu = i;
-		rings[i].fd = create_ring(&rings[i].mmap);
-	}
-
-	/* accept packets once all rings in the fanout group are up */
-	for (i = 0; i < num_cpus; i++)
-		set_filter(rings[i].fd);
-}
-
-static void cleanup_rings(void)
-{
-	int i;
-
-	for (i = 0; i < num_cpus; i++) {
-		if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
-			error(1, errno, "munmap");
-		if (close(rings[i].fd))
-			error(1, errno, "close");
-	}
-}
-
-static void parse_cpulist(const char *arg)
-{
-	do {
-		rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
-
-		arg = strchr(arg, ',');
-		if (!arg)
-			break;
-		arg++;			// skip ','
-	} while (1);
-}
-
-static void show_cpulist(void)
-{
-	int i;
-
-	for (i = 0; i < cfg_num_queues; i++)
-		fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
-}
-
-static void show_silos(void)
-{
-	int i;
-
-	for (i = 0; i < cfg_num_rps_cpus; i++)
-		fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
-}
-
-static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
-{
-	int i, ret, off;
-
-	if (slen < TOEPLITZ_STR_MIN_LEN ||
-	    slen > TOEPLITZ_STR_MAX_LEN + 1)
-		error(1, 0, "invalid toeplitz key");
-
-	for (i = 0, off = 0; off < slen; i++, off += 3) {
-		ret = sscanf(str + off, "%hhx", &key[i]);
-		if (ret != 1)
-			error(1, 0, "key parse error at %d off %d len %d",
-			      i, off, slen);
-	}
-}
-
-static void parse_rps_bitmap(const char *arg)
-{
-	unsigned long bitmap;
-	int i;
-
-	bitmap = strtoul(arg, NULL, 0);
-
-	if (bitmap & ~(RPS_MAX_CPUS - 1))
-		error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
-		      bitmap, RPS_MAX_CPUS - 1);
-
-	for (i = 0; i < RPS_MAX_CPUS; i++)
-		if (bitmap & 1UL << i)
-			rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
-}
-
-static void parse_opts(int argc, char **argv)
-{
-	static struct option long_options[] = {
-	    {"dport",	required_argument, 0, 'd'},
-	    {"cpus",	required_argument, 0, 'C'},
-	    {"key",	required_argument, 0, 'k'},
-	    {"iface",	required_argument, 0, 'i'},
-	    {"ipv4",	no_argument, 0, '4'},
-	    {"ipv6",	no_argument, 0, '6'},
-	    {"sink",	no_argument, 0, 's'},
-	    {"tcp",	no_argument, 0, 't'},
-	    {"timeout",	required_argument, 0, 'T'},
-	    {"udp",	no_argument, 0, 'u'},
-	    {"verbose",	no_argument, 0, 'v'},
-	    {"rps",	required_argument, 0, 'r'},
-	    {0, 0, 0, 0}
-	};
-	bool have_toeplitz = false;
-	int index, c;
-
-	while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
-		switch (c) {
-		case '4':
-			cfg_family = AF_INET;
-			break;
-		case '6':
-			cfg_family = AF_INET6;
-			break;
-		case 'C':
-			parse_cpulist(optarg);
-			break;
-		case 'd':
-			cfg_dport = strtol(optarg, NULL, 0);
-			break;
-		case 'i':
-			cfg_ifname = optarg;
-			break;
-		case 'k':
-			parse_toeplitz_key(optarg, strlen(optarg),
-					   toeplitz_key);
-			have_toeplitz = true;
-			break;
-		case 'r':
-			parse_rps_bitmap(optarg);
-			break;
-		case 's':
-			cfg_sink = true;
-			break;
-		case 't':
-			cfg_type = SOCK_STREAM;
-			break;
-		case 'T':
-			cfg_timeout_msec = strtol(optarg, NULL, 0);
-			break;
-		case 'u':
-			cfg_type = SOCK_DGRAM;
-			break;
-		case 'v':
-			cfg_verbose = true;
-			break;
-
-		default:
-			error(1, 0, "unknown option %c", optopt);
-			break;
-		}
-	}
-
-	if (!have_toeplitz)
-		error(1, 0, "Must supply rss key ('-k')");
-
-	num_cpus = get_nprocs();
-	if (num_cpus > RSS_MAX_CPUS)
-		error(1, 0, "increase RSS_MAX_CPUS");
-
-	if (cfg_num_queues && cfg_num_rps_cpus)
-		error(1, 0,
-		      "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
-	if (cfg_verbose) {
-		show_cpulist();
-		show_silos();
-	}
-}
-
-int main(int argc, char **argv)
-{
-	const int min_tests = 10;
-	int fd_sink = -1;
-
-	parse_opts(argc, argv);
-
-	if (cfg_sink)
-		fd_sink = setup_sink();
-
-	setup_rings();
-	process_rings();
-	cleanup_rings();
-
-	if (cfg_sink && close(fd_sink))
-		error(1, errno, "close sink");
-
-	if (frames_received - frames_nohash < min_tests)
-		error(1, 0, "too few frames for verification");
-
-	return frames_error;
-}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
deleted file mode 100755
index 8ff172f7bb1b..000000000000
--- a/tools/testing/selftests/net/toeplitz.sh
+++ /dev/null
@@ -1,199 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
-# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
-# ('-rps <rps_map>')
-#
-# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
-# which is a driver-specific encoding.
-#
-# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
-# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
-
-source setup_loopback.sh
-readonly SERVER_IP4="192.168.1.200/24"
-readonly SERVER_IP6="fda8::1/64"
-readonly SERVER_MAC="aa:00:00:00:00:02"
-
-readonly CLIENT_IP4="192.168.1.100/24"
-readonly CLIENT_IP6="fda8::2/64"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-
-PORT=8000
-KEY="$(</proc/sys/net/core/netdev_rss_key)"
-TEST_RSS=false
-RPS_MAP=""
-PROTO_FLAG=""
-IP_FLAG=""
-DEV="eth0"
-
-# Return the number of rxqs among which RSS is configured to spread packets.
-# This is determined by reading the RSS indirection table using ethtool.
-get_rss_cfg_num_rxqs() {
-	echo $(ethtool -x "${DEV}" |
-		grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
-		cut -d: -f2- |
-		awk '{$1=$1};1' |
-		tr ' ' '\n' |
-		sort -u |
-		wc -l)
-}
-
-# Return a list of the receive irq handler cpus.
-# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
-# Reads /sys/kernel/irq/ in order, so algorithm depends on
-# irq_{rxq-0} < irq_{rxq-1}, etc.
-get_rx_irq_cpus() {
-	CPUS=""
-	# sort so that irq 2 is read before irq 10
-	SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
-	# Consider only as many queues as RSS actually uses. We assume that
-	# if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
-	RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
-	RXQ_COUNT=0
-
-	for i in ${SORTED_IRQS}
-	do
-		[[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
-		# lookup relevant IRQs by action name
-		[[ -e "$i/actions" ]] || continue
-		cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
-		irqname=$(<"$i/actions")
-
-		# does the IRQ get called
-		irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
-		[[ -n "${irqcount}" ]] || continue
-
-		# lookup CPU
-		irq=$(basename "$i")
-		cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
-
-		if [[ -z "${CPUS}" ]]; then
-			CPUS="${cpu}"
-		else
-			CPUS="${CPUS},${cpu}"
-		fi
-		RXQ_COUNT=$((RXQ_COUNT+1))
-	done
-
-	echo "${CPUS}"
-}
-
-get_disable_rfs_cmd() {
-	echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
-}
-
-get_set_rps_bitmaps_cmd() {
-	CMD=""
-	for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
-	do
-		CMD="${CMD} echo $1 > ${i};"
-	done
-
-	echo "${CMD}"
-}
-
-get_disable_rps_cmd() {
-	echo "$(get_set_rps_bitmaps_cmd 0)"
-}
-
-die() {
-	echo "$1"
-	exit 1
-}
-
-check_nic_rxhash_enabled() {
-	local -r pattern="receive-hashing:\ on"
-
-	ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
-}
-
-parse_opts() {
-	local prog=$0
-	shift 1
-
-	while [[ "$1" =~ "-" ]]; do
-		if [[ "$1" = "-irq_prefix" ]]; then
-			shift
-			IRQ_PATTERN="^$1-[0-9]*$"
-		elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
-			PROTO_FLAG="$1"
-		elif [[ "$1" = "-4" ]]; then
-			IP_FLAG="$1"
-			SERVER_IP="${SERVER_IP4}"
-			CLIENT_IP="${CLIENT_IP4}"
-		elif [[ "$1" = "-6" ]]; then
-			IP_FLAG="$1"
-			SERVER_IP="${SERVER_IP6}"
-			CLIENT_IP="${CLIENT_IP6}"
-		elif [[ "$1" = "-rss" ]]; then
-			TEST_RSS=true
-		elif [[ "$1" = "-rps" ]]; then
-			shift
-			RPS_MAP="$1"
-		elif [[ "$1" = "-i" ]]; then
-			shift
-			DEV="$1"
-		else
-			die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
-			     [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
-		fi
-		shift
-	done
-}
-
-setup() {
-	setup_loopback_environment "${DEV}"
-
-	# Set up server_ns namespace and client_ns namespace
-	setup_macvlan_ns "${DEV}" $server_ns server \
-	"${SERVER_MAC}" "${SERVER_IP}"
-	setup_macvlan_ns "${DEV}" $client_ns client \
-	"${CLIENT_MAC}" "${CLIENT_IP}"
-}
-
-cleanup() {
-	cleanup_macvlan_ns $server_ns server $client_ns client
-	cleanup_loopback "${DEV}"
-}
-
-parse_opts $0 $@
-
-setup
-trap cleanup EXIT
-
-check_nic_rxhash_enabled
-
-# Actual test starts here
-if [[ "${TEST_RSS}" = true ]]; then
-	# RPS/RFS must be disabled because they move packets between cpus,
-	# which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
-	eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
-	  ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
-	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
-	  -C "$(get_rx_irq_cpus)" -s -v &
-elif [[ ! -z "${RPS_MAP}" ]]; then
-	eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
-	  ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
-	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
-	  -r "0x${RPS_MAP}" -s -v &
-else
-	ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
-	  -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
-fi
-
-server_pid=$!
-
-ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
-  "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
-
-client_pid=$!
-
-wait "${server_pid}"
-exit_code=$?
-kill -9 "${client_pid}"
-if [[ "${exit_code}" -eq 0 ]]; then
-	echo "Test Succeeded!"
-fi
-exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
deleted file mode 100755
index 2fef34f4aba1..000000000000
--- a/tools/testing/selftests/net/toeplitz_client.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# A simple program for generating traffic for the toeplitz test.
-#
-# This program sends packets periodically for, conservatively, 20 seconds. The
-# intent is for the calling program to kill this program once it is no longer
-# needed, rather than waiting for the 20 second expiration.
-
-send_traffic() {
-	expiration=$((SECONDS+20))
-	while [[ "${SECONDS}" -lt "${expiration}" ]]
-	do
-		if [[ "${PROTO}" == "-u" ]]; then
-			echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
-		else
-			echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
-		fi
-		sleep 0.001
-	done
-}
-
-PROTO=$1
-IPVER=$2
-ADDR=$3
-PORT=$4
-
-send_traffic
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
index 282f14760940..a7c6ab8a0347 100755
--- a/tools/testing/selftests/net/traceroute.sh
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no
 
 ################################################################################
 #
-log_test()
-{
-	local rc=$1
-	local expected=$2
-	local msg="$3"
-
-	if [ ${rc} -eq ${expected} ]; then
-		printf "TEST: %-60s  [ OK ]\n" "${msg}"
-		nsuccess=$((nsuccess+1))
-	else
-		ret=1
-		nfail=$((nfail+1))
-		printf "TEST: %-60s  [FAIL]\n" "${msg}"
-		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
-			echo
-			echo "hit enter to continue, 'q' to quit"
-			read a
-			[ "$a" = "q" ] && exit 1
-		fi
-	fi
-}
-
 run_cmd()
 {
 	local ns
@@ -58,6 +36,35 @@ run_cmd()
 	return $rc
 }
 
+__check_traceroute_version()
+{
+	local cmd=$1; shift
+	local req_ver=$1; shift
+	local ver
+
+	# Return 0 if "$cmd -V" reports at least version $req_ver, else 1.
+	# Compare with "sort -V": stripping the dots and comparing integers
+	# would rank 2.0.22 (2022) above 2.1.5 (215).
+	ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' | head -n 1)
+	[[ -n $ver ]] || return 1	# no parsable version: treat as too old
+
+	[[ $(printf '%s\n' "$req_ver" "$ver" | sort -V | head -n 1) == "$req_ver" ]]
+}
+
+check_traceroute6_version()
+{
+	local req_ver=$1; shift
+
+	__check_traceroute_version traceroute6 "$req_ver"
+}
+
+check_traceroute_version()
+{
+	local req_ver=$1; shift
+
+	__check_traceroute_version traceroute "$req_ver"
+}
+
 ################################################################################
 # create namespaces and interconnects
 
@@ -81,6 +88,8 @@ create_ns()
 	ip netns exec ${ns} ip -6 ro add unreachable default metric 8192
 
 	ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0
+	ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0
 	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
 	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
 	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
@@ -203,34 +212,275 @@ setup_traceroute6()
 
 run_traceroute6()
 {
-	if [ ! -x "$(command -v traceroute6)" ]; then
-		echo "SKIP: Could not run IPV6 test without traceroute6"
-		return
-	fi
-
 	setup_traceroute6
 
+	RET=0
+
 	# traceroute6 host-2 from host-1 (expects 2000:102::2)
 	run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
-	log_test $? 0 "IPV6 traceroute"
+	check_err $? "traceroute6 did not return 2000:102::2"
+	log_test "IPv6 traceroute"
 
 	cleanup_traceroute6
 }
 
 ################################################################################
+# traceroute6 with VRF test
+#
+# Verify that in this scenario
+#
+#        ------------------------ N2
+#         |                    |
+#       ------              ------  N3  ----
+#       | R1 |              | R2 |------|H2|
+#       ------              ------      ----
+#         |                    |
+#        ------------------------ N1
+#                  |
+#                 ----
+#                 |H1|
+#                 ----
+#
+# Where H1's default route goes through R1 and R1's default route goes through
+# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1.
+# The interfaces connecting R2 to the different subnets are members of a VRF
+# and the intention is to check that traceroute6 does not report the VRF's
+# address.
+#
+# Addresses are assigned as follows:
+#
+# N1: 2000:101::/64
+# N2: 2000:102::/64
+# N3: 2000:103::/64
+#
+# R1's host part of address: 1
+# R2's host part of address: 2
+# H1's host part of address: 3
+# H2's host part of address: 4
+#
+# For example:
+# the IPv6 address of R1's interface on N2 is 2000:102::1/64
+
+cleanup_traceroute6_vrf()
+{
+	cleanup_all_ns
+}
+
+setup_traceroute6_vrf()
+{
+	# Start clean
+	cleanup_traceroute6_vrf
+
+	setup_ns h1 h2 r1 r2
+	create_ns "$h1"
+	create_ns "$h2"
+	create_ns "$r1"
+	create_ns "$r2"
+
+	ip -n "$r2" link add name vrf100 up type vrf table 100
+	ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100
+
+	# Setup N3
+	connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64
+
+	ip -n "$r2" link set dev eth3 master vrf100
+
+	ip -n "$h2" route add default via 2000:103::2
+
+	# Setup N2
+	connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64
+
+	ip -n "$r1" route add default via 2000:102::2
+
+	ip -n "$r2" link set dev eth2 master vrf100
+
+	# Setup N1. host-1 and router-2 connect to a bridge in router-1.
+	ip -n "$r1" link add name br100 up type bridge
+	ip -n "$r1" addr add 2000:101::1/64 dev br100
+
+	connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - -
+
+	ip -n "$h1" route add default via 2000:101::1
+
+	ip -n "$r1" link set dev eth0 master br100
+
+	connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - -
+
+	ip -n "$r2" link set dev eth1 master vrf100
+
+	ip -n "$r1" link set dev eth1 master br100
+
+	# Prime the network
+	ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1
+}
+
+run_traceroute6_vrf()
+{
+	setup_traceroute6_vrf
+
+	RET=0
+
+	# traceroute6 host-2 from host-1 (expects 2000:102::2)
+	run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2"
+	check_err $? "traceroute6 did not return 2000:102::2"
+	log_test "IPv6 traceroute with VRF"
+
+	cleanup_traceroute6_vrf
+}
+
+################################################################################
+# traceroute6 with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ----                          ----                          ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ----            N1            ----            N2            ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute6_ext()
+{
+	cleanup_all_ns
+}
+
+setup_traceroute6_ext()
+{
+	# Start clean
+	cleanup_traceroute6_ext
+
+	setup_ns h1 r1 h2
+	create_ns "$h1"
+	create_ns "$r1"
+	create_ns "$h2"
+
+	# Setup N1
+	connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+	# Setup N2
+	connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+	# Setup H1
+	ip -n "$h1" address add 2001:db8:1::1/128 dev lo
+	ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1
+
+	# Setup R1
+	ip -n "$r1" address add 2001:db8:1::2/128 dev lo
+	ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1
+	ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2
+
+	# Setup H2
+	ip -n "$h2" address add 2001:db8:1::3/128 dev lo
+	ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2
+
+	# Prime the network
+	ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1
+}
+
+traceroute6_ext_iio_iif_test()
+{
+	local r1_ifindex h2_ifindex
+	local pkt_len=$1; shift
+
+	# Test that incoming interface info is not appended by default.
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+	check_fail $? "Incoming interface info appended by default when should not"
+
+	# Test that the extension is appended when enabled.
+	run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+	check_err $? "Failed to enable incoming interface info extension on R1"
+
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+	check_err $? "Incoming interface info not appended after enable"
+
+	# Test that the extension is not appended when disabled.
+	run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+	check_err $? "Failed to disable incoming interface info extension on R1"
+
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+	check_fail $? "Incoming interface info appended after disable"
+
+	# Test that the extension is sent correctly from both R1 and H2.
+	run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+	r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from R1"
+
+	run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+	h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from H2"
+
+	# Add a global address on the incoming interface of R1 and check that
+	# it is reported.
+	run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad"
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from R1 after address addition"
+	run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1"
+
+	# Change name and MTU and make sure the result is still correct.
+	run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+	run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+	check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+	run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+	run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+	run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+}
+
+run_traceroute6_ext()
+{
+	# Need at least version 2.1.5 for RFC 5837 support.
+	if ! check_traceroute6_version 2.1.5; then
+		log_test_skip "traceroute6 too old, missing ICMP extensions support"
+		return
+	fi
+
+	setup_traceroute6_ext
+
+	RET=0
+
+	## General ICMP extensions tests
+
+	# Test that ICMP extensions are disabled by default.
+	run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\""
+	check_err $? "ICMP extensions are not disabled by default"
+
+	# Test that unsupported values are rejected. Do not use "sysctl" as
+	# older versions do not return an error code upon failure.
+	run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+	check_fail $? "Unsupported sysctl value was not rejected"
+
+	## Extension-specific tests
+
+	# Incoming interface info test. Test with various packet sizes,
+	# including the default one.
+	traceroute6_ext_iio_iif_test
+	traceroute6_ext_iio_iif_test 127
+	traceroute6_ext_iio_iif_test 128
+	traceroute6_ext_iio_iif_test 129
+
+	log_test "IPv6 traceroute with ICMP extensions"
+
+	cleanup_traceroute6_ext
+}
+
+################################################################################
 # traceroute test
 #
-# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario
+# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
+# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively.
 #
-#                    1.0.3.1/24
+#      1.0.3.3/24    1.0.3.1/24
 # ---- 1.0.1.3/24    1.0.1.1/24 ---- 1.0.2.1/24    1.0.2.4/24 ----
 # |H1|--------------------------|R1|--------------------------|H2|
 # ----            N1            ----            N2            ----
 #
-# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and
-# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary
-# address on N1.
-#
+# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and
+# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer
+# a source address that is on the same subnet as the destination IP of the ICMP
+# error message.
 
 cleanup_traceroute()
 {
@@ -250,6 +500,7 @@ setup_traceroute()
 
 	connect_ns $h1 eth0 1.0.1.3/24 - \
 	           $router eth1 1.0.3.1/24 -
+	ip -n "$h1" addr add 1.0.3.3/24 dev eth0
 	ip netns exec $h1 ip route add default via 1.0.1.1
 
 	ip netns exec $router ip addr add 1.0.1.1/24 dev eth1
@@ -268,18 +519,232 @@ setup_traceroute()
 
 run_traceroute()
 {
-	if [ ! -x "$(command -v traceroute)" ]; then
-		echo "SKIP: Could not run IPV4 test without traceroute"
+	setup_traceroute
+
+	RET=0
+
+	# traceroute host-2 from host-1. Expect a source IP that is on the same
+	# subnet as destination IP of the ICMP error message.
+	run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1"
+	check_err $? "traceroute did not return 1.0.1.1"
+	run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1"
+	check_err $? "traceroute did not return 1.0.3.1"
+	log_test "IPv4 traceroute"
+
+	cleanup_traceroute
+}
+
+################################################################################
+# traceroute with VRF test
+#
+# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
+# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The
+# intention is to check that the kernel does not choose an IP assigned to the
+# VRF device, but rather an address from the VRF port (eth1) that received the
+# packet that generates the ICMP error message.
+#
+#                          1.0.4.1/24 (vrf100)
+#      1.0.3.3/24    1.0.3.1/24
+# ---- 1.0.1.3/24    1.0.1.1/24 ---- 1.0.2.1/24    1.0.2.4/24 ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ----            N1            ----            N2            ----
+
+cleanup_traceroute_vrf()
+{
+	cleanup_all_ns
+}
+
+setup_traceroute_vrf()
+{
+	# Start clean
+	cleanup_traceroute_vrf
+
+	setup_ns h1 h2 router
+	create_ns "$h1"
+	create_ns "$h2"
+	create_ns "$router"
+
+	ip -n "$router" link add name vrf100 up type vrf table 100
+	ip -n "$router" addr add 1.0.4.1/24 dev vrf100
+
+	connect_ns "$h1" eth0 1.0.1.3/24 - \
+	           "$router" eth1 1.0.1.1/24 -
+
+	ip -n "$h1" addr add 1.0.3.3/24 dev eth0
+	ip -n "$h1" route add default via 1.0.1.1
+
+	ip -n "$router" link set dev eth1 master vrf100
+	ip -n "$router" addr add 1.0.3.1/24 dev eth1
+	ip netns exec "$router" sysctl -qw \
+		net.ipv4.icmp_errors_use_inbound_ifaddr=1
+
+	connect_ns "$h2" eth0 1.0.2.4/24 - \
+	           "$router" eth2 1.0.2.1/24 -
+
+	ip -n "$h2" route add default via 1.0.2.1
+
+	ip -n "$router" link set dev eth2 master vrf100
+
+	# Prime the network
+	ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1
+}
+
+run_traceroute_vrf()
+{
+	setup_traceroute_vrf
+
+	RET=0
+
+	# traceroute host-2 from host-1. Expect a source IP that is on the same
+	# subnet as destination IP of the ICMP error message.
+	run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1"
+	check_err $? "traceroute did not return 1.0.1.1"
+	run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1"
+	check_err $? "traceroute did not return 1.0.3.1"
+	log_test "IPv4 traceroute with VRF"
+
+	cleanup_traceroute_vrf
+}
+
+################################################################################
+# traceroute with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ----                          ----                          ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ----            N1            ----            N2            ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute_ext()
+{
+	cleanup_all_ns
+}
+
+setup_traceroute_ext()
+{
+	# Start clean
+	cleanup_traceroute_ext
+
+	setup_ns h1 r1 h2
+	create_ns "$h1"
+	create_ns "$r1"
+	create_ns "$h2"
+
+	# Setup N1
+	connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+	# Setup N2
+	connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+	# Setup H1
+	ip -n "$h1" address add 192.0.2.1/32 dev lo
+	ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1
+
+	# Setup R1
+	ip -n "$r1" address add 192.0.2.2/32 dev lo
+	ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1
+	ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2
+
+	# Setup H2
+	ip -n "$h2" address add 192.0.2.3/32 dev lo
+	ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2
+
+	# Prime the network
+	ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1
+}
+
+traceroute_ext_iio_iif_test()
+{
+	local r1_ifindex h2_ifindex
+	local pkt_len=$1; shift
+
+	# Test that incoming interface info is not appended by default.
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+	check_fail $? "Incoming interface info appended by default when should not"
+
+	# Test that the extension is appended when enabled.
+	run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+	check_err $? "Failed to enable incoming interface info extension on R1"
+
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+	check_err $? "Incoming interface info not appended after enable"
+
+	# Test that the extension is not appended when disabled.
+	run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+	check_err $? "Failed to disable incoming interface info extension on R1"
+
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+	check_fail $? "Incoming interface info appended after disable"
+
+	# Test that the extension is sent correctly from both R1 and H2.
+	run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+	r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from R1"
+
+	run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+	h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from H2"
+
+	# Add a global address on the incoming interface of R1 and check that
+	# it is reported.
+	run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1"
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'"
+	check_err $? "Wrong incoming interface info reported from R1 after address addition"
+	run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1"
+
+	# Change name and MTU and make sure the result is still correct.
+	# Re-add the route towards H1 since it was deleted when we removed the
+	# last IPv4 address from eth1 on R1.
+	run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1"
+	run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+	run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+	check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+	run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+	run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+	run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+}
+
+run_traceroute_ext()
+{
+	# Need at least version 2.1.5 for RFC 5837 support.
+	if ! check_traceroute_version 2.1.5; then
+		log_test_skip "traceroute too old, missing ICMP extensions support"
 		return
 	fi
 
-	setup_traceroute
+	setup_traceroute_ext
 
-	# traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while.
-	run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
-	log_test $? 0 "IPV4 traceroute"
+	RET=0
 
-	cleanup_traceroute
+	## General ICMP extensions tests
+
+	# Test that ICMP extensions are disabled by default.
+	run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\""
+	check_err $? "ICMP extensions are not disabled by default"
+
+	# Test that unsupported values are rejected. Do not use "sysctl" as
+	# older versions do not return an error code upon failure.
+	run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+	check_fail $? "Unsupported sysctl value was not rejected"
+
+	## Extension-specific tests
+
+	# Incoming interface info test. Test with various packet sizes,
+	# including the default one.
+	traceroute_ext_iio_iif_test
+	traceroute_ext_iio_iif_test 127
+	traceroute_ext_iio_iif_test 128
+	traceroute_ext_iio_iif_test 129
+
+	log_test "IPv4 traceroute with ICMP extensions"
+
+	cleanup_traceroute_ext
 }
 
 ################################################################################
@@ -288,15 +753,16 @@ run_traceroute()
 run_tests()
 {
 	run_traceroute6
+	run_traceroute6_vrf
+	run_traceroute6_ext
 	run_traceroute
+	run_traceroute_vrf
+	run_traceroute_ext
 }
 
 ################################################################################
 # main
 
-declare -i nfail=0
-declare -i nsuccess=0
-
 while getopts :pv o
 do
 	case $o in
@@ -306,7 +772,10 @@ do
 	esac
 done
 
+require_command traceroute6
+require_command traceroute
+require_command jq
+
 run_tests
 
-printf "\nTests passed: %3d\n" ${nsuccess}
-printf "Tests failed: %3d\n"   ${nfail}
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
index fa83918b62d1..cf106a49b55e 100644
--- a/tools/testing/selftests/net/tun.c
+++ b/tools/testing/selftests/net/tun.c
@@ -8,14 +8,119 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <linux/if.h>
 #include <linux/if_tun.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
 #include <sys/ioctl.h>
 #include <sys/socket.h>
 
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
+#include "tuntap_helpers.h"
+
+static const char param_dev_geneve_name[] = "geneve1";
+static unsigned char param_hwaddr_outer_dst[] = { 0x00, 0xfe, 0x98,
+						  0x14, 0x22, 0x42 };
+static unsigned char param_hwaddr_outer_src[] = { 0x00, 0xfe, 0x98,
+						  0x94, 0xd2, 0x43 };
+static unsigned char param_hwaddr_inner_dst[] = { 0x00, 0xfe, 0x98,
+						  0x94, 0x22, 0xcc };
+static unsigned char param_hwaddr_inner_src[] = { 0x00, 0xfe, 0x98,
+						  0x94, 0xd2, 0xdd };
+/* Outer params are used on the tun/tap device, inner ones on geneve1. */
+static struct in_addr param_ipaddr4_outer_dst = {
+	__constant_htonl(0xac100001), /* 172.16.0.1 */
+};
+
+static struct in_addr param_ipaddr4_outer_src = {
+	__constant_htonl(0xac100002), /* 172.16.0.2 */
+};
+
+static struct in_addr param_ipaddr4_inner_dst = {
+	__constant_htonl(0xac100101), /* 172.16.1.1 */
+};
+
+static struct in_addr param_ipaddr4_inner_src = {
+	__constant_htonl(0xac100102), /* 172.16.1.2 */
+};
+
+static struct in6_addr param_ipaddr6_outer_dst = { /* 2002:db8:100::1 */
+	{ { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
+};
+
+static struct in6_addr param_ipaddr6_outer_src = { /* 2002:db8:100::2 */
+	{ { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
+};
+
+static struct in6_addr param_ipaddr6_inner_dst = { /* 2002:db8:200::1 */
+	{ { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
+};
+
+static struct in6_addr param_ipaddr6_inner_src = { /* 2002:db8:200::2 */
+	{ { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
+};
+
+#ifndef BIT
+#define BIT(nr) (1UL << (nr))
+#endif
+
+#define VN_ID 1 /* geneve network identifier */
+#define VN_PORT 4789 /* UDP port of the geneve device */
+#define UDP_SRC_PORT 22 /* inner UDP socket: local port */
+#define UDP_DST_PORT 48878 /* inner UDP socket: peer port */
+#define IPPREFIX_LEN 24
+#define IP6PREFIX_LEN 64
+#define TIMEOUT_SEC 10 /* SO_RCVTIMEO and select() timeout */
+#define TIMEOUT_USEC 100000 /* pause between ip_route_check() attempts */
+#define MAX_RETRIES 20 /* ip_route_check() attempts */
+/* One bit per tunnel flavour, named <inner>IN<outer> IP version. */
+#define UDP_TUNNEL_GENEVE_4IN4 0x01
+#define UDP_TUNNEL_GENEVE_6IN4 0x02
+#define UDP_TUNNEL_GENEVE_4IN6 0x04
+#define UDP_TUNNEL_GENEVE_6IN6 0x08
+
+#define UDP_TUNNEL_MAX_SEGMENTS BIT(7) /* 128 */
+
+#define UDP_TUNNEL_OUTER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_6IN4)
+#define UDP_TUNNEL_INNER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_4IN6)
+/* Header bytes ahead of the inner UDP payload, outer Ethernet excluded. */
+#define UDP_TUNNEL_GENEVE_4IN4_HDRLEN                        \
+	(ETH_HLEN + 2 * sizeof(struct iphdr) + GENEVE_HLEN + \
+	 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_6IN6_HDRLEN                          \
+	(ETH_HLEN + 2 * sizeof(struct ipv6hdr) + GENEVE_HLEN + \
+	 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_4IN6_HDRLEN                               \
+	(ETH_HLEN + sizeof(struct iphdr) + sizeof(struct ipv6hdr) + \
+	 GENEVE_HLEN + 2 * sizeof(struct udphdr))
+#define UDP_TUNNEL_GENEVE_6IN4_HDRLEN                               \
+	(ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct iphdr) + \
+	 GENEVE_HLEN + 2 * sizeof(struct udphdr))
+
+#define UDP_TUNNEL_HDRLEN(type)                                             \
+	((type) == UDP_TUNNEL_GENEVE_4IN4 ? UDP_TUNNEL_GENEVE_4IN4_HDRLEN : \
+	 (type) == UDP_TUNNEL_GENEVE_6IN6 ? UDP_TUNNEL_GENEVE_6IN6_HDRLEN : \
+	 (type) == UDP_TUNNEL_GENEVE_4IN6 ? UDP_TUNNEL_GENEVE_4IN6_HDRLEN : \
+	 (type) == UDP_TUNNEL_GENEVE_6IN4 ? UDP_TUNNEL_GENEVE_6IN4_HDRLEN : \
+					    0)
+/* Payload room left in ETH_DATA_LEN / ETH_MAX_MTU after the headers. */
+#define UDP_TUNNEL_MSS(type) (ETH_DATA_LEN - UDP_TUNNEL_HDRLEN(type))
+#define UDP_TUNNEL_MAX(type, is_tap) \
+	(ETH_MAX_MTU - UDP_TUNNEL_HDRLEN(type) - ((is_tap) ? ETH_HLEN : 0))
+/* Buffer size: vnet header, largest header stack, ETH_MAX_MTU of data. */
+#define TUN_VNET_TNL_SIZE sizeof(struct virtio_net_hdr_v1_hash_tunnel)
+#define MAX_VNET_TUNNEL_PACKET_SZ                                       \
+	(TUN_VNET_TNL_SIZE + ETH_HLEN + UDP_TUNNEL_GENEVE_6IN6_HDRLEN + \
+	 ETH_MAX_MTU)
+
+struct geneve_setup_config {
+	int family; /* AF_INET selects remote.r4, anything else remote.r6 */
+	union {
+		struct in_addr r4;
+		struct in6_addr r6;
+	} remote; /* tunnel peer address */
+	__be32 vnid; /* NOTE(review): set from host-order VN_ID - confirm */
+	__be16 vnport; /* htons(VN_PORT) in geneve_create() */
+	unsigned char hwaddr[6]; /* MAC given to the geneve device */
+	uint8_t csum; /* passed to the geneve udp_csum attribute */
+};
 
 static int tun_attach(int fd, char *dev)
 {
@@ -25,7 +130,7 @@ static int tun_attach(int fd, char *dev)
 	strcpy(ifr.ifr_name, dev);
 	ifr.ifr_flags = IFF_ATTACH_QUEUE;
 
-	return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+	return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
 }
 
 static int tun_detach(int fd, char *dev)
@@ -36,7 +141,7 @@ static int tun_detach(int fd, char *dev)
 	strcpy(ifr.ifr_name, dev);
 	ifr.ifr_flags = IFF_DETACH_QUEUE;
 
-	return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+	return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
 }
 
 static int tun_alloc(char *dev)
@@ -54,7 +159,7 @@ static int tun_alloc(char *dev)
 	strcpy(ifr.ifr_name, dev);
 	ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE;
 
-	err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+	err = ioctl(fd, TUNSETIFF, (void *)&ifr);
 	if (err < 0) {
 		fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno));
 		close(fd);
@@ -66,42 +171,315 @@ static int tun_alloc(char *dev)
 
 static int tun_delete(char *dev)
 {
-	struct {
-		struct nlmsghdr  nh;
-		struct ifinfomsg ifm;
-		unsigned char    data[64];
-	} req;
-	struct rtattr *rta;
-	int ret, rtnl;
+	return ip_link_del(dev);
+}
+
+static int tun_open(char *dev, const int flags, const int hdrlen,
+		    const int features, const unsigned char *mac_addr)
+{
+	struct ifreq ifr = { 0 };
+	int fd, sk = -1;
+	/* Create a tun/tap device, bring it up; return its fd or -1 on error. */
+	fd = open("/dev/net/tun", O_RDWR);
+	if (fd < 0) {
+		perror("open");
+		return -1;
+	}
+	/* ifr_name stays empty; the name TUNSETIFF fills in is copied to @dev. */
+	ifr.ifr_flags = flags;
+	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
+		perror("ioctl(TUNSETIFF)");
+		goto err;
+	}
+	strcpy(dev, ifr.ifr_name);
+	/* Optional: vnet header size (@hdrlen > 0) and offloads (@features). */
+	if (hdrlen > 0) {
+		if (ioctl(fd, TUNSETVNETHDRSZ, &hdrlen) < 0) {
+			perror("ioctl(TUNSETVNETHDRSZ)");
+			goto err;
+		}
+	}
+
+	if (features) {
+		if (ioctl(fd, TUNSETOFFLOAD, features) < 0) {
+			perror("ioctl(TUNSETOFFLOAD)");
+			goto err;
+		}
+	}
+	/* Interface flags and MAC are changed through a throwaway socket. */
+	sk = socket(PF_INET, SOCK_DGRAM, 0);
+	if (sk < 0) {
+		perror("socket");
+		goto err;
+	}
+
+	if (ioctl(sk, SIOCGIFFLAGS, &ifr) < 0) {
+		perror("ioctl(SIOCGIFFLAGS)");
+		goto err;
+	}
+
+	ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
+	if (ioctl(sk, SIOCSIFFLAGS, &ifr) < 0) {
+		perror("ioctl(SIOCSIFFLAGS)");
+		goto err;
+	}
+	/* The MAC is only set when @mac_addr is given and IFF_TAP requested. */
+	if (mac_addr && flags & IFF_TAP) {
+		ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+		memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETH_ALEN);
+
+		if (ioctl(sk, SIOCSIFHWADDR, &ifr) < 0) {
+			perror("ioctl(SIOCSIFHWADDR)");
+			goto err;
+		}
+	}
+
+out:
+	if (sk >= 0)
+		close(sk);
+	return fd;
+	/* Failure: drop the tun fd, then reuse the sk cleanup under out:. */
+err:
+	close(fd);
+	fd = -1;
+	goto out;
+}
+
+static size_t sockaddr_len(int family)
+{
+	bool v6 = family != AF_INET; /* anything but AF_INET counts as IPv6 */
+	return v6 ? sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
+}
+
+static int geneve_fill_newlink(struct rt_link_newlink_req *req, void *data)
+{
+	struct geneve_setup_config *cfg = data;
+	/* Short aliases so the remote setter calls below stay readable. */
+#define SET_GENEVE_REMOTE rt_link_newlink_req_set_linkinfo_data_geneve_remote
+#define SET_GENEVE_REMOTE6 rt_link_newlink_req_set_linkinfo_data_geneve_remote6
+	/* Copy MAC, id, port and UDP checksum setting from @data into @req. */
+	rt_link_newlink_req_set_address(req, cfg->hwaddr, ETH_ALEN);
+	rt_link_newlink_req_set_linkinfo_data_geneve_id(req, cfg->vnid);
+	rt_link_newlink_req_set_linkinfo_data_geneve_port(req, cfg->vnport);
+	rt_link_newlink_req_set_linkinfo_data_geneve_udp_csum(req, cfg->csum);
+	/* Exactly one remote attribute is set, selected by cfg->family. */
+	if (cfg->family == AF_INET)
+		SET_GENEVE_REMOTE(req, cfg->remote.r4.s_addr);
+	else
+		SET_GENEVE_REMOTE6(req, &cfg->remote.r6,
+				   sizeof(cfg->remote.r6));
+
+	return 0;
+}
+
+static int geneve_create(const char *dev, int family, void *remote,
+			 void *hwaddr)
+{
+	/* Add geneve link @dev: VN_ID, VN_PORT, UDP csum on, peer @remote. */
+	struct geneve_setup_config cfg;
+	size_t n;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.family = family;
+	cfg.vnid = VN_ID;
+	cfg.vnport = htons(VN_PORT);
+	cfg.csum = 1;
+	n = family == AF_INET ? sizeof(cfg.remote.r4) : sizeof(cfg.remote.r6);
+	memcpy(&cfg.remote, remote, n);
+	memcpy(cfg.hwaddr, hwaddr, sizeof(cfg.hwaddr));
+
+	return ip_link_add(dev, "geneve", geneve_fill_newlink, &cfg);
+}
+
+static int set_pmtu_discover(int fd, bool is_ipv4)
+{
+	/*
+	 * Request PMTUDISC_DO on @fd using the socket option that matches
+	 * the address family: IP_MTU_DISCOVER at SOL_IP for IPv4 sockets,
+	 * IPV6_MTU_DISCOVER at SOL_IPV6 otherwise.
+	 *
+	 * Returns whatever setsockopt() returns for that request; the
+	 * caller treats a negative value as failure.
+	 */
+	int level = is_ipv4 ? SOL_IP : SOL_IPV6;
+	int name = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
+	int val = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;
+
+	return setsockopt(fd, level, name, &val, sizeof(val));
+}
+
+static int udp_socket_open(struct sockaddr_storage *ssa, bool do_frag,
+			   bool do_connect, struct sockaddr_storage *dsa)
+{
+	/*
+	 * Create a UDP socket bound to @ssa, connect()ed to @dsa when
+	 * @do_connect is set, with a TIMEOUT_SEC receive timeout. Unless
+	 * @do_frag is set, PMTUDISC_DO is requested as well. The steps run
+	 * in that order; the first failing one is reported with perror().
+	 * Returns the socket fd, or -1 on any failure.
+	 */
+	struct timeval rcv_to = { .tv_sec = TIMEOUT_SEC };
+	int family = ssa->ss_family;
+	int salen = sockaddr_len(family);
+	const char *what;
+	int fd;
+
+	fd = socket(family, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return -1;
+
+	if (bind(fd, (struct sockaddr *)ssa, salen) < 0)
+		what = "bind";
+	else if (do_connect && connect(fd, (struct sockaddr *)dsa, salen) < 0)
+		what = "connect";
+	else if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &rcv_to,
+			    sizeof(rcv_to)) < 0)
+		what = "setsockopt(SO_RCVTIMEO)";
+	else if (!do_frag && set_pmtu_discover(fd, family == AF_INET) < 0)
+		what = "set_pmtu_discover";
+	else
+		return fd;
+
+	perror(what);
+	close(fd);
+	return -1;
+}
+
+static void parse_route_rsp(struct rt_route_getroute_rsp *rsp, void *rtm_type)
+{
+	*(uint8_t *)rtm_type = rsp->_hdr.rtm_type; /* @rtm_type: uint8_t out */
+}
+
+static int ip_route_check(const char *intf, int family, void *addr)
+{
+	uint8_t rtm_type, table = RT_TABLE_LOCAL;
+	int retries = MAX_RETRIES;
 
-	rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
-	if (rtnl < 0) {
-		fprintf(stderr, "can't open rtnl: %s\n", strerror(errno));
-		return 1;
+	while (retries-- > 0) {
+		if (ip_route_get(intf, family, table, addr, parse_route_rsp,
+				 &rtm_type) == 0 &&
+		    rtm_type == RTN_LOCAL)
+			break;
+
+		usleep(TIMEOUT_USEC);
 	}
 
-	memset(&req, 0, sizeof(req));
-	req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm)));
-	req.nh.nlmsg_flags = NLM_F_REQUEST;
-	req.nh.nlmsg_type = RTM_DELLINK;
+	if (retries < 0)
+		return -1;
+
+	return 0;
+}
+
+static int send_gso_udp_msg(int socket, struct sockaddr_storage *addr,
+			    uint8_t *send_buf, int send_len, int gso_size)
+{
+	char control[CMSG_SPACE(sizeof(uint16_t))] = { 0 };
+	int alen = sockaddr_len(addr->ss_family);
+	struct msghdr msg = { 0 };
+	struct iovec iov = { 0 };
+	int ret;
+
+	iov.iov_base = send_buf;
+	iov.iov_len = send_len;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_name = addr;
+	msg.msg_namelen = alen;
 
-	req.ifm.ifi_family = AF_UNSPEC;
+	if (gso_size > 0) {
+		struct cmsghdr *cmsg;
 
-	rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
-	rta->rta_type = IFLA_IFNAME;
-	rta->rta_len = RTA_LENGTH(IFNAMSIZ);
-	req.nh.nlmsg_len += rta->rta_len;
-	memcpy(RTA_DATA(rta), dev, IFNAMSIZ);
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
 
-	ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+		cmsg = CMSG_FIRSTHDR(&msg);
+		cmsg->cmsg_level = SOL_UDP;
+		cmsg->cmsg_type = UDP_SEGMENT;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+		*(uint16_t *)CMSG_DATA(cmsg) = gso_size;
+	}
+
+	ret = sendmsg(socket, &msg, 0);
 	if (ret < 0)
-		fprintf(stderr, "can't send: %s\n", strerror(errno));
-	ret = (unsigned int)ret != req.nh.nlmsg_len;
+		perror("sendmsg");
 
-	close(rtnl);
 	return ret;
 }
 
+static int validate_hdrlen(uint8_t **cur, int *len, int x)
+{
+	bool fits = x <= *len; /* are @x more bytes available at *cur? */
+
+	*cur += fits ? x : 0;
+	*len -= fits ? x : 0;
+	return fits ? 0 : -1;
+}
+
+/* Skip the IPv4 or IPv6 header at *cur; -1 unless it is whole and says UDP. */
+static int skip_ip_udp_proto(uint8_t **cur, int *len, bool is_ipv4)
+{
+	struct ipv6hdr *ip6 = (struct ipv6hdr *)*cur;
+	struct iphdr *ip4 = (struct iphdr *)*cur;
+
+	if (is_ipv4) {
+		if (validate_hdrlen(cur, len, sizeof(*ip4)))
+			return -1;
+		if (ip4->version != 4 || ip4->protocol != IPPROTO_UDP)
+			return -1;
+		return 0;
+	}
+
+	if (validate_hdrlen(cur, len, sizeof(*ip6)))
+		return -1;
+	if (ip6->version != 6 || ip6->nexthdr != IPPROTO_UDP)
+		return -1;
+	return 0;
+}
+
+/* Skip the UDP header at *cur; -1 unless it is whole and targets @dport. */
+static int skip_udp_to_port(uint8_t **cur, int *len, int dport)
+{
+	struct udphdr *udph = (struct udphdr *)*cur;
+
+	if (validate_hdrlen(cur, len, sizeof(*udph)))
+		return -1;
+	return ntohs(udph->dest) == dport ? 0 : -1;
+}
+
+/*
+ * Walk a frame read from the tun fd: vnet header, optional outer Ethernet
+ * (@is_tap), outer IP + UDP to VN_PORT, geneve, inner Ethernet, inner IP +
+ * UDP to UDP_DST_PORT. IP versions come from @tunnel_type. Returns the
+ * number of bytes left after the inner UDP header, or -1 on any mismatch.
+ */
+static int parse_udp_tunnel_vnet_packet(uint8_t *buf, int len, int tunnel_type,
+					bool is_tap)
+{
+	bool outer_v4 = tunnel_type & UDP_TUNNEL_OUTER_IPV4;
+	bool inner_v4 = tunnel_type & UDP_TUNNEL_INNER_IPV4;
+	uint8_t *cur = buf;
+
+	if (validate_hdrlen(&cur, &len, TUN_VNET_TNL_SIZE))
+		return -1;
+	if (is_tap && validate_hdrlen(&cur, &len, ETH_HLEN))
+		return -1;
+
+	if (skip_ip_udp_proto(&cur, &len, outer_v4) ||
+	    skip_udp_to_port(&cur, &len, VN_PORT))
+		return -1;
+
+	if (validate_hdrlen(&cur, &len, GENEVE_HLEN) ||
+	    validate_hdrlen(&cur, &len, ETH_HLEN))
+		return -1;
+
+	if (skip_ip_udp_proto(&cur, &len, inner_v4) ||
+	    skip_udp_to_port(&cur, &len, UDP_DST_PORT))
+		return -1;
+
+	return len;
+}
+
 FIXTURE(tun)
 {
 	char ifname[IFNAMSIZ];
@@ -127,31 +505,36 @@ FIXTURE_TEARDOWN(tun)
 		close(self->fd2);
 }
 
-TEST_F(tun, delete_detach_close) {
+TEST_F(tun, delete_detach_close)
+{
 	EXPECT_EQ(tun_delete(self->ifname), 0);
 	EXPECT_EQ(tun_detach(self->fd, self->ifname), -1);
 	EXPECT_EQ(errno, 22);
 }
 
-TEST_F(tun, detach_delete_close) {
+TEST_F(tun, detach_delete_close)
+{
 	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
 	EXPECT_EQ(tun_delete(self->ifname), 0);
 }
 
-TEST_F(tun, detach_close_delete) {
+TEST_F(tun, detach_close_delete)
+{
 	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
 	close(self->fd);
 	self->fd = -1;
 	EXPECT_EQ(tun_delete(self->ifname), 0);
 }
 
-TEST_F(tun, reattach_delete_close) {
+TEST_F(tun, reattach_delete_close)
+{
 	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
 	EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
 	EXPECT_EQ(tun_delete(self->ifname), 0);
 }
 
-TEST_F(tun, reattach_close_delete) {
+TEST_F(tun, reattach_close_delete)
+{
 	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
 	EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
 	close(self->fd);
@@ -159,4 +542,447 @@ TEST_F(tun, reattach_close_delete) {
 	EXPECT_EQ(tun_delete(self->ifname), 0);
 }
 
+FIXTURE(tun_vnet_udptnl)
+{
+	char ifname[IFNAMSIZ]; /* tun/tap device name filled in by tun_open() */
+	int fd, sock; /* tun/tap fd; UDP socket on the inner addresses */
+};
+
+FIXTURE_VARIANT(tun_vnet_udptnl)
+{
+	int tunnel_type; /* one of UDP_TUNNEL_GENEVE_* */
+	int gso_size; /* segment size used when sending/building packets */
+	int data_size; /* inner UDP payload bytes */
+	int r_num_mss; /* datagrams send_gso_packet expects to receive */
+	bool is_tap, no_gso; /* IFF_TAP vs IFF_TUN; skip vnet GSO checks */
+};
+
+/* clang-format off */
+#define TUN_VNET_UDPTNL_VARIANT_ADD(type, desc)                              \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1byte) {         \
+		/* no GSO: send a single byte */                             \
+		.tunnel_type = type,                                         \
+		.data_size = 1,                                              \
+		.r_num_mss = 1,                                              \
+		.is_tap = true,                                              \
+		.no_gso = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1mss) {          \
+		/* no GSO: send a single MSS, fall back to no GSO */         \
+		.tunnel_type = type,                                         \
+		.data_size = UDP_TUNNEL_MSS(type),                           \
+		.r_num_mss = 1,                                              \
+		.is_tap = true,                                              \
+		.no_gso = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_gtmss) {         \
+		/* no GSO: send a single MSS + 1B: fail */                   \
+		.tunnel_type = type,                                         \
+		.data_size = UDP_TUNNEL_MSS(type) + 1,                       \
+		.r_num_mss = 1,                                              \
+		.is_tap = true,                                              \
+		.no_gso = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1byte) {                 \
+		/* GSO: send 1 byte, gso 1 byte, fall back to no GSO */      \
+		.tunnel_type = type,                                         \
+		.gso_size = 1,                                               \
+		.data_size = 1,                                              \
+		.r_num_mss = 1,                                              \
+		.is_tap = true,                                              \
+		.no_gso = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1mss) {                  \
+		/* send a single MSS: fall back to no GSO */                 \
+		.tunnel_type = type,                                         \
+		.gso_size = UDP_TUNNEL_MSS(type),                            \
+		.data_size = UDP_TUNNEL_MSS(type),                           \
+		.r_num_mss = 1,                                              \
+		.is_tap = true,                                              \
+		.no_gso = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_ltgso) {                 \
+		/* data <= MSS < gso: will fall back to no GSO */            \
+		.tunnel_type = type,                                         \
+		.gso_size = UDP_TUNNEL_MSS(type) + 1,                        \
+		.data_size = UDP_TUNNEL_MSS(type),                           \
+		.r_num_mss = 1,                                              \
+		.is_tap = true,                                              \
+		.no_gso = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_gtgso) {                 \
+		/* GSO: a single MSS + 1B */                                 \
+		.tunnel_type = type,                                         \
+		.gso_size = UDP_TUNNEL_MSS(type),                            \
+		.data_size = UDP_TUNNEL_MSS(type) + 1,                       \
+		.r_num_mss = 2,                                              \
+		.is_tap = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_2mss) {                  \
+		/* GSO: send exactly 2 MSS */                                \
+		.tunnel_type = type,                                         \
+		.gso_size = UDP_TUNNEL_MSS(type),                            \
+		.data_size = UDP_TUNNEL_MSS(type) * 2,                       \
+		.r_num_mss = 2,                                              \
+		.is_tap = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxbytes) {              \
+		/* GSO: send max bytes */                                    \
+		.tunnel_type = type,                                         \
+		.gso_size = UDP_TUNNEL_MSS(type),                            \
+		.data_size = UDP_TUNNEL_MAX(type, true),                     \
+		.r_num_mss = UDP_TUNNEL_MAX(type, true) /                    \
+			     UDP_TUNNEL_MSS(type) + 1,                       \
+		.is_tap = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_over_maxbytes) {         \
+		/* GSO: send oversize max bytes: fail */                     \
+		.tunnel_type = type,                                         \
+		.gso_size = UDP_TUNNEL_MSS(type),                            \
+		.data_size = ETH_MAX_MTU,                                    \
+		.r_num_mss = ETH_MAX_MTU / UDP_TUNNEL_MSS(type) + 1,         \
+		.is_tap = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxsegs) {               \
+		/* GSO: send max number of min sized segments */             \
+		.tunnel_type = type,                                         \
+		.gso_size = 1,                                               \
+		.data_size = UDP_TUNNEL_MAX_SEGMENTS,                        \
+		.r_num_mss = UDP_TUNNEL_MAX_SEGMENTS,                        \
+		.is_tap = true,                                              \
+	};                                                                   \
+	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_5byte) {                 \
+		/* GSO: send 5 bytes, gso 2 bytes */                         \
+		.tunnel_type = type,                                         \
+		.gso_size = 2,                                               \
+		.data_size = 5,                                              \
+		.r_num_mss = 3,                                              \
+		.is_tap = true,                                              \
+	} /* clang-format on */
+
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN4, 4in4);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN4, 6in4);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN6, 4in6);
+TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN6, 6in6);
+
+static void assign_ifaddr_vars(int family, int is_outer, void **srcip,
+			       void **dstip, void **srcmac, void **dstmac)
+{
+	/*
+	 * Point the out-parameters at the static outer or inner (@is_outer)
+	 * test addresses; any @family other than AF_INET selects IPv6.
+	 */
+	bool v4 = family == AF_INET;
+
+	if (is_outer) {
+		*srcip = v4 ? (void *)&param_ipaddr4_outer_src :
+			      (void *)&param_ipaddr6_outer_src;
+		*dstip = v4 ? (void *)&param_ipaddr4_outer_dst :
+			      (void *)&param_ipaddr6_outer_dst;
+		*srcmac = param_hwaddr_outer_src;
+		*dstmac = param_hwaddr_outer_dst;
+		return;
+	}
+	*srcip = v4 ? (void *)&param_ipaddr4_inner_src :
+		      (void *)&param_ipaddr6_inner_src;
+	*dstip = v4 ? (void *)&param_ipaddr4_inner_dst :
+		      (void *)&param_ipaddr6_inner_dst;
+	*srcmac = param_hwaddr_inner_src;
+	*dstmac = param_hwaddr_inner_dst;
+}
+
+static void assign_sockaddr_vars(int family, int is_outer,
+				 struct sockaddr_storage *src,
+				 struct sockaddr_storage *dst)
+{
+	/*
+	 * Fill @src/@dst with the outer or inner test addresses for @family;
+	 * inner ones also get the UDP test ports. Everything else is zeroed
+	 * because callers pass uninitialised stack storage.
+	 */
+	memset(src, 0, sizeof(*src));
+	memset(dst, 0, sizeof(*dst));
+	src->ss_family = family;
+	dst->ss_family = family;
+	if (family == AF_INET) {
+		struct sockaddr_in *s4 = (void *)src, *d4 = (void *)dst;
+
+		s4->sin_addr = is_outer ? param_ipaddr4_outer_src :
+					  param_ipaddr4_inner_src;
+		d4->sin_addr = is_outer ? param_ipaddr4_outer_dst :
+					  param_ipaddr4_inner_dst;
+		s4->sin_port = is_outer ? 0 : htons(UDP_SRC_PORT);
+		d4->sin_port = is_outer ? 0 : htons(UDP_DST_PORT);
+	} else {
+		struct sockaddr_in6 *s6 = (void *)src, *d6 = (void *)dst;
+
+		s6->sin6_addr = is_outer ? param_ipaddr6_outer_src :
+					   param_ipaddr6_inner_src;
+		d6->sin6_addr = is_outer ? param_ipaddr6_outer_dst :
+					   param_ipaddr6_inner_dst;
+		s6->sin6_port = is_outer ? 0 : htons(UDP_SRC_PORT);
+		d6->sin6_port = is_outer ? 0 : htons(UDP_DST_PORT);
+	}
+}
+
+FIXTURE_SETUP(tun_vnet_udptnl)
+{
+	int ret, family, prefix, flags, features;
+	int tunnel_type = variant->tunnel_type;
+	struct sockaddr_storage ssa, dsa;
+	void *sip, *dip, *smac, *dmac;
+	/* Build tun/tap -> geneve1 -> inner UDP socket, asserting each step. */
+	flags = (variant->is_tap ? IFF_TAP : IFF_TUN) | IFF_VNET_HDR |
+		IFF_MULTI_QUEUE | IFF_NO_PI;
+	features = TUN_F_CSUM | TUN_F_UDP_TUNNEL_GSO |
+		   TUN_F_UDP_TUNNEL_GSO_CSUM | TUN_F_USO4 | TUN_F_USO6;
+	self->fd = tun_open(self->ifname, flags, TUN_VNET_TNL_SIZE, features,
+			    param_hwaddr_outer_src);
+	ASSERT_GE(self->fd, 0);
+	/* Outer layer: address, neighbour and local route on the tun device. */
+	family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET : AF_INET6;
+	prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
+	assign_ifaddr_vars(family, 1, &sip, &dip, &smac, &dmac);
+
+	ret = ip_addr_add(self->ifname, family, sip, prefix);
+	ASSERT_EQ(ret, 0);
+	ret = ip_neigh_add(self->ifname, family, dip, dmac);
+	ASSERT_EQ(ret, 0);
+	ret = ip_route_check(self->ifname, family, sip);
+	ASSERT_EQ(ret, 0);
+	/* geneve1 uses the outer destination address as its remote. */
+	ret = geneve_create(param_dev_geneve_name, family, dip,
+			    param_hwaddr_inner_src);
+	ASSERT_EQ(ret, 0);
+	/* Inner layer: the same three steps on the geneve device. */
+	family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : AF_INET6;
+	prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
+	assign_ifaddr_vars(family, 0, &sip, &dip, &smac, &dmac);
+
+	ret = ip_addr_add(param_dev_geneve_name, family, sip, prefix);
+	ASSERT_EQ(ret, 0);
+	ret = ip_neigh_add(param_dev_geneve_name, family, dip, dmac);
+	ASSERT_EQ(ret, 0);
+	ret = ip_route_check(param_dev_geneve_name, family, sip);
+	ASSERT_EQ(ret, 0);
+	/* Connected UDP socket between the inner addresses, PMTUDISC_DO set. */
+	assign_sockaddr_vars(family, 0, &ssa, &dsa);
+	self->sock = udp_socket_open(&ssa, false, true, &dsa);
+	ASSERT_GE(self->sock, 0);
+}
+
+FIXTURE_TEARDOWN(tun_vnet_udptnl)
+{
+	/*
+	 * Undo FIXTURE_SETUP. The tun fd used to be leaked; it is closed last
+	 * so that tun_delete() still operates on an attached device.
+	 */
+	if (self->sock != -1)
+		close(self->sock);
+	EXPECT_EQ(ip_link_del(param_dev_geneve_name), 0);
+	EXPECT_EQ(tun_delete(self->ifname), 0);
+	if (self->fd >= 0)
+		close(self->fd);
+}
+
+static int build_gso_packet_into_tun(const FIXTURE_VARIANT(tun_vnet_udptnl) *
+					     variant,
+				     uint8_t *buf)
+{
+	int pktlen, hlen, proto, inner_family, outer_family;
+	int tunnel_type = variant->tunnel_type;
+	int payload_len = variant->data_size;
+	int gso_size = variant->gso_size;
+	uint8_t *outer_udph, *cur = buf;
+	void *sip, *dip, *smac, *dmac;
+	bool is_tap = variant->is_tap;
+	/* Write vnet hdr + full tunnel packet into @buf; returns bytes written. */
+	hlen = (is_tap ? ETH_HLEN : 0) + UDP_TUNNEL_HDRLEN(tunnel_type);
+	inner_family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET :
+							       AF_INET6;
+	outer_family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET :
+							       AF_INET6;
+	/* hlen: every header byte between the vnet header and the payload. */
+	cur += build_virtio_net_hdr_v1_hash_tunnel(cur, is_tap, hlen, gso_size,
+						   outer_family, inner_family);
+	/* pktlen tracks the bytes that follow the header being written next. */
+	pktlen = hlen + payload_len;
+	assign_ifaddr_vars(outer_family, 1, &sip, &dip, &smac, &dmac);
+
+	if (is_tap) {
+		proto = outer_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
+		pktlen -= ETH_HLEN;
+		cur += build_eth(cur, proto, dmac, smac);
+	}
+
+	if (outer_family == AF_INET) {
+		pktlen = pktlen - sizeof(struct iphdr);
+		cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip);
+	} else {
+		pktlen = pktlen - sizeof(struct ipv6hdr);
+		cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);
+	}
+
+	outer_udph = cur;
+	assign_ifaddr_vars(inner_family, 0, &sip, &dip, &smac, &dmac);
+
+	pktlen -= sizeof(struct udphdr);
+	proto = inner_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
+	cur += build_udp_header(cur, UDP_SRC_PORT, VN_PORT, pktlen);
+	cur += build_geneve_header(cur, VN_ID);
+	cur += build_eth(cur, proto, dmac, smac);
+	/* Inner IP payload length: inner UDP header plus the data. */
+	pktlen = sizeof(struct udphdr) + payload_len;
+	if (inner_family == AF_INET)
+		cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip);
+	else
+		cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);
+
+	cur += build_udp_packet(cur, UDP_DST_PORT, UDP_SRC_PORT, payload_len,
+				inner_family, false);
+	/* Outer UDP checksum is filled in last, once the packet is complete. */
+	build_udp_packet_csum(outer_udph, outer_family, false);
+
+	return cur - buf;
+}
+
+static int
+receive_gso_packet_from_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
+			       const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
+			       int *r_num_mss)
+{
+	/*
+	 * Drain datagrams from self->sock until data_size bytes arrived or
+	 * recv() returns <= 0. *r_num_mss counts datagrams; returns the bytes.
+	 */
+	uint8_t rx_buf[MAX_VNET_TUNNEL_PACKET_SZ];
+	int wanted = variant->data_size, got = 0;
+
+	for (; got < wanted; (*r_num_mss)++) {
+		int n = recv(self->sock, rx_buf, sizeof(rx_buf), 0);
+
+		if (n < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
+			perror("recv");
+		if (n <= 0)
+			break;
+		got += n;
+	}
+	return got;
+}
+
+static int send_gso_packet_into_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
+				       const FIXTURE_VARIANT(tun_vnet_udptnl) *
+					       variant)
+{
+	/* Send data_size zero bytes to the inner peer via send_gso_udp_msg(). */
+	bool inner_v4 = variant->tunnel_type & UDP_TUNNEL_INNER_IPV4;
+	uint8_t payload[MAX_VNET_TUNNEL_PACKET_SZ] = { 0 };
+	struct sockaddr_storage local, peer;
+
+	/* Only the destination is used here; @local is filled but ignored. */
+	assign_sockaddr_vars(inner_v4 ? AF_INET : AF_INET6, 0, &local, &peer);
+	return send_gso_udp_msg(self->sock, &peer, payload, variant->data_size,
+				variant->gso_size);
+}
+
+static int
+receive_gso_packet_from_tun(FIXTURE_DATA(tun_vnet_udptnl) * self,
+			    const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
+			    struct virtio_net_hdr_v1_hash_tunnel *vnet_hdr)
+{
+	/*
+	 * Read frames from the tun fd until data_size inner payload bytes were
+	 * counted, or until select()/read() fails or select() times out.
+	 * Frames that do not parse as the expected tunnel packet are skipped.
+	 * The first accepted frame's vnet header goes to @vnet_hdr.
+	 */
+	struct timeval timeout = { .tv_sec = TIMEOUT_SEC };
+	uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ];
+	int ret, len, total_len = 0;
+	int tun_fd = self->fd;
+	fd_set fdset;
+
+	while (total_len < variant->data_size) {
+		FD_ZERO(&fdset);
+		FD_SET(tun_fd, &fdset);
+		ret = select(tun_fd + 1, &fdset, NULL, NULL, &timeout);
+		if (ret < 0) /* ret == 0 is a timeout; errno is stale then */
+			perror("select");
+		if (ret <= 0)
+			break;
+		if (!FD_ISSET(tun_fd, &fdset))
+			continue;
+
+		len = read(tun_fd, buf, sizeof(buf));
+		if (len <= 0) {
+			if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
+				perror("read");
+			break;
+		}
+		len = parse_udp_tunnel_vnet_packet(buf, len,
+						   variant->tunnel_type,
+						   variant->is_tap);
+		if (len < 0)
+			continue;
+		if (total_len == 0)
+			memcpy(vnet_hdr, buf, TUN_VNET_TNL_SIZE);
+		total_len += len;
+	}
+
+	return total_len;
+}
+
+TEST_F(tun_vnet_udptnl, send_gso_packet)
+{
+	uint8_t pkt[MAX_VNET_TUNNEL_PACKET_SZ];
+	int r_num_mss = 0;
+	int ret, off;
+	/* Inject one tunnel packet (vnet header included) through the tun fd. */
+	memset(pkt, 0, sizeof(pkt));
+	off = build_gso_packet_into_tun(variant, pkt);
+	ret = write(self->fd, pkt, off);
+	ASSERT_EQ(ret, off);
+	/* The inner socket must see data_size bytes in r_num_mss datagrams. */
+	ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss);
+	EXPECT_EQ(ret, variant->data_size);
+	EXPECT_EQ(r_num_mss, variant->r_num_mss);
+}
+
+TEST_F(tun_vnet_udptnl, recv_gso_packet)
+{
+	struct virtio_net_hdr_v1_hash_tunnel vnet_hdr = { 0 };
+	struct virtio_net_hdr_v1 *vh = &vnet_hdr.hash_hdr.hdr;
+	int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;
+	/* Send data_size bytes through the tunnel from the inner UDP socket. */
+	ret = send_gso_packet_into_tunnel(self, variant);
+	EXPECT_EQ(ret, variant->data_size);
+	/* Read them back from the tun fd together with the first vnet header. */
+	memset(&vnet_hdr, 0, sizeof(vnet_hdr));
+	ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr);
+	EXPECT_EQ(ret, variant->data_size);
+	/* Unless no_gso is set, check the reported gso_size and gso_type. */
+	if (!variant->no_gso) {
+		EXPECT_EQ(vh->gso_size, variant->gso_size);
+		gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ?
+				    (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) :
+				    (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6);
+		EXPECT_EQ(vh->gso_type, gso_type);
+	}
+}
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_nogsosz_gtmss, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_nogsosz_gtmss, recv_gso_packet);
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, send_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, send_gso_packet);
+
+XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, recv_gso_packet);
+XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, recv_gso_packet);
+
 TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/tuntap_helpers.h b/tools/testing/selftests/net/tuntap_helpers.h
new file mode 100644
index 000000000000..d6c0437136ec
--- /dev/null
+++ b/tools/testing/selftests/net/tuntap_helpers.h
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _TUNTAP_HELPERS_H
+#define _TUNTAP_HELPERS_H
+
+#include <errno.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/virtio_net.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/udp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ynl.h>
+
+#include "rt-route-user.h"
+#include "rt-addr-user.h"
+#include "rt-neigh-user.h"
+#include "rt-link-user.h"
+
+#define GENEVE_HLEN 8
+#define PKT_DATA 0xCB
+#define TUNTAP_DEFAULT_TTL 8
+#define TUNTAP_DEFAULT_IPID 1337
+
+unsigned int if_nametoindex(const char *ifname);
+
+/* Address size in bytes: in_addr for AF_INET, in6_addr for anything else */
+static inline int ip_addr_len(int family)
+{
+	return family != AF_INET ? sizeof(struct in6_addr) : sizeof(struct in_addr);
+}
+
+static inline void fill_ifaddr_msg(struct ifaddrmsg *ifam, int family,
+				   int prefix, int flags, const char *dev)
+{
+	ifam->ifa_index = if_nametoindex(dev); /* 0 when @dev is unknown */
+	ifam->ifa_scope = RT_SCOPE_UNIVERSE;
+	ifam->ifa_flags = flags;
+	ifam->ifa_prefixlen = prefix;
+	ifam->ifa_family = family;
+}
+
+/* Add @addr/@prefix to @dev; rt_addr_newaddr() result, or -1 if setup fails */
+static inline int ip_addr_add(const char *dev, int family, void *addr,
+			      uint8_t prefix)
+{
+	struct rt_addr_newaddr_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_addr_family, NULL);
+	if (!ys)
+		return ret;
+
+	req = rt_addr_newaddr_req_alloc();
+	if (req) {
+		/* permanent address without DAD; NLM_F_EXCL rejects duplicates */
+		fill_ifaddr_msg(&req->_hdr, family, prefix,
+				IFA_F_PERMANENT | IFA_F_NODAD, dev);
+		rt_addr_newaddr_req_set_nlflags(req, NLM_F_REQUEST |
+						NLM_F_CREATE | NLM_F_EXCL);
+		rt_addr_newaddr_req_set_local(req, addr, ip_addr_len(family));
+		ret = rt_addr_newaddr(ys, req);
+		rt_addr_newaddr_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+static inline void fill_neigh_req_header(struct ndmsg *ndm, int family,
+					 int state, const char *dev)
+{
+	ndm->ndm_ifindex = if_nametoindex(dev); /* 0 when @dev is unknown */
+	ndm->ndm_type = RTN_UNICAST;
+	ndm->ndm_flags = 0;
+	ndm->ndm_state = state;
+	ndm->ndm_family = family;
+}
+
+/* Add a NUD_PERMANENT neighbour @addr -> @lladdr on @dev; -1 if setup fails */
+static inline int ip_neigh_add(const char *dev, int family, void *addr,
+			       unsigned char *lladdr)
+{
+	struct rt_neigh_newneigh_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_neigh_family, NULL);
+	if (!ys)
+		return ret;
+
+	req = rt_neigh_newneigh_req_alloc();
+	if (req) {
+		fill_neigh_req_header(&req->_hdr, family, NUD_PERMANENT, dev);
+		rt_neigh_newneigh_req_set_nlflags(req, NLM_F_REQUEST |
+						  NLM_F_CREATE | NLM_F_EXCL);
+		rt_neigh_newneigh_req_set_dst(req, addr, ip_addr_len(family));
+		rt_neigh_newneigh_req_set_lladdr(req, lladdr, ETH_ALEN);
+		rt_neigh_newneigh_req_set_ifindex(req, if_nametoindex(dev));
+
+		ret = rt_neigh_newneigh(ys, req);
+		rt_neigh_newneigh_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+static inline void fill_route_req_header(struct rtmsg *rtm, int family,
+					 int table)
+{
+	rtm->rtm_table = table; /* every other rtmsg field is left untouched */
+	rtm->rtm_family = family;
+}
+
+/* Query the route to @dst via @dev in @table; 0 if a reply arrived, else -1.
+ * A reply is passed to @parse_rsp together with @out when a callback is set. */
+static inline int
+ip_route_get(const char *dev, int family, int table, void *dst,
+	     void (*parse_rsp)(struct rt_route_getroute_rsp *rsp, void *out),
+	     void *out)
+{
+	struct rt_route_getroute_req *req;
+	struct rt_route_getroute_rsp *rsp;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_route_family, NULL);
+	if (!ys)
+		return ret;
+
+	req = rt_route_getroute_req_alloc();
+	if (!req)
+		goto out_sock;
+
+	fill_route_req_header(&req->_hdr, family, table);
+	rt_route_getroute_req_set_nlflags(req, NLM_F_REQUEST);
+	rt_route_getroute_req_set_dst(req, dst, ip_addr_len(family));
+	rt_route_getroute_req_set_oif(req, if_nametoindex(dev));
+
+	rsp = rt_route_getroute(ys, req);
+	if (rsp) {
+		ret = 0;
+		if (parse_rsp)
+			parse_rsp(rsp, out);
+		rt_route_getroute_rsp_free(rsp);
+	}
+
+	rt_route_getroute_req_free(req);
+out_sock:
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Create link @dev of kind @link_type with IFF_UP requested. @fill_link_attr,
+ * when given, may add attributes; a negative return from it skips the request.
+ */
+static inline int
+ip_link_add(const char *dev, char *link_type,
+	    int (*fill_link_attr)(struct rt_link_newlink_req *req, void *data),
+	    void *data)
+{
+	struct rt_link_newlink_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_link_family, NULL);
+	if (!ys)
+		return ret;
+
+	req = rt_link_newlink_req_alloc();
+	if (req) {
+		req->_hdr.ifi_flags = IFF_UP;
+		rt_link_newlink_req_set_nlflags(req, NLM_F_REQUEST |
+						NLM_F_CREATE | NLM_F_EXCL);
+		rt_link_newlink_req_set_ifname(req, dev);
+		rt_link_newlink_req_set_linkinfo_kind(req, link_type);
+
+		if (!fill_link_attr || fill_link_attr(req, data) >= 0)
+			ret = rt_link_newlink(ys, req);
+		rt_link_newlink_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/* Delete link @dev by name; rt_link_dellink() result, or -1 if setup fails */
+static inline int ip_link_del(const char *dev)
+{
+	struct rt_link_dellink_req *req;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	ys = ynl_sock_create(&ynl_rt_link_family, NULL);
+	if (!ys)
+		return ret;
+
+	req = rt_link_dellink_req_alloc();
+	if (req) {
+		rt_link_dellink_req_set_nlflags(req, NLM_F_REQUEST);
+		rt_link_dellink_req_set_ifname(req, dev);
+
+		ret = rt_link_dellink(ys, req);
+		rt_link_dellink_req_free(req);
+	}
+
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/* Write an Ethernet header at @buf (@proto in host order); returns ETH_HLEN */
+static inline size_t build_eth(uint8_t *buf, uint16_t proto, unsigned char *src,
+			       unsigned char *dest)
+{
+	struct ethhdr *hdr = (struct ethhdr *)buf;
+
+	memcpy(hdr->h_dest, dest, ETH_ALEN);
+	memcpy(hdr->h_source, src, ETH_ALEN);
+	hdr->h_proto = htons(proto);
+	return ETH_HLEN;
+}
+
+/* Sum @len bytes at @buf as native-endian 16-bit words, without folding. */
+static inline uint32_t add_csum(const uint8_t *buf, int len)
+{
+	const uint16_t *sbuf = (const uint16_t *)buf;
+	uint32_t sum = 0;
+
+	for (; len > 1; len -= 2)
+		sum += *sbuf++;
+
+	/* Odd tail byte: zero-pad it in memory order so BE hosts agree too */
+	if (len > 0)
+		sum += htons((uint16_t)(*(const uint8_t *)sbuf << 8));
+
+	return sum;
+}
+
+static inline uint16_t finish_ip_csum(uint32_t sum)
+{
+	while (sum > 0xffff) /* fold carries back into the low 16 bits */
+		sum = (sum >> 16) + (sum & 0xffff);
+	return (uint16_t)~sum;
+}
+
+/* Checksum of @len bytes at @buf, seeded with the partial sum @sum */
+static inline uint16_t build_ip_csum(const uint8_t *buf, int len, uint32_t sum)
+{
+	return finish_ip_csum(sum + add_csum(buf, len));
+}
+
+/* Fill a 20-byte IPv4 header at @buf (tos/frag_off left as found); returns 20 */
+static inline int build_ipv4_header(uint8_t *buf, uint8_t proto, int payload_len,
+				    struct in_addr *src, struct in_addr *dst)
+{
+	struct iphdr *iph = (struct iphdr *)buf;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = TUNTAP_DEFAULT_TTL;
+	iph->tot_len = htons(sizeof(*iph) + payload_len);
+	iph->id = htons(TUNTAP_DEFAULT_IPID);
+	iph->protocol = proto;
+	iph->saddr = src->s_addr;
+	iph->daddr = dst->s_addr;
+	iph->check = 0; /* must be zero while summing; @buf may hold stale data */
+	iph->check = build_ip_csum(buf, iph->ihl << 2, 0);
+	return iph->ihl << 2;
+}
+
+/* Overwrite bits 4..11 of the header's first 16-bit word (the traffic class)
+ * with @dsfield, keeping the version nibble and the top flow-label nibble. */
+static inline void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+	uint16_t *first = (uint16_t *)ip6h;
+	uint16_t keep = ntohs(*first) & 0xF00F;
+
+	*first = htons(keep | ((uint16_t)dsfield << 4));
+}
+
+/* Fill an extension-less IPv6 header at @buf; returns sizeof(struct ipv6hdr) */
+static inline int build_ipv6_header(uint8_t *buf, uint8_t proto,
+				    uint8_t dsfield, int payload_len,
+				    struct in6_addr *src, struct in6_addr *dst)
+{
+	struct ipv6hdr *hdr = (struct ipv6hdr *)buf;
+
+	hdr->version = 6;
+	ipv6_set_dsfield(hdr, dsfield);
+	hdr->payload_len = htons(payload_len);
+	hdr->nexthdr = proto;
+	hdr->hop_limit = TUNTAP_DEFAULT_TTL;
+	memcpy(&hdr->daddr, dst, sizeof(hdr->daddr));
+	memcpy(&hdr->saddr, src, sizeof(hdr->saddr));
+	return sizeof(*hdr);
+}
+
+static inline int build_geneve_header(uint8_t *buf, uint32_t vni)
+{
+	const uint32_t vni_be = htonl(vni << 8); /* 24-bit VNI, low byte zero */
+	const uint16_t proto_be = htons(ETH_P_TEB);
+
+	memcpy(&buf[2], &proto_be, sizeof(proto_be)); /* bytes 0-1 untouched */
+	memcpy(&buf[4], &vni_be, sizeof(vni_be));
+	return GENEVE_HLEN;
+}
+
+static inline int build_udp_header(uint8_t *buf, uint16_t sport, uint16_t dport,
+				   int payload_len)
+{
+	struct udphdr *hdr = (struct udphdr *)buf;
+
+	hdr->len = htons(sizeof(*hdr) + payload_len); /* header + payload */
+	hdr->dest = htons(dport);
+	hdr->source = htons(sport);
+	return sizeof(*hdr); /* ->check is not written here */
+}
+
+/* Set udph->check from the pseudo-header, plus the datagram unless @csum_off */
+static inline void build_udp_packet_csum(uint8_t *buf, int family,
+					 bool csum_off)
+{
+	struct udphdr *udph = (struct udphdr *)buf;
+	size_t ipalen = ip_addr_len(family);
+	uint32_t sum;
+
+	/* Without IP options/extension headers the addresses end right at @buf */
+	sum = add_csum(buf - 2 * ipalen, 2 * ipalen);
+	sum += htons(IPPROTO_UDP) + udph->len;
+	if (!csum_off) /* udph->len is big-endian; add_csum() needs a host count */
+		sum += add_csum(buf, ntohs(udph->len));
+
+	udph->check = finish_ip_csum(sum);
+}
+
+/* Build a UDP datagram at @buf: header, @payload_len bytes of PKT_DATA and the
+ * checksum (which reads the IP addresses stored just before @buf). */
+static inline int build_udp_packet(uint8_t *buf, uint16_t sport, uint16_t dport,
+				   int payload_len, int family, bool csum_off)
+{
+	const int hlen = build_udp_header(buf, sport, dport, payload_len);
+
+	memset(buf + hlen, PKT_DATA, payload_len);
+	build_udp_packet_csum(buf, family, csum_off);
+	return hlen + payload_len;
+}
+
+/* Fill the vnet header at @buf for a UDP tunnel carrying an inner Ethernet
+ * frame; gso_type is written only when @gso_size != 0. Returns header size. */
+static inline int build_virtio_net_hdr_v1_hash_tunnel(uint8_t *buf, bool is_tap,
+						      int hdr_len, int gso_size,
+						      int outer_family,
+						      int inner_family)
+{
+	struct virtio_net_hdr_v1_hash_tunnel *tnl = (void *)buf;
+	struct virtio_net_hdr_v1 *hdr = &tnl->hash_hdr.hdr;
+	int outer_th, inner_nh;
+
+	outer_th = is_tap ? ETH_HLEN : 0;
+	outer_th += (outer_family == AF_INET) ? sizeof(struct iphdr) :
+						sizeof(struct ipv6hdr);
+	inner_nh = outer_th + sizeof(struct udphdr) + GENEVE_HLEN + ETH_HLEN;
+
+	tnl->outer_th_offset = outer_th;
+	tnl->inner_nh_offset = inner_nh;
+
+	hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+	hdr->csum_start = inner_nh + ((inner_family == AF_INET) ?
+			  sizeof(struct iphdr) : sizeof(struct ipv6hdr));
+	hdr->csum_offset = __builtin_offsetof(struct udphdr, check);
+	hdr->hdr_len = hdr_len;
+	hdr->gso_size = gso_size;
+
+	if (gso_size)
+		hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4 |
+				(outer_family == AF_INET ?
+				 VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 :
+				 VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6);
+
+	return sizeof(*tnl);
+}
+
+#endif /* _TUNTAP_HELPERS_H */
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index dae91eb97d69..170be192f5c7 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -206,18 +206,16 @@ static void __print_timestamp(const char *name, struct timespec *cur,
 	fprintf(stderr, "\n");
 }
 
-static void print_timestamp_usr(void)
+static void record_timestamp_usr(void)
 {
 	if (clock_gettime(CLOCK_REALTIME, &ts_usr))
 		error(1, errno, "clock_gettime");
-
-	__print_timestamp("  USR", &ts_usr, 0, 0);
 }
 
 static void print_timestamp(struct scm_timestamping *tss, int tstype,
 			    int tskey, int payload_len)
 {
-	const char *tsname;
+	const char *tsname = NULL;
 
 	validate_key(tskey, tstype);
 
@@ -599,8 +597,6 @@ static void do_test(int family, unsigned int report_opt)
 			fill_header_udp(buf + off, family == PF_INET);
 		}
 
-		print_timestamp_usr();
-
 		iov.iov_base = buf;
 		iov.iov_len = total_len;
 
@@ -655,10 +651,14 @@ static void do_test(int family, unsigned int report_opt)
 
 		}
 
+		record_timestamp_usr();
+
 		val = sendmsg(fd, &msg, 0);
 		if (val != total_len)
 			error(1, errno, "send");
 
+		__print_timestamp("  USR", &ts_usr, 0, 0);
+
 		/* wait for all errors to be queued, else ACKs arrive OOO */
 		if (cfg_sleep_usec)
 			usleep(cfg_sleep_usec);
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index d5ffd8c9172e..b17e032a6d75 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,7 +3,7 @@
 #
 # Run a series of udpgro functional tests.
 
-source net_helper.sh
+source lib.sh
 
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
@@ -48,7 +48,7 @@ run_one() {
 
 	cfg_veth
 
-	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} &
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} &
 	local PID1=$!
 
 	wait_local_port_listen ${PEER_NS} 8000 udp
@@ -95,7 +95,7 @@ run_one_nat() {
 	# will land on the 'plain' one
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 &
 	local PID1=$!
-	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} &
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} &
 	local PID2=$!
 
 	wait_local_port_listen "${PEER_NS}" 8000 udp
@@ -117,9 +117,9 @@ run_one_2sock() {
 
 	cfg_veth
 
-	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 &
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 &
 	local PID1=$!
-	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} &
+	ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} &
 	local PID2=$!
 
 	wait_local_port_listen "${PEER_NS}" 12345 udp
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index c51ea90a1395..54fa4821bc5e 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,11 +3,11 @@
 #
 # Run a series of udpgro benchmarks
 
-source net_helper.sh
+source lib.sh
 
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
 
 cleanup() {
 	local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 17404f49cdb6..9a2cfec1153e 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,11 +3,11 @@
 #
 # Run a series of udpgro benchmarks
 
-source net_helper.sh
+source lib.sh
 
 readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
 
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
 
 cleanup() {
 	local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 550d8eb3e224..9b722c1e4b0f 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-source net_helper.sh
+source lib.sh
 
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
 readonly BASE="ns-$(mktemp -u XXXXXX)"
 readonly SRC=2
 readonly DST=1
@@ -162,6 +162,39 @@ run_test() {
 	echo " ok"
 }
 
+# Run a 2s iperf3 flow NS_SRC -> $dst; fail on a non-zero exit code or on any
+# UDP checksum error counted in NS_DST.
+run_test_csum() {
+	local -r msg="$1"
+	local -r dst="$2"
+	local csum_error_filter=UdpInCsumErrors csum_errors
+
+	printf "%-40s" "$msg"
+	is_ipv6 "$dst" && csum_error_filter=Udp6InCsumErrors
+
+	ip netns exec "$NS_DST" iperf3 -s -1 >/dev/null &
+	local spid="$!" # grab the PID before any helper can start another job
+	wait_local_port_listen "$NS_DST" 5201 tcp
+	ip netns exec "$NS_SRC" iperf3 -c "$dst" -t 2 >/dev/null
+	local retc="$?"
+	wait "$spid"
+	local rets="$?"
+	if [ "$rets" -ne 0 ] || [ "$retc" -ne 0 ]; then
+		echo " fail client exit code $retc, server $rets"
+		ret=1
+		return
+	fi
+
+	csum_errors=$(ip netns exec "$NS_DST" nstat -as "$csum_error_filter" |
+		      grep "$csum_error_filter" | awk '{print $2}')
+	if [ -n "$csum_errors" ] && [ "$csum_errors" -gt 0 ]; then
+		echo " fail - csum error on receive $csum_errors, expected 0"
+		ret=1
+		return
+	fi
+	echo " ok"
+}
+
 run_bench() {
 	local -r msg=$1
 	local -r dst=$2
@@ -260,6 +293,37 @@ for family in 4 6; do
 	ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
 	run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
 	cleanup
+
+	# force segmentation and re-aggregation
+	create_vxlan_pair
+	ip netns exec "$NS_DST" ethtool -K veth"$DST" generic-receive-offload on
+	ip netns exec "$NS_SRC" ethtool -K veth"$SRC" tso off
+	ip -n "$NS_SRC" link set dev veth"$SRC" mtu 1430
+
+	# forward to a 2nd veth pair
+	ip -n "$NS_DST" link add br0 type bridge
+	ip -n "$NS_DST" link set dev veth"$DST" master br0
+
+	# segment the aggregated TSO packet, without csum offload
+	ip -n "$NS_DST" link add veth_segment type veth peer veth_rx
+	for FEATURE in tso tx-udp-segmentation tx-checksumming; do
+		ip netns exec "$NS_DST" ethtool -K veth_segment "$FEATURE" off
+	done
+	ip -n "$NS_DST" link set dev veth_segment master br0 up
+	ip -n "$NS_DST" link set dev br0 up
+	ip -n "$NS_DST" link set dev veth_rx up
+
+	# move the lower layer IP in the last added veth
+	for ADDR in "$BM_NET_V4$DST/24" "$BM_NET_V6$DST/64"; do
+		# the nodad argument makes iproute2 emit a harmless warning
+		# for IPv4 addresses
+		ip -n "$NS_DST" addr del dev veth"$DST" "$ADDR"
+		ip -n "$NS_DST" addr add dev veth_rx "$ADDR" \
+			nodad 2>/dev/null
+	done
+
+	run_test_csum "GSO after GRO" "$OL_NET$DST"
+	cleanup
 done
 
 exit $ret
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 477392715a9a..86d80cce55b4 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -25,7 +25,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 
-#include "../kselftest.h"
+#include "kselftest.h"
 
 #ifndef ETH_MAX_MTU
 #define ETH_MAX_MTU 0xFFFFU
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 6bb7dfaa30b6..9709dd067c72 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
 readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
 readonly BASE=`basename $STATS`
 readonly SRC=2
diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh
index e7cb8c678bde..e8c02c64e03a 100755
--- a/tools/testing/selftests/net/vlan_bridge_binding.sh
+++ b/tools/testing/selftests/net/vlan_bridge_binding.sh
@@ -18,29 +18,29 @@ setup_prepare()
 {
 	local port
 
-	ip_link_add br up type bridge vlan_filtering 1
+	adf_ip_link_add br up type bridge vlan_filtering 1
 
 	for port in d1 d2 d3; do
-		ip_link_add $port type veth peer name r$port
-		ip_link_set_up $port
-		ip_link_set_up r$port
-		ip_link_set_master $port br
+		adf_ip_link_add $port type veth peer name r$port
+		adf_ip_link_set_up $port
+		adf_ip_link_set_up r$port
+		adf_ip_link_set_master $port br
 	done
 
-	bridge_vlan_add vid 11 dev br self
-	bridge_vlan_add vid 11 dev d1 master
+	adf_bridge_vlan_add vid 11 dev br self
+	adf_bridge_vlan_add vid 11 dev d1 master
 
-	bridge_vlan_add vid 12 dev br self
-	bridge_vlan_add vid 12 dev d2 master
+	adf_bridge_vlan_add vid 12 dev br self
+	adf_bridge_vlan_add vid 12 dev d2 master
 
-	bridge_vlan_add vid 13 dev br self
-	bridge_vlan_add vid 13 dev d1 master
-	bridge_vlan_add vid 13 dev d2 master
+	adf_bridge_vlan_add vid 13 dev br self
+	adf_bridge_vlan_add vid 13 dev d1 master
+	adf_bridge_vlan_add vid 13 dev d2 master
 
-	bridge_vlan_add vid 14 dev br self
-	bridge_vlan_add vid 14 dev d1 master
-	bridge_vlan_add vid 14 dev d2 master
-	bridge_vlan_add vid 14 dev d3 master
+	adf_bridge_vlan_add vid 14 dev br self
+	adf_bridge_vlan_add vid 14 dev d1 master
+	adf_bridge_vlan_add vid 14 dev d2 master
+	adf_bridge_vlan_add vid 14 dev d3 master
 }
 
 operstate_is()
@@ -74,7 +74,7 @@ add_one_vlan()
 	local link=$1; shift
 	local id=$1; shift
 
-	ip_link_add $link.$id link $link type vlan id $id "$@"
+	adf_ip_link_add $link.$id link $link type vlan id $id "$@"
 }
 
 add_vlans()
@@ -98,7 +98,7 @@ down_netdevs()
 	local dev
 
 	for dev in "$@"; do
-		ip_link_set_down $dev
+		adf_ip_link_set_down $dev
 	done
 }
 
@@ -207,13 +207,13 @@ test_binding_toggle_off()
 	do_test_binding_off : "on->off"
 }
 
-dfr_set_binding_on()
+adf_set_binding_on()
 {
 	set_vlans type vlan bridge_binding on
 	defer set_vlans type vlan bridge_binding off
 }
 
-dfr_set_binding_off()
+adf_set_binding_off()
 {
 	set_vlans type vlan bridge_binding off
 	defer set_vlans type vlan bridge_binding on
@@ -223,14 +223,14 @@ test_binding_toggle_on_when_lower_down()
 {
 	add_vlans bridge_binding off
 	set_vlans up
-	do_test_binding_on dfr_set_binding_on "off->on when lower down"
+	do_test_binding_on adf_set_binding_on "off->on when lower down"
 }
 
 test_binding_toggle_off_when_lower_down()
 {
 	add_vlans bridge_binding on
 	set_vlans up
-	do_test_binding_off dfr_set_binding_off "on->off when lower down"
+	do_test_binding_off adf_set_binding_off "on->off when lower down"
 }
 
 test_binding_toggle_on_when_upper_down()
@@ -249,6 +249,8 @@ test_binding_toggle_off_when_upper_down()
 	do_test_binding_off : "on->off when upper down"
 }
 
+require_command jq
+
 trap defer_scopes_cleanup EXIT
 setup_prepare
 tests_run
diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh
index 7bc804ffaf7c..e195d5cab6f7 100755
--- a/tools/testing/selftests/net/vlan_hw_filter.sh
+++ b/tools/testing/selftests/net/vlan_hw_filter.sh
@@ -3,27 +3,101 @@
 
 readonly NETNS="ns-$(mktemp -u XXXXXX)"
 
+ALL_TESTS="
+	test_vlan_filter_check
+	test_vlan0_del_crash_01
+	test_vlan0_del_crash_02
+	test_vlan0_del_crash_03
+	test_vid0_memleak
+"
+
 ret=0
 
+setup() {
+	ip netns add "${NETNS}" # one fresh namespace per test
+}
+
 cleanup() {
-	ip netns del $NETNS
+	ip netns del $NETNS 2>/dev/null # stderr dropped: the netns may already be gone
 }
 
 trap cleanup EXIT
 
 fail() {
-    echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
-    ret=1
+	echo "ERROR: ${1:-unexpected return code} (ret: $?)" >&2
+	ret=1 # sticky failure flag, used as the script's exit status
+}
+
+# Run every test named in TESTS, or in ALL_TESTS when TESTS is unset or empty
+tests_run() {
+	local name
+	for name in ${TESTS:-$ALL_TESTS}; do
+		"$name"
+	done
+}
+
+test_vlan_filter_check() {
+	setup # fresh netns for this test
+	ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+	ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
+	ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
+	ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+	ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
+	ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
+	ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
+	ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+	cleanup # remove the netns again
 }
 
-ip netns add ${NETNS}
-ip netns exec ${NETNS} ip link add bond0 type bond mode 0
-ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
-ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
-ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
-ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
-ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
-ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
-ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+# Enable rx-vlan-filter on an up bond0 that already has vlan id 0, then bounce it and delete the vlan
+test_vlan0_del_crash_01() {
+	setup
+	ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+	ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+	ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+	ip netns exec ${NETNS} ip link set dev bond0 up
+	ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+	ip netns exec ${NETNS} ip link set dev bond0 down
+	ip netns exec ${NETNS} ip link set dev bond0 up
+	ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+	cleanup
+}
+
+# Enable rx-vlan-filter on an up bond0, add vlan id 0 afterwards, then bounce bond0 and delete the vlan
+test_vlan0_del_crash_02() {
+	setup
+	ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+	ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+	ip netns exec ${NETNS} ip link set dev bond0 up
+	ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+	ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+	ip netns exec ${NETNS} ip link set dev bond0 down
+	ip netns exec ${NETNS} ip link set dev bond0 up
+	ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+	cleanup
+}
+
+# Enable rx-vlan-filter on an up bond0 that already has vlan id 0, then
+# delete the vlan while bond0 is down (exercises the vlan unregister path)
+test_vlan0_del_crash_03() {
+	setup
+	ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+	ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+	ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+	ip netns exec ${NETNS} ip link set dev bond0 up
+	ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+	ip netns exec ${NETNS} ip link set dev bond0 down
+	ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+	cleanup
+}
+
+test_vid0_memleak() {
+	setup # fresh netns; no vlan device is created in this test
+	ip netns exec ${NETNS} ip link add bond0 up type bond mode 0
+	ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+	ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function"
+	cleanup # remove the netns again
+}
 
+tests_run
 exit $ret
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index e9c2f71da207..ce34cb2e6e0b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -275,7 +275,7 @@ setup_sym()
 
 
 	# Wait for ip config to settle
-	sleep 2
+	slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
 }
 
 setup_asym()
@@ -370,7 +370,7 @@ setup_asym()
 	ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
 
 	# Wait for ip config to settle
-	sleep 2
+	slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
 }
 
 check_connectivity()
diff --git a/tools/testing/selftests/net/xdp_dummy.bpf.c b/tools/testing/selftests/net/xdp_dummy.bpf.c
deleted file mode 100644
index d988b2e0cee8..000000000000
--- a/tools/testing/selftests/net/xdp_dummy.bpf.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#define KBUILD_MODNAME "xdp_dummy"
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-SEC("xdp")
-int xdp_dummy_prog(struct xdp_md *ctx)
-{
-	return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/xfrm_state.sh b/tools/testing/selftests/net/xfrm_state.sh
new file mode 100755
index 000000000000..f6c54a6496d7
--- /dev/null
+++ b/tools/testing/selftests/net/xfrm_state.sh
@@ -0,0 +1,613 @@
+#!/bin/bash -e
+# SPDX-License-Identifier: GPL-2.0
+#
+# xfrm/IPsec tests.
+# Currently implemented:
+# - ICMP error source address verification (IETF RFC 4301 section 6)
+# - ICMP MTU exceeded handling over IPsec tunnels.
+#
+# Addresses and topology:
+# IPv4 prefix 10.1.c.d IPv6 prefix fc00:c::d/64 where c is the segment number
+# and d is the interface identifier.
+# IPv6 uses the same c and d as IPv4, but starts with the IPv6 prefix instead of the IPv4 prefix.
+#
+# Network topology default: ns_set_v4 or ns_set_v6
+#   1.1   1.2   2.1   2.2   3.1   3.2   4.1   4.2   5.1   5.2  6.1  6.2
+#  eth0  eth1  eth0  eth1  eth0  eth1  eth0  eth1  eth0  eth1 eth0  eth1
+# a -------- r1 -------- s1 -------- r2 -------- s2 -------- r3 -------- b
+# a, b = Alice and Bob hosts without IPsec.
+# r1, r2, r3 routers, without IPsec
+# s1, s2: IPsec gateways/routers that set up the tunnel(s).
+
+# Network topology x: IPsec gateway that generates ICMP response - ns_set_v4x or ns_set_v6x
+#   1.1   1.2   2.1   2.2   3.1   3.2   4.1   4.2   5.1   5.2
+#  eth0  eth1  eth0  eth1  eth0  eth1  eth0  eth1  eth0  eth1
+# a -------- r1 -------- s1 -------- r2 -------- s2 -------- b
+
+. lib.sh
+
+EXIT_ON_TEST_FAIL=no # set to yes by -e; checked in cleanup()
+PAUSE=no # no | fail (-p) | always (-P); checked in cleanup()
+VERBOSE=${VERBOSE:-0} # taken from the environment if set; each -v adds 1
+DEBUG=0 # NOTE(review): not referenced anywhere else in this script
+
+#	Name				Description
+tests="
+	unreachable_ipv4		IPv4 unreachable from router r3
+	unreachable_ipv6		IPv6 unreachable from router r3
+	unreachable_gw_ipv4		IPv4 unreachable from IPsec gateway s2
+	unreachable_gw_ipv6		IPv6 unreachable from IPsec gateway s2
+	mtu_ipv4_s2			IPv4 MTU exceeded from IPsec gateway s2
+	mtu_ipv6_s2			IPv6 MTU exceeded from IPsec gateway s2
+	mtu_ipv4_r2			IPv4 MTU exceeded from ESP router r2
+	mtu_ipv6_r2			IPv6 MTU exceeded from ESP router r2
+	mtu_ipv4_r3			IPv4 MTU exceeded from router r3
+	mtu_ipv6_r3			IPv6 MTU exceeded from router r3"
+
+prefix4="10.1" # IPv4 addresses are built as 10.1.<segment>.<host>
+prefix6="fc00" # IPv6 addresses are built as fc00:<segment>::<host>
+
+run_cmd_err() {
+	cmd="$*"
+
+	if [ "$VERBOSE" -gt 0 ]; then
+		printf "  COMMAND: %s\n" "$cmd"
+	fi
+
+	out="$($cmd 2>&1)" && rc=0 || rc=$?
+	if [ "$VERBOSE" -gt 1 ] && [ -n "$out" ]; then
+		echo "  $out"
+		echo
+	fi
+	return 0
+}
+
+run_cmd() { # wrapper around run_cmd_err; results land in cmd/out/rc
+	run_cmd_err "$@" || exit 1 # NOTE(review): run_cmd_err always returns 0, so this exit never fires
+}
+
+run_test() { # run test ${name} in a subshell; fold its status into exitcode/all_skipped
+	# If errexit is set, unset it for sub-shell and restore after test
+	errexit=0
+	if [[ $- =~ "e" ]]; then
+		errexit=1
+		set +e
+	fi
+
+	(
+		unset IFS # the main script narrows IFS to tab/newline; use default splitting here
+
+		# shellcheck disable=SC2030 # fail is read by trap/cleanup within this subshell
+		fail="yes" # assume failure until the test says otherwise
+
+		# Since cleanup() relies on variables modified by this sub shell,
+		# it has to run in this context.
+		trap 'log_test_error $?; cleanup' EXIT INT TERM
+
+		if [ "$VERBOSE" -gt 0 ]; then
+			printf "\n#############################################################\n\n"
+		fi
+
+		case "${name}" in
+		# can't use eval and test names shell check will complain about unused code
+		unreachable_ipv4)    test_unreachable_ipv4 ;;
+		unreachable_ipv6)    test_unreachable_ipv6 ;;
+		unreachable_gw_ipv4) test_unreachable_gw_ipv4 ;;
+		unreachable_gw_ipv6) test_unreachable_gw_ipv6 ;;
+		mtu_ipv4_s2)         test_mtu_ipv4_s2 ;;
+		mtu_ipv6_s2)         test_mtu_ipv6_s2 ;;
+		mtu_ipv4_r2)         test_mtu_ipv4_r2 ;;
+		mtu_ipv6_r2)         test_mtu_ipv6_r2 ;;
+		mtu_ipv4_r3)         test_mtu_ipv4_r3 ;;
+		mtu_ipv6_r3)         test_mtu_ipv6_r3 ;;
+		esac
+		ret=$? # status of the test function (0 if ${name} matched nothing)
+
+		if [ $ret -eq 0 ]; then
+			fail="no"
+
+			if [ "$VERBOSE" -gt 1 ]; then
+				show_icmp_filter
+			fi
+
+			printf "TEST: %-60s [ PASS ]\n" "${desc}"
+		elif [ $ret -eq "$ksft_skip" ]; then
+			fail="no"
+			printf "TEST: %-60s [SKIP]\n" "${desc}"
+		fi # any other status: fail stays "yes" and the EXIT trap prints the FAIL line
+
+		return $ret # leaves the subshell with the test status; the EXIT trap runs on the way out
+	)
+	ret=$?
+
+	[ $errexit -eq 1 ] && set -e
+
+	case $ret in
+	0)
+		all_skipped=false
+		[ "$exitcode" -eq "$ksft_skip" ] && exitcode=0
+		;;
+	"$ksft_skip")
+		[ $all_skipped = true ] && exitcode=$ksft_skip
+		;;
+	*)
+		all_skipped=false
+		exitcode=1
+		[ "${EXIT_ON_TEST_FAIL}" = "yes" ] && exit 1 # -e: cleanup's exit only ends the subshell, so stop here
+		;;
+	esac
+
+	return 0 # don't trigger errexit (-e); actual status in exitcode
+}
+
+setup_namespaces() {
+	local namespaces=""
+
+	NS_A=""
+	NS_B=""
+	NS_R1=""
+	NS_R2=""
+	NS_R3=""
+	NS_S1=""
+	NS_S2=""
+
+	for ns in ${ns_set}; do
+		namespaces="$namespaces NS_${ns^^}"
+	done
+
+	# shellcheck disable=SC2086 # setup_ns expects unquoted list
+	setup_ns $namespaces
+
+	ns_active= #ordered list of namespaces for this test.
+
+	[ -n "${NS_A}" ] && ns_a=(ip netns exec "${NS_A}") && ns_active="${ns_active} $NS_A"
+	[ -n "${NS_R1}" ] && ns_active="${ns_active} $NS_R1"
+	[ -n "${NS_S1}" ] && ns_s1=(ip netns exec "${NS_S1}") && ns_active="${ns_active} $NS_S1"
+	[ -n "${NS_R2}" ] && ns_r2=(ip netns exec "${NS_R2}") && ns_active="${ns_active} $NS_R2"
+	[ -n "${NS_S2}" ] && ns_s2=(ip netns exec "${NS_S2}") && ns_active="${ns_active} $NS_S2"
+	[ -n "${NS_R3}" ] && ns_r3=(ip netns exec "${NS_R3}") && ns_active="${ns_active} $NS_R3"
+	[ -n "${NS_B}" ] && ns_active="${ns_active} $NS_B"
+}
+
+# addr_add NS ADDR DEV: add ADDR (address/prefix length) to DEV inside
+# namespace NS, then bring DEV up.
+addr_add() {
+	local netns="$1" address="$2" ifname="$3"
+
+	run_cmd ip netns exec "${netns}" ip addr add "${address}" dev "${ifname}"
+	run_cmd ip netns exec "${netns}" ip link set up "${ifname}"
+}
+
+veth_add() {
+	local ns=$2
+	local pns=$1
+	local -a ns_cmd=(ip netns exec "${pns}")
+	local ln="eth0"
+	local rn="eth1"
+
+	run_cmd "${ns_cmd[@]}" ip link add "${ln}" type veth peer name "${rn}" netns "${ns}"
+}
+
+show_icmp_filter() { # print the nft ruleset of namespace r2
+	run_cmd "${ns_r2[@]}" nft list ruleset # overwrites the globals cmd and out
+	echo "$out"
+}
+
+setup_icmp_filter() {
+	run_cmd "${ns_r2[@]}" nft add table inet filter
+	run_cmd "${ns_r2[@]}" nft add chain inet filter FORWARD \
+		'{ type filter hook forward priority filter; policy drop ; }'
+	run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD counter ip protocol esp \
+		counter log accept
+	run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD counter ip protocol \
+		icmp counter log drop
+
+	if [ "$VERBOSE" -gt 0 ]; then
+		run_cmd "${ns_r2[@]}" nft list ruleset
+		echo "$out"
+	fi
+}
+
+setup_icmpv6_filter() {
+	run_cmd "${ns_r2[@]}" nft add table inet filter
+	run_cmd "${ns_r2[@]}" nft add chain inet filter FORWARD \
+		'{ type filter hook forward priority filter; policy drop ; }'
+	run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD ip6 nexthdr \
+		ipv6-icmp icmpv6 type echo-request counter log drop
+	run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD ip6 nexthdr esp \
+		counter log accept
+	run_cmd "${ns_r2[@]}" nft add rule inet filter FORWARD ip6 nexthdr \
+		ipv6-icmp icmpv6 type \
+		'{nd-neighbor-solicit,nd-neighbor-advert,nd-router-solicit,nd-router-advert}' \
+		counter log drop
+	if [ "$VERBOSE" -gt 0 ]; then
+		run_cmd "${ns_r2[@]}" nft list ruleset
+		echo "$out"
+	fi
+}
+
+# Copy the selected topology's tunnel endpoints (src/dst) and protected
+# networks (src_net/dst_net) into the s1_* variables read by setup_xfrm_mode.
+set_xfrm_params() {
+	s1_src="${src}" s1_dst="${dst}"
+	s1_src_net="${src_net}" s1_dst_net="${dst_net}"
+}
+
+setup_ns_set_v4() {
+	ns_set="a r1 s1 r2 s2 r3 b"    # Network topology default
+	imax=$(echo "$ns_set" | wc -w) # number of namespaces in this topology
+
+	src="10.1.3.1"
+	dst="10.1.4.2"
+	src_net="10.1.1.0/24"
+	dst_net="10.1.6.0/24"
+
+	prefix=${prefix4}
+	prefix_len=24
+	s="."
+	S="."
+
+	set_xfrm_params
+}
+
+setup_ns_set_v4x() {
+	ns_set="a r1 s1 r2 s2 b"       # Network topology: x
+	imax=$(echo "$ns_set" | wc -w) # number of namespaces in this topology
+	prefix=${prefix4}
+	s="."
+	S="."
+	src="10.1.3.1"
+	dst="10.1.4.2"
+	src_net="10.1.1.0/24"
+	dst_net="10.1.5.0/24"
+	prefix_len=24
+
+	set_xfrm_params
+}
+
+setup_ns_set_v6() {
+	ns_set="a r1 s1 r2 s2 r3 b"    # Network topology default
+	imax=$(echo "$ns_set" | wc -w) # number of namespaces in this topology
+	prefix=${prefix6}
+	s=":"
+	S="::"
+	src="fc00:3::1"
+	dst="fc00:4::2"
+	src_net="fc00:1::0/64"
+	dst_net="fc00:6::0/64"
+	prefix_len=64
+
+	set_xfrm_params
+}
+
+setup_ns_set_v6x() {
+	ns_set="a r1 s1 r2 s2 b" # Network topology: x
+	imax=$(echo "$ns_set" | wc -w)
+	prefix=${prefix6}
+	s=":"
+	S="::"
+	src="fc00:3::1"
+	dst="fc00:4::2"
+	src_net="fc00:1::0/64"
+	dst_net="fc00:5::0/64"
+	prefix_len=64
+
+	set_xfrm_params
+}
+
+# Chain the namespaces of ns_active together and install static routes.
+# Link k uses network k: the left node gets host 1 on eth0, the right host 2 on eth1.
+setup_network() {
+	local knob; local -a ns_cmd
+
+	# Pass 1: veth links, addresses and per-namespace sysctls
+	i=1 p=""
+	for ns in ${ns_active}; do
+		ns_cmd=(ip netns exec "${ns}")
+
+		if [ "${i}" -gt 1 ]; then
+			veth_add "${p}" "${ns}"
+			addr_add "${p}" "${prefix}${s}$((i - 1))${S}1/${prefix_len}" eth0
+			addr_add "${ns}" "${prefix}${s}$((i - 1))${S}2/${prefix_len}" eth1
+		fi
+
+		for knob in net/ipv4/ip_forward=1 net/ipv6/conf/all/forwarding=1 \
+			net/ipv6/conf/default/accept_dad=0; do
+			run_cmd "${ns_cmd[@]}" sysctl -q "${knob}"
+		done
+
+		p=${ns}
+		i=$((i + 1))
+	done
+
+	# Pass 2: routes, added only once every address exists
+	i=1
+	for ns in ${ns_active}; do
+		ns_cmd=(ip netns exec "${ns}")
+
+		# Networks further along the chain are reached via the next node
+		if [ "${i}" -ne "${imax}" ]; then
+			nhf="${prefix}${s}${i}${S}2"
+			for j in $(seq $((i + 1)) "${imax}"); do
+				run_cmd "${ns_cmd[@]}" ip route replace \
+					"${prefix}${s}${j}${S}0/${prefix_len}" via "${nhf}"
+			done
+		fi
+
+		# Networks further back are reached via the previous node
+		if [ "${i}" -gt 1 ]; then
+			nhr="${prefix}${s}$((i - 1))${S}1"
+			for j in $(seq 1 $((i - 2))); do
+				run_cmd "${ns_cmd[@]}" ip route replace \
+					"${prefix}${s}${j}${S}0/${prefix_len}" via "${nhr}"
+			done
+		fi
+
+		i=$((i + 1))
+	done
+}
+
+setup_xfrm_mode() {
+	local MODE=${1:-tunnel}
+	if [ "${MODE}" != "tunnel" ] && [ "${MODE}" != "beet" ]; then
+		echo "xfrm mode ${MODE} not supported"
+		log_test_error
+		return 1
+	fi
+
+	run_cmd "${ns_s1[@]}" ip xfrm policy add src "${s1_src_net}" dst "${s1_dst_net}" dir out \
+		tmpl src "${s1_src}" dst "${s1_dst}" proto esp reqid 1 mode "${MODE}"
+
+	# no "input" policies. we are only doing forwarding so far
+
+	run_cmd "${ns_s1[@]}" ip xfrm policy add src "${s1_dst_net}" dst "${s1_src_net}" dir fwd \
+		flag icmp tmpl src "${s1_dst}" dst "${s1_src}" proto esp reqid 2 mode "${MODE}"
+
+	run_cmd "${ns_s1[@]}" ip xfrm state add src "${s1_src}" dst "${s1_dst}" proto esp spi 1 \
+		reqid 1 mode "${MODE}" aead 'rfc4106(gcm(aes))' \
+		0x1111111111111111111111111111111111111111 96 \
+		sel src "${s1_src_net}" dst "${s1_dst_net}" dir out
+
+	run_cmd "${ns_s1[@]}" ip xfrm state add src "${s1_dst}" dst "${s1_src}" proto esp spi 2 \
+		reqid 2 flag icmp replay-window 8 mode "${MODE}" aead 'rfc4106(gcm(aes))' \
+		0x2222222222222222222222222222222222222222 96 \
+		sel src "${s1_dst_net}" dst "${s1_src_net}" dir in
+
+	run_cmd "${ns_s2[@]}" ip xfrm policy add src "${s1_dst_net}" dst "${s1_src_net}" dir out \
+		flag icmp tmpl src "${s1_dst}" dst "${s1_src}" proto esp reqid 2 mode "${MODE}"
+
+	run_cmd "${ns_s2[@]}" ip xfrm policy add src "${s1_src_net}" dst "${s1_dst_net}" dir fwd \
+		tmpl src "${s1_src}" dst "${s1_dst}" proto esp reqid 1 mode "${MODE}"
+
+	run_cmd "${ns_s2[@]}" ip xfrm state add src "${s1_dst}" dst "${s1_src}" proto esp spi 2 \
+		reqid 2 mode "${MODE}" aead 'rfc4106(gcm(aes))' \
+		0x2222222222222222222222222222222222222222 96 \
+		sel src "${s1_dst_net}" dst "${s1_src_net}" dir out
+
+	run_cmd "${ns_s2[@]}" ip xfrm state add src "${s1_src}" dst "${s1_dst}" proto esp spi 1 \
+		reqid 1 flag icmp replay-window 8 mode "${MODE}" aead 'rfc4106(gcm(aes))' \
+		0x1111111111111111111111111111111111111111 96 \
+		sel src "${s1_src_net}" dst "${s1_dst_net}" dir in
+}
+
+setup_xfrm() { # the "xfrm" step of setup(): IPsec configuration in tunnel mode
+	setup_xfrm_mode tunnel
+}
+
+setup() { # run the named setup steps in the order given; non-zero return on the first failure
+	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return "$ksft_skip"
+
+	for arg; do # iterates over this function's own arguments
+		case "${arg}" in
+		ns_set_v4)     setup_ns_set_v4 ;;
+		ns_set_v4x)    setup_ns_set_v4x ;;
+		ns_set_v6)     setup_ns_set_v6 ;;
+		ns_set_v6x)    setup_ns_set_v6x ;;
+		namespaces)    setup_namespaces ;;
+		network)       setup_network ;;
+		xfrm)          setup_xfrm ;;
+		icmp_filter)   setup_icmp_filter ;;
+		icmpv6_filter) setup_icmpv6_filter ;;
+		*) echo "  ${arg} not supported"; return 1 ;;
+		esac || return 1 # a step returning non-zero aborts the remaining steps
+	done
+}
+
+# shellcheck disable=SC2317 # called via trap
+pause() { # prompt and block until a line is read from stdin
+	echo
+	echo "Pausing. Hit enter to continue"
+	read -r _ # the line itself is discarded
+}
+
+# Print the FAIL line for the current test plus the last command and its output.
+# shellcheck disable=SC2317 # called via trap
+log_test_error() {
+	local last_cmd="${cmd}" last_out="${out}" # show_icmp_filter below overwrites cmd/out
+	# shellcheck disable=SC2031 # fail is set in subshell, read via trap
+	if [ "${fail}" = "yes" ] && [ -n "${desc}" ]; then
+		if [ "$VERBOSE" -gt 0 ]; then show_icmp_filter; fi
+		printf "TEST: %-60s [ FAIL ]  %s\n" "${desc}" "${name}"
+		[ -n "${last_cmd}" ] && printf '%s\n\n' "${last_cmd}"
+		[ -n "${last_out}" ] && printf '%s\n\n' "${last_out}"
+	fi
+}
+
+# shellcheck disable=SC2317 # called via trap
+cleanup() { # run from run_test's trap: optional pause, namespace teardown, optional exit 1
+	# shellcheck disable=SC2031 # fail is set in subshell, read via trap
+	[[ "$PAUSE" = "always" || ( "$PAUSE" = "fail" && "$fail" = "yes" ) ]] && pause
+	cleanup_all_ns # not defined in this file; presumably provided by lib.sh
+	# shellcheck disable=SC2031 # fail is set in subshell, read via trap
+	[ "${EXIT_ON_TEST_FAIL}" = "yes" ] && [ "${fail}" = "yes" ] && exit 1 # in the trap this ends only run_test's subshell
+}
+
+test_unreachable_ipv6() {
+	setup ns_set_v6 namespaces network xfrm icmpv6_filter || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::2
+	run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::3
+	rc=0
+	echo -e "$out" | grep -q -E 'From fc00:5::2 icmp_seq.* Destination' || rc=1
+	return "${rc}"
+}
+
+test_unreachable_gw_ipv6() {
+	setup ns_set_v6x namespaces network xfrm icmpv6_filter || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:5::2
+	run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:5::3
+	rc=0
+	echo -e "$out" | grep -q -E 'From fc00:4::2 icmp_seq.* Destination' || rc=1
+	return "${rc}"
+}
+
+test_unreachable_ipv4() {
+	setup ns_set_v4 namespaces network icmp_filter xfrm || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.2
+	run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.3
+	rc=0
+	echo -e "$out" | grep -q -E 'From 10.1.5.2 icmp_seq.* Destination' || rc=1
+	return "${rc}"
+}
+
+test_unreachable_gw_ipv4() {
+	setup ns_set_v4x namespaces network icmp_filter xfrm || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.5.2
+	run_cmd_err "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.5.3
+	rc=0
+	echo -e "$out" | grep -q -E 'From 10.1.4.2 icmp_seq.* Destination' || rc=1
+	return "${rc}"
+}
+
+test_mtu_ipv4_r2() {
+	setup ns_set_v4 namespaces network icmp_filter xfrm || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.2
+	run_cmd "${ns_r2[@]}" ip route replace 10.1.3.0/24 dev eth1 src 10.1.3.2 mtu 1300
+	run_cmd "${ns_r2[@]}" ip route replace 10.1.4.0/24 dev eth0 src 10.1.4.1 mtu 1300
+	# shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword
+	run_cmd "${ns_a[@]}" ping -M do -s 1300 -W 5 -w 4 -c 1 10.1.6.2 || true
+	rc=0
+	echo -e "$out" | grep -q -E "From 10.1.2.2 icmp_seq=.* Frag needed and DF set" || rc=1
+	return "${rc}"
+}
+
+test_mtu_ipv6_r2() {
+	setup ns_set_v6 namespaces network xfrm icmpv6_filter || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::2
+	run_cmd "${ns_r2[@]}" ip -6 route replace fc00:3::/64 \
+		dev eth1 metric 256 src fc00:3::2 mtu 1300
+	run_cmd "${ns_r2[@]}" ip -6 route replace fc00:4::/64 \
+		dev eth0 metric 256 src fc00:4::1 mtu 1300
+	# shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword
+	run_cmd "${ns_a[@]}" ping -M do -s 1300 -W 5 -w 4 -c 1 fc00:6::2 || true
+	rc=0
+	echo -e "$out" | grep -q -E "From fc00:2::2 icmp_seq=.* Packet too big: mtu=1230" || rc=1
+	return "${rc}"
+}
+
+test_mtu_ipv4_r3() {
+	setup ns_set_v4 namespaces network icmp_filter xfrm || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.6.2
+	run_cmd "${ns_r3[@]}" ip route replace 10.1.6.0/24 dev eth0 mtu 1300
+	# shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword
+	run_cmd "${ns_a[@]}" ping -M do -s 1350 -W 5 -w 4 -c 1 10.1.6.2 || true
+	rc=0
+	echo -e "$out" | grep -q -E "From 10.1.5.2 .* Frag needed and DF set \(mtu = 1300\)" || rc=1
+	return "${rc}"
+}
+
+test_mtu_ipv4_s2() {
+	setup ns_set_v4x namespaces network icmp_filter xfrm || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 10.1.5.2
+	run_cmd "${ns_s2[@]}" ip route replace 10.1.5.0/24 dev eth0 src 10.1.5.1 mtu 1300
+	# shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword
+	run_cmd "${ns_a[@]}" ping -M do -s 1350 -W 5 -w 4 -c 1 10.1.5.2 || true
+	rc=0
+	echo -e "$out" | grep -q -E "From 10.1.4.2.*Frag needed and DF set \(mtu = 1300\)" || rc=1
+	return "${rc}"
+}
+
+test_mtu_ipv6_s2() {
+	setup ns_set_v6x namespaces network xfrm icmpv6_filter || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:5::2
+	run_cmd "${ns_s2[@]}" ip -6 route replace fc00:5::/64 dev eth0 metric 256 mtu 1300
+	# shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword
+	run_cmd "${ns_a[@]}" ping -M do -s 1350 -W 5 -w 4 -c 1 fc00:5::2 || true
+	rc=0
+	echo -e "$out" | grep -q -E "From fc00:4::2.*Packet too big: mtu=1300" || rc=1
+	return "${rc}"
+}
+
+test_mtu_ipv6_r3() {
+	setup ns_set_v6 namespaces network xfrm icmpv6_filter || return "$ksft_skip"
+	run_cmd "${ns_a[@]}" ping -W 5 -w 4 -c 1 fc00:6::2
+	run_cmd "${ns_r3[@]}" ip -6 route replace fc00:6::/64 dev eth1 metric 256 mtu 1300
+	# shellcheck disable=SC1010 # -M do: do = dont-fragment, not shell keyword
+	run_cmd "${ns_a[@]}" ping -M do -s 1300 -W 5 -w 4 -c 1 fc00:6::2 || true
+	rc=0
+	echo -e "$out" | grep -q -E "From fc00:5::2 icmp_seq=.* Packet too big: mtu=1300" || rc=1
+	return "${rc}"
+}
+
+################################################################################
+#
+# Print the command-line help and the list of available tests, then exit 1.
+usage() {
+	printf '\n%s\n' "$0 [OPTIONS] [TEST]..."
+	printf '%s\n\n' "If no TEST argument is given, all tests will be run."
+	echo -e "\t-e Exit on test failure"
+	echo -e "\t-p Pause on fail. Namespaces are kept for diagnostics"
+	echo -e "\t-P Pause after the test. Namespaces are kept for diagnostics"
+	echo -e "\t-v Verbose output. Show commands; -vv Show output and nft rules also"
+	echo "Available tests${tests}"
+	exit 1
+}
+
+################################################################################
+#
+exitcode=0 # overall script status, maintained by run_test
+all_skipped=true # stays true until a test passes or fails
+out= # captured output of the last run_cmd_err command
+cmd= # last command line given to run_cmd_err
+
+while getopts :epPv o; do
+	case $o in
+	e) EXIT_ON_TEST_FAIL=yes ;;
+	P) PAUSE=always ;;
+	p) PAUSE=fail ;;
+	v) VERBOSE=$((VERBOSE + 1)) ;;
+	*) usage ;;
+	esac
+done
+shift $((OPTIND - 1)) # drop the parsed options; "$@" is now the list of requested tests
+
+IFS=$'\t\n' # split ${tests} on tabs and newlines only, so a description stays one field
+
+for arg; do
+	# Check first that all requested tests are available before running any
+	command -v "test_${arg}" >/dev/null || {
+		echo "=== Test ${arg} not found"
+		usage
+	}
+done
+
+name=""
+desc=""
+fail="no"
+
+for t in ${tests}; do # fields alternate: test name, then its description
+	[ "${name}" = "" ] && name="${t}" && continue
+	[ "${desc}" = "" ] && desc="${t}"
+
+	run_this=1 # with no TEST arguments every test is run
+	for arg; do
+		[ "${arg}" = "${name}" ] && run_this=1 && break
+		run_this=0
+	done
+	if [ $run_this -eq 1 ]; then
+		run_test
+	fi
+	name=""
+	desc=""
+done
+
+exit ${exitcode}
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
index 12e7cae251be..793a2fc33d9f 100644
--- a/tools/testing/selftests/net/ynl.mk
+++ b/tools/testing/selftests/net/ynl.mk
@@ -5,10 +5,11 @@
 # Inputs:
 #
 # YNL_GENS:      families we need in the selftests
-# YNL_PROGS:     TEST_PROGS which need YNL (TODO, none exist, yet)
+# YNL_GEN_PROGS: TEST_GEN_PROGS which need YNL
 # YNL_GEN_FILES: TEST_GEN_FILES which need YNL
 
-YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES))
+YNL_OUTPUTS :=	$(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) \
+		$(patsubst %,$(OUTPUT)/%,$(YNL_GEN_PROGS))
 YNL_SPECS := \
 	$(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS))
 
@@ -27,7 +28,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig:
 
 $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
 	$(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a
-	$(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a
+	$(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \
+		GENS="$(YNL_GENS)" RSTS="" libynl.a
 	$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
 
 EXTRA_CLEAN += \